Diffstat (file mode, path, lines changed):

-rw-r--r--  Documentation/00-INDEX | 10
-rw-r--r--  Documentation/ABI/stable/sysfs-devices-node | 2
-rw-r--r--  Documentation/ABI/testing/sysfs-class-cxl | 8
-rw-r--r--  Documentation/ABI/testing/sysfs-devices-system-cpu | 1
-rw-r--r--  Documentation/ABI/testing/sysfs-kernel-mm-hugepages | 2
-rw-r--r--  Documentation/ABI/testing/sysfs-kernel-mm-ksm | 2
-rw-r--r--  Documentation/ABI/testing/sysfs-kernel-slab | 4
-rw-r--r--  Documentation/RCU/whatisRCU.txt | 2
-rw-r--r--  Documentation/admin-guide/bcache.rst (renamed from Documentation/bcache.txt) | 0
-rw-r--r--  Documentation/admin-guide/cgroup-v2.rst (renamed from Documentation/cgroup-v2.txt) | 0
-rw-r--r--  Documentation/admin-guide/index.rst | 3
-rw-r--r--  Documentation/admin-guide/kernel-parameters.txt | 214
-rw-r--r--  Documentation/admin-guide/mm/concepts.rst | 222
-rw-r--r--  Documentation/admin-guide/mm/hugetlbpage.rst (renamed from Documentation/vm/hugetlbpage.txt) | 263
-rw-r--r--  Documentation/admin-guide/mm/idle_page_tracking.rst (renamed from Documentation/vm/idle_page_tracking.txt) | 56
-rw-r--r--  Documentation/admin-guide/mm/index.rst | 36
-rw-r--r--  Documentation/admin-guide/mm/ksm.rst | 189
-rw-r--r--  Documentation/admin-guide/mm/numa_memory_policy.rst | 495
-rw-r--r--  Documentation/admin-guide/mm/pagemap.rst (renamed from Documentation/vm/pagemap.txt) | 184
-rw-r--r--  Documentation/admin-guide/mm/soft-dirty.rst (renamed from Documentation/vm/soft-dirty.txt) | 20
-rw-r--r--  Documentation/admin-guide/mm/transhuge.rst | 418
-rw-r--r--  Documentation/admin-guide/mm/userfaultfd.rst (renamed from Documentation/vm/userfaultfd.txt) | 66
-rw-r--r--  Documentation/admin-guide/pm/intel_pstate.rst | 2
-rw-r--r--  Documentation/admin-guide/pm/sleep-states.rst | 2
-rw-r--r--  Documentation/admin-guide/ramoops.rst | 2
-rw-r--r--  Documentation/arm/Marvell/README | 15
-rw-r--r--  Documentation/block/cmdline-partition.txt | 21
-rw-r--r--  Documentation/block/null_blk.txt | 9
-rw-r--r--  Documentation/bpf/bpf_devel_QA.txt | 10
-rw-r--r--  Documentation/core-api/atomic_ops.rst | 13
-rw-r--r--  Documentation/core-api/cachetlb.rst (renamed from Documentation/cachetlb.txt) | 0
-rw-r--r--  Documentation/core-api/circular-buffers.rst (renamed from Documentation/circular-buffers.txt) | 0
-rw-r--r--  Documentation/core-api/gfp_mask-from-fs-io.rst | 66
-rw-r--r--  Documentation/core-api/index.rst | 3
-rw-r--r--  Documentation/core-api/kernel-api.rst | 60
-rw-r--r--  Documentation/core-api/refcount-vs-atomic.rst | 2
-rw-r--r--  Documentation/crypto/index.rst | 1
-rw-r--r--  Documentation/dev-tools/kasan.rst | 2
-rw-r--r--  Documentation/dev-tools/kselftest.rst | 5
-rw-r--r--  Documentation/device-mapper/thin-provisioning.txt | 5
-rw-r--r--  Documentation/devicetree/bindings/ata/ahci-platform.txt | 1
-rw-r--r--  Documentation/devicetree/bindings/display/panel/panel-common.txt | 2
-rw-r--r--  Documentation/devicetree/bindings/dma/renesas,rcar-dmac.txt | 1
-rw-r--r--  Documentation/devicetree/bindings/hwmon/gpio-fan.txt | 2
-rw-r--r--  Documentation/devicetree/bindings/hwmon/ltc2990.txt | 36
-rw-r--r--  Documentation/devicetree/bindings/input/atmel,maxtouch.txt | 7
-rw-r--r--  Documentation/devicetree/bindings/interrupt-controller/amlogic,meson-gpio-intc.txt | 11
-rw-r--r--  Documentation/devicetree/bindings/interrupt-controller/arm,gic-v3.txt | 17
-rw-r--r--  Documentation/devicetree/bindings/interrupt-controller/st,stm32-exti.txt | 3
-rw-r--r--  Documentation/devicetree/bindings/net/can/rcar_canfd.txt | 4
-rw-r--r--  Documentation/devicetree/bindings/net/dsa/b53.txt | 1
-rw-r--r--  Documentation/devicetree/bindings/net/marvell-pp2.txt | 9
-rw-r--r--  Documentation/devicetree/bindings/net/micrel-ksz90x1.txt | 7
-rw-r--r--  Documentation/devicetree/bindings/net/renesas,ravb.txt | 1
-rw-r--r--  Documentation/devicetree/bindings/pinctrl/allwinner,sunxi-pinctrl.txt | 6
-rw-r--r--  Documentation/devicetree/bindings/serial/renesas,sci-serial.txt | 2
-rw-r--r--  Documentation/devicetree/bindings/vendor-prefixes.txt | 1
-rw-r--r--  Documentation/devicetree/overlay-notes.txt | 8
-rw-r--r--  Documentation/doc-guide/parse-headers.rst | 4
-rw-r--r--  Documentation/driver-api/clk.rst (renamed from Documentation/clk.txt) | 0
-rw-r--r--  Documentation/driver-api/device_connection.rst | 2
-rw-r--r--  Documentation/driver-api/gpio/driver.rst | 6
-rw-r--r--  Documentation/driver-api/index.rst | 2
-rw-r--r--  Documentation/driver-api/uio-howto.rst | 3
-rw-r--r--  Documentation/features/core/cBPF-JIT/arch-support.txt (renamed from Documentation/features/lib/strncasecmp/arch-support.txt) | 14
-rw-r--r--  Documentation/features/core/eBPF-JIT/arch-support.txt (renamed from Documentation/features/core/BPF-JIT/arch-support.txt) | 8
-rw-r--r--  Documentation/features/core/generic-idle-thread/arch-support.txt | 4
-rw-r--r--  Documentation/features/core/jump-labels/arch-support.txt | 2
-rw-r--r--  Documentation/features/core/tracehook/arch-support.txt | 2
-rw-r--r--  Documentation/features/debug/KASAN/arch-support.txt | 4
-rw-r--r--  Documentation/features/debug/gcov-profile-all/arch-support.txt | 2
-rw-r--r--  Documentation/features/debug/kgdb/arch-support.txt | 4
-rw-r--r--  Documentation/features/debug/kprobes-on-ftrace/arch-support.txt | 2
-rw-r--r--  Documentation/features/debug/kprobes/arch-support.txt | 4
-rw-r--r--  Documentation/features/debug/kretprobes/arch-support.txt | 4
-rw-r--r--  Documentation/features/debug/optprobes/arch-support.txt | 4
-rw-r--r--  Documentation/features/debug/stackprotector/arch-support.txt | 2
-rw-r--r--  Documentation/features/debug/uprobes/arch-support.txt | 6
-rw-r--r--  Documentation/features/debug/user-ret-profiler/arch-support.txt | 2
-rw-r--r--  Documentation/features/io/dma-api-debug/arch-support.txt | 31
-rw-r--r--  Documentation/features/io/dma-contiguous/arch-support.txt | 4
-rw-r--r--  Documentation/features/io/sg-chain/arch-support.txt | 2
-rw-r--r--  Documentation/features/locking/cmpxchg-local/arch-support.txt | 4
-rw-r--r--  Documentation/features/locking/lockdep/arch-support.txt | 4
-rw-r--r--  Documentation/features/locking/queued-rwlocks/arch-support.txt | 10
-rw-r--r--  Documentation/features/locking/queued-spinlocks/arch-support.txt | 8
-rw-r--r--  Documentation/features/locking/rwsem-optimized/arch-support.txt | 10
-rw-r--r--  Documentation/features/perf/kprobes-event/arch-support.txt | 6
-rw-r--r--  Documentation/features/perf/perf-regs/arch-support.txt | 4
-rw-r--r--  Documentation/features/perf/perf-stackdump/arch-support.txt | 4
-rw-r--r--  Documentation/features/sched/membarrier-sync-core/arch-support.txt | 2
-rw-r--r--  Documentation/features/sched/numa-balancing/arch-support.txt | 6
-rwxr-xr-x  Documentation/features/scripts/features-refresh.sh | 98
-rw-r--r--  Documentation/features/seccomp/seccomp-filter/arch-support.txt | 6
-rw-r--r--  Documentation/features/time/arch-tick-broadcast/arch-support.txt | 4
-rw-r--r--  Documentation/features/time/clockevents/arch-support.txt | 4
-rw-r--r--  Documentation/features/time/context-tracking/arch-support.txt | 2
-rw-r--r--  Documentation/features/time/irq-time-acct/arch-support.txt | 4
-rw-r--r--  Documentation/features/time/modern-timekeeping/arch-support.txt | 2
-rw-r--r--  Documentation/features/time/virt-cpuacct/arch-support.txt | 2
-rw-r--r--  Documentation/features/vm/ELF-ASLR/arch-support.txt | 4
-rw-r--r--  Documentation/features/vm/PG_uncached/arch-support.txt | 2
-rw-r--r--  Documentation/features/vm/THP/arch-support.txt | 2
-rw-r--r--  Documentation/features/vm/TLB/arch-support.txt | 2
-rw-r--r--  Documentation/features/vm/huge-vmap/arch-support.txt | 2
-rw-r--r--  Documentation/features/vm/ioremap_prot/arch-support.txt | 2
-rw-r--r--  Documentation/features/vm/numa-memblock/arch-support.txt | 4
-rw-r--r--  Documentation/features/vm/pte_special/arch-support.txt | 2
-rw-r--r--  Documentation/filesystems/Locking | 52
-rw-r--r--  Documentation/filesystems/proc.txt | 8
-rw-r--r--  Documentation/filesystems/tmpfs.txt | 5
-rw-r--r--  Documentation/filesystems/vfs.txt | 15
-rw-r--r--  Documentation/hwmon/hwmon-kernel-api.txt | 3
-rw-r--r--  Documentation/hwmon/ltc2990 | 24
-rw-r--r--  Documentation/i2c/busses/i2c-ocores | 2
-rw-r--r--  Documentation/index.rst | 3
-rw-r--r--  Documentation/ioctl/botching-up-ioctls.txt | 4
-rw-r--r--  Documentation/media/uapi/rc/keytable.c.rst | 2
-rw-r--r--  Documentation/media/uapi/v4l/v4l2grab.c.rst | 2
-rw-r--r--  Documentation/memory-barriers.txt | 19
-rw-r--r--  Documentation/networking/ppp_generic.txt | 6
-rw-r--r--  Documentation/process/2.Process.rst | 72
-rw-r--r--  Documentation/process/5.Posting.rst | 16
-rw-r--r--  Documentation/process/index.rst | 1
-rw-r--r--  Documentation/process/maintainer-pgp-guide.rst | 39
-rw-r--r--  Documentation/process/submitting-patches.rst | 2
-rw-r--r--  Documentation/scheduler/sched-deadline.txt | 25
-rw-r--r--  Documentation/scsi/scsi_eh.txt | 15
-rw-r--r--  Documentation/security/index.rst | 2
-rw-r--r--  Documentation/sound/alsa-configuration.rst | 4
-rw-r--r--  Documentation/sound/soc/codec.rst | 2
-rw-r--r--  Documentation/sound/soc/platform.rst | 2
-rwxr-xr-x  Documentation/sphinx/parse-headers.pl | 4
-rw-r--r--  Documentation/sysctl/vm.txt | 6
-rw-r--r--  Documentation/trace/coresight.txt | 103
-rw-r--r--  Documentation/trace/ftrace-uses.rst | 4
-rw-r--r--  Documentation/trace/ftrace.rst | 7
-rw-r--r--  Documentation/translations/ko_KR/memory-barriers.txt | 54
-rw-r--r--  Documentation/translations/zh_CN/video4linux/v4l2-framework.txt | 4
-rw-r--r--  Documentation/userspace-api/index.rst | 1
-rw-r--r--  Documentation/userspace-api/spec_ctrl.rst | 94
-rw-r--r--  Documentation/vfio.txt | 5
-rw-r--r--  Documentation/virtual/kvm/cpuid.txt | 6
-rw-r--r--  Documentation/vm/00-INDEX | 58
-rw-r--r--  Documentation/vm/active_mm.rst | 91
-rw-r--r--  Documentation/vm/active_mm.txt | 83
-rw-r--r--  Documentation/vm/balance.rst (renamed from Documentation/vm/balance) | 15
-rw-r--r--  Documentation/vm/cleancache.rst (renamed from Documentation/vm/cleancache.txt) | 105
-rw-r--r--  Documentation/vm/conf.py | 10
-rw-r--r--  Documentation/vm/frontswap.rst (renamed from Documentation/vm/frontswap.txt) | 59
-rw-r--r--  Documentation/vm/highmem.rst (renamed from Documentation/vm/highmem.txt) | 87
-rw-r--r--  Documentation/vm/hmm.rst (renamed from Documentation/vm/hmm.txt) | 78
-rw-r--r--  Documentation/vm/hugetlbfs_reserv.rst (renamed from Documentation/vm/hugetlbfs_reserv.txt) | 220
-rw-r--r--  Documentation/vm/hwpoison.rst (renamed from Documentation/vm/hwpoison.txt) | 141
-rw-r--r--  Documentation/vm/index.rst | 50
-rw-r--r--  Documentation/vm/ksm.rst | 87
-rw-r--r--  Documentation/vm/ksm.txt | 178
-rw-r--r--  Documentation/vm/mmu_notifier.rst | 99
-rw-r--r--  Documentation/vm/mmu_notifier.txt | 93
-rw-r--r--  Documentation/vm/numa.rst (renamed from Documentation/vm/numa) | 6
-rw-r--r--  Documentation/vm/numa_memory_policy.txt | 452
-rw-r--r--  Documentation/vm/overcommit-accounting | 80
-rw-r--r--  Documentation/vm/overcommit-accounting.rst | 87
-rw-r--r--  Documentation/vm/page_frags.rst (renamed from Documentation/vm/page_frags) | 5
-rw-r--r--  Documentation/vm/page_migration.rst (renamed from Documentation/vm/page_migration) | 149
-rw-r--r--  Documentation/vm/page_owner.rst (renamed from Documentation/vm/page_owner.txt) | 34
-rw-r--r--  Documentation/vm/remap_file_pages.rst (renamed from Documentation/vm/remap_file_pages.txt) | 6
-rw-r--r--  Documentation/vm/slub.rst | 361
-rw-r--r--  Documentation/vm/slub.txt | 342
-rw-r--r--  Documentation/vm/split_page_table_lock.rst (renamed from Documentation/vm/split_page_table_lock) | 12
-rw-r--r--  Documentation/vm/swap_numa.rst (renamed from Documentation/vm/swap_numa.txt) | 55
-rw-r--r--  Documentation/vm/transhuge.rst | 197
-rw-r--r--  Documentation/vm/transhuge.txt | 527
-rw-r--r--  Documentation/vm/unevictable-lru.rst (renamed from Documentation/vm/unevictable-lru.txt) | 117
-rw-r--r--  Documentation/vm/z3fold.rst (renamed from Documentation/vm/z3fold.txt) | 6
-rw-r--r--  Documentation/vm/zsmalloc.rst (renamed from Documentation/vm/zsmalloc.txt) | 60
-rw-r--r--  Documentation/vm/zswap.rst (renamed from Documentation/vm/zswap.txt) | 71
-rw-r--r--  Documentation/x86/x86_64/boot-options.txt | 13
-rw-r--r--  LICENSES/exceptions/Linux-syscall-note | 2
-rw-r--r--  LICENSES/other/Apache-2.0 | 183
-rw-r--r--  LICENSES/other/CC-BY-SA-4.0 | 397
-rw-r--r--  LICENSES/other/CDDL-1.0 | 364
-rw-r--r--  LICENSES/other/Linux-OpenIB | 26
-rw-r--r--  LICENSES/other/X11 | 37
-rw-r--r--  LICENSES/preferred/GPL-2.0 | 6
-rw-r--r--  MAINTAINERS | 86
-rw-r--r--  Makefile | 13
-rw-r--r--  arch/Kconfig | 7
-rw-r--r--  arch/alpha/Kconfig | 17
-rw-r--r--  arch/alpha/include/asm/dma-mapping.h | 8
-rw-r--r--  arch/alpha/include/asm/pci.h | 5
-rw-r--r--  arch/alpha/include/uapi/asm/siginfo.h | 14
-rw-r--r--  arch/alpha/kernel/io.c | 14
-rw-r--r--  arch/alpha/kernel/osf_sys.c | 11
-rw-r--r--  arch/alpha/kernel/pci-noop.c | 33
-rw-r--r--  arch/alpha/kernel/pci_iommu.c | 4
-rw-r--r--  arch/alpha/kernel/signal.c | 20
-rw-r--r--  arch/alpha/kernel/traps.c | 79
-rw-r--r--  arch/alpha/mm/fault.c | 13
-rw-r--r--  arch/arc/Kconfig | 11
-rw-r--r--  arch/arc/include/asm/Kbuild | 1
-rw-r--r--  arch/arc/include/asm/dma-mapping.h | 21
-rw-r--r--  arch/arc/include/asm/pci.h | 6
-rw-r--r--  arch/arc/mm/dma.c | 162
-rw-r--r--  arch/arc/mm/fault.c | 2
-rw-r--r--  arch/arm/Kconfig | 15
-rw-r--r--  arch/arm/boot/compressed/Makefile | 8
-rw-r--r--  arch/arm/boot/compressed/head.S | 20
-rw-r--r--  arch/arm/boot/dts/bcm-cygnus.dtsi | 2
-rw-r--r--  arch/arm/boot/dts/da850-lcdk.dts | 4
-rw-r--r--  arch/arm/boot/dts/da850.dtsi | 13
-rw-r--r--  arch/arm/boot/dts/dm8148-evm.dts | 2
-rw-r--r--  arch/arm/boot/dts/dm8148-t410.dts | 2
-rw-r--r--  arch/arm/boot/dts/dm8168-evm.dts | 2
-rw-r--r--  arch/arm/boot/dts/dra62x-j5eco-evm.dts | 2
-rw-r--r--  arch/arm/boot/dts/imx35.dtsi | 4
-rw-r--r--  arch/arm/boot/dts/imx51-zii-rdu1.dts | 6
-rw-r--r--  arch/arm/boot/dts/imx53.dtsi | 4
-rw-r--r--  arch/arm/boot/dts/imx7s.dtsi | 1
-rw-r--r--  arch/arm/boot/dts/logicpd-som-lv.dtsi | 11
-rw-r--r--  arch/arm/boot/dts/r8a7790-lager.dts | 22
-rw-r--r--  arch/arm/boot/dts/r8a7790.dtsi | 65
-rw-r--r--  arch/arm/boot/dts/r8a7791-koelsch.dts | 12
-rw-r--r--  arch/arm/boot/dts/r8a7791-porter.dts | 16
-rw-r--r--  arch/arm/boot/dts/r8a7791.dtsi | 36
-rw-r--r--  arch/arm/boot/dts/r8a7793-gose.dts | 10
-rw-r--r--  arch/arm/boot/dts/r8a7793.dtsi | 37
-rw-r--r--  arch/arm/boot/dts/stm32mp157-pinctrl.dtsi | 4
-rw-r--r--  arch/arm/boot/dts/stm32mp157c.dtsi | 7
-rw-r--r--  arch/arm/boot/dts/sun4i-a10.dtsi | 6
-rw-r--r--  arch/arm/boot/dts/sun8i-h3-orangepi-one.dts | 1
-rw-r--r--  arch/arm/boot/dts/sun8i-v3s-licheepi-zero-dock.dts | 2
-rw-r--r--  arch/arm/boot/dts/tegra20.dtsi | 2
-rw-r--r--  arch/arm/include/asm/assembler.h | 10
-rw-r--r--  arch/arm/include/asm/kvm_mmu.h | 16
-rw-r--r--  arch/arm/include/asm/pci.h | 7
-rw-r--r--  arch/arm/include/uapi/asm/siginfo.h | 13
-rw-r--r--  arch/arm/kernel/dma.c | 14
-rw-r--r--  arch/arm/kernel/machine_kexec.c | 36
-rw-r--r--  arch/arm/kernel/ptrace.c | 1
-rw-r--r--  arch/arm/kernel/setup.c | 2
-rw-r--r--  arch/arm/kernel/swp_emulate.c | 16
-rw-r--r--  arch/arm/kernel/traps.c | 10
-rw-r--r--  arch/arm/lib/getuser.S | 10
-rw-r--r--  arch/arm/mach-axxia/Kconfig | 1
-rw-r--r--  arch/arm/mach-bcm/Kconfig | 1
-rw-r--r--  arch/arm/mach-davinci/board-da830-evm.c | 9
-rw-r--r--  arch/arm/mach-davinci/board-da850-evm.c | 9
-rw-r--r--  arch/arm/mach-davinci/board-dm355-evm.c | 15
-rw-r--r--  arch/arm/mach-davinci/board-dm644x-evm.c | 10
-rw-r--r--  arch/arm/mach-davinci/board-dm646x-evm.c | 5
-rw-r--r--  arch/arm/mach-davinci/board-omapl138-hawk.c | 10
-rw-r--r--  arch/arm/mach-davinci/dm646x.c | 3
-rw-r--r--  arch/arm/mach-ep93xx/core.c | 2
-rw-r--r--  arch/arm/mach-exynos/Kconfig | 1
-rw-r--r--  arch/arm/mach-highbank/Kconfig | 1
-rw-r--r--  arch/arm/mach-ixp4xx/avila-setup.c | 2
-rw-r--r--  arch/arm/mach-ixp4xx/dsmg600-setup.c | 2
-rw-r--r--  arch/arm/mach-ixp4xx/fsg-setup.c | 2
-rw-r--r--  arch/arm/mach-ixp4xx/ixdp425-setup.c | 2
-rw-r--r--  arch/arm/mach-ixp4xx/nas100d-setup.c | 2
-rw-r--r--  arch/arm/mach-ixp4xx/nslu2-setup.c | 2
-rw-r--r--  arch/arm/mach-keystone/pm_domain.c | 1
-rw-r--r--  arch/arm/mach-omap1/ams-delta-fiq.c | 28
-rw-r--r--  arch/arm/mach-omap2/powerdomain.c | 4
-rw-r--r--  arch/arm/mach-pxa/palmz72.c | 2
-rw-r--r--  arch/arm/mach-pxa/viper.c | 4
-rw-r--r--  arch/arm/mach-rockchip/Kconfig | 1
-rw-r--r--  arch/arm/mach-rpc/ecard.c | 16
-rw-r--r--  arch/arm/mach-sa1100/simpad.c | 2
-rw-r--r--  arch/arm/mach-shmobile/Kconfig | 1
-rw-r--r--  arch/arm/mach-tegra/Kconfig | 1
-rw-r--r--  arch/arm/mm/Kconfig | 7
-rw-r--r--  arch/arm/mm/alignment.c | 1
-rw-r--r--  arch/arm/mm/dma-mapping-nommu.c | 9
-rw-r--r--  arch/arm/mm/dma-mapping.c | 25
-rw-r--r--  arch/arm/mm/fault.c | 4
-rw-r--r--  arch/arm/probes/kprobes/opt-arm.c | 4
-rw-r--r--  arch/arm/vfp/vfpmodule.c | 5
-rw-r--r--  arch/arm64/Kconfig | 22
-rw-r--r--  arch/arm64/boot/dts/exynos/exynos5433.dtsi | 2
-rw-r--r--  arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts | 1
-rw-r--r--  arch/arm64/boot/dts/marvell/armada-cp110.dtsi | 7
-rw-r--r--  arch/arm64/boot/dts/nvidia/tegra186-p3310.dtsi | 2
-rw-r--r--  arch/arm64/boot/dts/socionext/uniphier-ld11.dtsi | 2
-rw-r--r--  arch/arm64/boot/dts/socionext/uniphier-ld20-ref.dts | 8
-rw-r--r--  arch/arm64/boot/dts/socionext/uniphier-ld20.dtsi | 2
-rw-r--r--  arch/arm64/boot/dts/socionext/uniphier-pxs3.dtsi | 2
-rw-r--r--  arch/arm64/include/asm/atomic_lse.h | 24
-rw-r--r--  arch/arm64/include/asm/cputype.h | 6
-rw-r--r--  arch/arm64/include/asm/kvm_emulate.h | 2
-rw-r--r--  arch/arm64/include/asm/kvm_mmu.h | 16
-rw-r--r--  arch/arm64/include/asm/pci.h | 5
-rw-r--r--  arch/arm64/include/asm/spinlock.h | 5
-rw-r--r--  arch/arm64/kernel/arm64ksyms.c | 8
-rw-r--r--  arch/arm64/kernel/cpu_errata.c | 1
-rw-r--r--  arch/arm64/kernel/fpsimd.c | 2
-rw-r--r--  arch/arm64/kernel/sys_compat.c | 1
-rw-r--r--  arch/arm64/kernel/traps.c | 1
-rw-r--r--  arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c | 24
-rw-r--r--  arch/arm64/lib/tishift.S | 15
-rw-r--r--  arch/arm64/mm/dma-mapping.c | 10
-rw-r--r--  arch/arm64/mm/fault.c | 69
-rw-r--r--  arch/arm64/mm/init.c | 4
-rw-r--r--  arch/arm64/mm/mmu.c | 16
-rw-r--r--  arch/c6x/Kconfig | 4
-rw-r--r--  arch/c6x/include/asm/Kbuild | 1
-rw-r--r--  arch/c6x/include/asm/dma-mapping.h | 28
-rw-r--r--  arch/c6x/include/asm/setup.h | 2
-rw-r--r--  arch/c6x/kernel/Makefile | 2
-rw-r--r--  arch/c6x/kernel/dma.c | 149
-rw-r--r--  arch/c6x/kernel/traps.c | 9
-rw-r--r--  arch/c6x/mm/dma-coherent.c | 40
-rw-r--r--  arch/h8300/include/asm/pci.h | 2
-rw-r--r--  arch/hexagon/Kconfig | 4
-rw-r--r--  arch/hexagon/include/asm/io.h | 6
-rw-r--r--  arch/hexagon/kernel/dma.c | 1
-rw-r--r--  arch/hexagon/kernel/traps.c | 9
-rw-r--r--  arch/hexagon/lib/checksum.c | 1
-rw-r--r--  arch/hexagon/mm/vm_fault.c | 20
-rw-r--r--  arch/ia64/Kconfig | 25
-rw-r--r--  arch/ia64/hp/common/sba_iommu.c | 18
-rw-r--r--  arch/ia64/hp/sim/simserial.c | 15
-rw-r--r--  arch/ia64/include/asm/hardirq.h | 2
-rw-r--r--  arch/ia64/include/asm/pci.h | 17
-rw-r--r--  arch/ia64/include/uapi/asm/siginfo.h | 7
-rw-r--r--  arch/ia64/kernel/brl_emu.c | 1
-rw-r--r--  arch/ia64/kernel/dma-mapping.c | 10
-rw-r--r--  arch/ia64/kernel/palinfo.c | 16
-rw-r--r--  arch/ia64/kernel/perfmon.c | 16
-rw-r--r--  arch/ia64/kernel/salinfo.c | 42
-rw-r--r--  arch/ia64/kernel/setup.c | 12
-rw-r--r--  arch/ia64/kernel/signal.c | 2
-rw-r--r--  arch/ia64/kernel/traps.c | 31
-rw-r--r--  arch/ia64/kernel/unaligned.c | 1
-rw-r--r--  arch/ia64/mm/fault.c | 4
-rw-r--r--  arch/ia64/sn/kernel/io_common.c | 5
-rw-r--r--  arch/ia64/sn/kernel/sn2/prominfo_proc.c | 32
-rw-r--r--  arch/ia64/sn/kernel/sn2/sn_proc_fs.c | 62
-rw-r--r--  arch/m68k/68000/timers.c | 4
-rw-r--r--  arch/m68k/apollo/config.c | 8
-rw-r--r--  arch/m68k/configs/amiga_defconfig | 20
-rw-r--r--  arch/m68k/configs/apollo_defconfig | 20
-rw-r--r--  arch/m68k/configs/atari_defconfig | 20
-rw-r--r--  arch/m68k/configs/bvme6000_defconfig | 20
-rw-r--r--  arch/m68k/configs/hp300_defconfig | 20
-rw-r--r--  arch/m68k/configs/mac_defconfig | 20
-rw-r--r--  arch/m68k/configs/multi_defconfig | 20
-rw-r--r--  arch/m68k/configs/mvme147_defconfig | 20
-rw-r--r--  arch/m68k/configs/mvme16x_defconfig | 20
-rw-r--r--  arch/m68k/configs/q40_defconfig | 20
-rw-r--r--  arch/m68k/configs/sun3_defconfig | 20
-rw-r--r--  arch/m68k/configs/sun3x_defconfig | 20
-rw-r--r--  arch/m68k/include/asm/delay.h | 11
-rw-r--r--  arch/m68k/include/asm/pci.h | 6
-rw-r--r--  arch/m68k/include/asm/uaccess_mm.h | 16
-rw-r--r--  arch/m68k/kernel/dma.c | 10
-rw-r--r--  arch/m68k/kernel/setup_mm.c | 14
-rw-r--r--  arch/m68k/kernel/signal.c | 23
-rw-r--r--  arch/m68k/kernel/time.c | 25
-rw-r--r--  arch/m68k/kernel/traps.c | 58
-rw-r--r--  arch/m68k/mac/config.c | 2
-rw-r--r--  arch/m68k/mm/fault.c | 25
-rw-r--r--  arch/m68k/mm/kmap.c | 3
-rw-r--r--  arch/m68k/mvme147/config.c | 4
-rw-r--r--  arch/m68k/mvme16x/config.c | 4
-rw-r--r--  arch/m68k/sun3/intersil.c | 8
-rw-r--r--  arch/m68k/sun3x/time.c | 8
-rw-r--r--  arch/microblaze/Kconfig | 1
-rw-r--r--  arch/microblaze/include/asm/pci.h | 6
-rw-r--r--  arch/microblaze/kernel/dma.c | 11
-rw-r--r--  arch/microblaze/kernel/exceptions.c | 8
-rw-r--r--  arch/microblaze/mm/fault.c | 12
-rw-r--r--  arch/mips/Kconfig | 24
-rw-r--r--  arch/mips/boot/compressed/uart-16550.c | 6
-rw-r--r--  arch/mips/boot/dts/xilfpga/Makefile | 2
-rw-r--r--  arch/mips/cavium-octeon/Kconfig | 12
-rw-r--r--  arch/mips/generic/Platform | 1
-rw-r--r--  arch/mips/include/asm/pci.h | 7
-rw-r--r--  arch/mips/kernel/process.c | 4
-rw-r--r--  arch/mips/kernel/ptrace.c | 24
-rw-r--r--  arch/mips/kernel/ptrace32.c | 6
-rw-r--r--  arch/mips/kernel/traps.c | 65
-rw-r--r--  arch/mips/kvm/mips.c | 2
-rw-r--r--  arch/mips/loongson64/Kconfig | 15
-rw-r--r--  arch/mips/mm/c-r4k.c | 9
-rw-r--r--  arch/mips/mm/dma-default.c | 10
-rw-r--r--  arch/mips/mm/fault.c | 18
-rw-r--r--  arch/mips/netlogic/Kconfig | 6
-rw-r--r--  arch/mips/pci/ops-pmcmsp.c | 28
-rw-r--r--  arch/mips/sibyte/common/bus_watcher.c | 16
-rw-r--r--  arch/nds32/Kconfig | 10
-rw-r--r--  arch/nds32/Kconfig.cpu | 5
-rw-r--r--  arch/nds32/Makefile | 7
-rw-r--r--  arch/nds32/include/asm/Kbuild | 3
-rw-r--r--  arch/nds32/include/asm/bitfield.h | 3
-rw-r--r--  arch/nds32/include/asm/cacheflush.h | 2
-rw-r--r--  arch/nds32/include/asm/dma-mapping.h | 14
-rw-r--r--  arch/nds32/include/asm/io.h | 2
-rw-r--r--  arch/nds32/include/asm/page.h | 3
-rw-r--r--  arch/nds32/include/asm/pgtable.h | 1
-rw-r--r--  arch/nds32/kernel/dma.c | 196
-rw-r--r--  arch/nds32/kernel/ex-entry.S | 2
-rw-r--r--  arch/nds32/kernel/head.S | 28
-rw-r--r--  arch/nds32/kernel/setup.c | 3
-rw-r--r--  arch/nds32/kernel/stacktrace.c | 2
-rw-r--r--  arch/nds32/kernel/traps.c | 35
-rw-r--r--  arch/nds32/kernel/vdso.c | 10
-rw-r--r--  arch/nds32/lib/copy_page.S | 3
-rw-r--r--  arch/nds32/mm/alignment.c | 9
-rw-r--r--  arch/nds32/mm/cacheflush.c | 74
-rw-r--r--  arch/nds32/mm/fault.c | 18
-rw-r--r--  arch/nds32/mm/init.c | 1
-rw-r--r--  arch/nios2/kernel/traps.c | 8
-rw-r--r--  arch/openrisc/kernel/dma.c | 11
-rw-r--r--  arch/openrisc/kernel/traps.c | 30
-rw-r--r--  arch/openrisc/mm/fault.c | 18
-rw-r--r--  arch/parisc/Kconfig | 8
-rw-r--r--  arch/parisc/Makefile | 3
-rw-r--r--  arch/parisc/include/asm/hardirq.h | 8
-rw-r--r--  arch/parisc/include/asm/pci.h | 23
-rw-r--r--  arch/parisc/kernel/drivers.c | 9
-rw-r--r--  arch/parisc/kernel/pci-dma.c | 17
-rw-r--r--  arch/parisc/kernel/pci.c | 2
-rw-r--r--  arch/parisc/kernel/pdc_chassis.c | 14
-rw-r--r--  arch/parisc/kernel/ptrace.c | 10
-rw-r--r--  arch/parisc/kernel/setup.c | 5
-rw-r--r--  arch/parisc/kernel/smp.c | 3
-rw-r--r--  arch/parisc/kernel/time.c | 2
-rw-r--r--  arch/parisc/kernel/traps.c | 72
-rw-r--r--  arch/parisc/kernel/unaligned.c | 15
-rw-r--r--  arch/parisc/math-emu/driver.c | 8
-rw-r--r--  arch/parisc/mm/fault.c | 54
-rw-r--r--  arch/parisc/mm/init.c | 2
-rw-r--r--  arch/powerpc/Kconfig | 27
-rw-r--r--  arch/powerpc/include/asm/exception-64s.h | 29
-rw-r--r--  arch/powerpc/include/asm/feature-fixups.h | 19
-rw-r--r--  arch/powerpc/include/asm/ftrace.h | 29
-rw-r--r--  arch/powerpc/include/asm/hardirq.h | 7
-rw-r--r--  arch/powerpc/include/asm/kvm_book3s.h | 1
-rw-r--r--  arch/powerpc/include/asm/paca.h | 1
-rw-r--r--  arch/powerpc/include/asm/pci.h | 18
-rw-r--r--  arch/powerpc/include/asm/security_features.h | 11
-rw-r--r--  arch/powerpc/include/asm/topology.h | 13
-rw-r--r--  arch/powerpc/include/uapi/asm/siginfo.h | 15
-rw-r--r--  arch/powerpc/kernel/asm-offsets.c | 1
-rw-r--r--  arch/powerpc/kernel/cpu_setup_power.S | 6
-rw-r--r--  arch/powerpc/kernel/dma.c | 3
-rw-r--r--  arch/powerpc/kernel/dt_cpu_ftrs.c | 1
-rw-r--r--  arch/powerpc/kernel/eeh.c | 14
-rw-r--r--  arch/powerpc/kernel/exceptions-64s.S | 19
-rw-r--r--  arch/powerpc/kernel/process.c | 1
-rw-r--r--  arch/powerpc/kernel/rtas-proc.c | 32
-rw-r--r--  arch/powerpc/kernel/security.c | 149
-rw-r--r--  arch/powerpc/kernel/traps.c | 13
-rw-r--r--  arch/powerpc/kernel/vmlinux.lds.S | 14
-rw-r--r--  arch/powerpc/kvm/book3s_64_mmu_radix.c | 6
-rw-r--r--  arch/powerpc/kvm/book3s_hv.c | 1
-rw-r--r--  arch/powerpc/kvm/book3s_hv_rmhandlers.S | 97
-rw-r--r--  arch/powerpc/kvm/book3s_xive_template.c | 108
-rw-r--r--  arch/powerpc/lib/feature-fixups.c | 115
-rw-r--r--  arch/powerpc/mm/fault.c | 1
-rw-r--r--  arch/powerpc/platforms/Kconfig.cputype | 1
-rw-r--r--  arch/powerpc/platforms/cell/spufs/fault.c | 2
-rw-r--r--  arch/powerpc/platforms/cell/spufs/sched.c | 14
-rw-r--r--  arch/powerpc/platforms/powernv/opal-nvram.c | 14
-rw-r--r--  arch/powerpc/platforms/powernv/setup.c | 1
-rw-r--r--  arch/powerpc/platforms/pseries/setup.c | 1
-rw-r--r--  arch/riscv/Kconfig | 44
-rw-r--r--  arch/riscv/include/asm/dma-mapping.h | 15
-rw-r--r--  arch/riscv/include/asm/pci.h | 3
-rw-r--r--  arch/riscv/kernel/setup.c | 2
-rw-r--r--  arch/riscv/kernel/traps.c | 16
-rw-r--r--  arch/s390/Kconfig | 17
-rw-r--r--  arch/s390/configs/debug_defconfig | 9
-rw-r--r--  arch/s390/configs/performance_defconfig | 8
-rw-r--r--  arch/s390/crypto/crc32be-vx.S | 5
-rw-r--r--  arch/s390/crypto/crc32le-vx.S | 4
-rw-r--r--  arch/s390/include/asm/hardirq.h | 2
-rw-r--r--  arch/s390/include/asm/nospec-insn.h | 196
-rw-r--r--  arch/s390/include/asm/pci.h | 2
-rw-r--r--  arch/s390/include/asm/purgatory.h | 6
-rw-r--r--  arch/s390/kernel/Makefile | 1
-rw-r--r--  arch/s390/kernel/asm-offsets.c | 1
-rw-r--r--  arch/s390/kernel/base.S | 24
-rw-r--r--  arch/s390/kernel/entry.S | 105
-rw-r--r--  arch/s390/kernel/irq.c | 5
-rw-r--r--  arch/s390/kernel/mcount.S | 14
-rw-r--r--  arch/s390/kernel/nospec-branch.c | 44
-rw-r--r--  arch/s390/kernel/nospec-sysfs.c | 21
-rw-r--r--  arch/s390/kernel/perf_cpum_sf.c | 4
-rw-r--r--  arch/s390/kernel/reipl.S | 7
-rw-r--r--  arch/s390/kernel/swsusp.S | 10
-rw-r--r--  arch/s390/kernel/sysinfo.c | 28
-rw-r--r--  arch/s390/kernel/traps.c | 29
-rw-r--r--  arch/s390/kvm/vsie.c | 2
-rw-r--r--  arch/s390/lib/mem.S | 19
-rw-r--r--  arch/s390/mm/fault.c | 21
-rw-r--r--  arch/s390/net/bpf_jit.S | 16
-rw-r--r--  arch/s390/net/bpf_jit_comp.c | 63
-rw-r--r--  arch/s390/pci/pci_dma.c | 11
-rw-r--r--  arch/s390/purgatory/Makefile | 2
-rw-r--r--  arch/sh/Kconfig | 11
-rw-r--r--  arch/sh/drivers/dma/dma-api.c | 14
-rw-r--r--  arch/sh/include/asm/pci.h | 6
-rw-r--r--  arch/sh/kernel/cpu/sh2/probe.c | 4
-rw-r--r--  arch/sh/kernel/dma-nommu.c | 1
-rw-r--r--  arch/sh/kernel/hw_breakpoint.c | 9
-rw-r--r--  arch/sh/kernel/irq.c | 2
-rw-r--r--  arch/sh/kernel/setup.c | 1
-rw-r--r--  arch/sh/kernel/traps_32.c | 20
-rw-r--r--  arch/sh/math-emu/math.c | 8
-rw-r--r--  arch/sh/mm/consistent.c | 20
-rw-r--r--  arch/sh/mm/fault.c | 9
-rw-r--r--  arch/sh/mm/init.c | 68
-rw-r--r--  arch/sh/mm/numa.c | 19
-rw-r--r--  arch/sparc/Kconfig | 18
-rw-r--r--  arch/sparc/include/asm/hardirq_64.h | 5
-rw-r--r--  arch/sparc/include/asm/iommu-common.h (renamed from include/linux/iommu-common.h) | 0
-rw-r--r--  arch/sparc/include/asm/iommu_64.h | 2
-rw-r--r--  arch/sparc/include/asm/pci_32.h | 4
-rw-r--r--  arch/sparc/include/asm/pci_64.h | 6
-rw-r--r--  arch/sparc/include/uapi/asm/jsflash.h | 40
-rw-r--r--  arch/sparc/include/uapi/asm/oradax.h | 2
-rw-r--r--  arch/sparc/include/uapi/asm/siginfo.h | 7
-rw-r--r--  arch/sparc/kernel/Makefile | 4
-rw-r--r--  arch/sparc/kernel/dma.c | 13
-rw-r--r--  arch/sparc/kernel/iommu-common.c (renamed from lib/iommu-common.c) | 5
-rw-r--r--  arch/sparc/kernel/iommu.c | 2
-rw-r--r--  arch/sparc/kernel/ioport.c | 19
-rw-r--r--  arch/sparc/kernel/ldc.c | 2
-rw-r--r--  arch/sparc/kernel/pci_sun4v.c | 2
-rw-r--r--  arch/sparc/kernel/process_64.c | 9
-rw-r--r--  arch/sparc/kernel/sys_sparc_32.c | 8
-rw-r--r--  arch/sparc/kernel/sys_sparc_64.c | 8
-rw-r--r--  arch/sparc/kernel/traps_32.c | 104
-rw-r--r--  arch/sparc/kernel/traps_64.c | 131
-rw-r--r--  arch/sparc/kernel/unaligned_32.c | 11
-rw-r--r--  arch/sparc/kernel/vio.c | 2
-rw-r--r--  arch/sparc/mm/fault_32.c | 12
-rw-r--r--  arch/sparc/mm/fault_64.c | 8
-rw-r--r--  arch/um/drivers/ubd_kern.c | 16
-rw-r--r--  arch/um/kernel/ptrace.c | 13
-rw-r--r--  arch/um/kernel/trap.c | 62
-rw-r--r--  arch/unicore32/Kconfig | 5
-rw-r--r--  arch/unicore32/kernel/fpu-ucf64.c | 8
-rw-r--r--  arch/unicore32/mm/Kconfig | 11
-rw-r--r--  arch/unicore32/mm/fault.c | 3
-rw-r--r--  arch/x86/Kconfig | 37
-rw-r--r--  arch/x86/boot/compressed/cmdline.c | 2
-rw-r--r--  arch/x86/boot/compressed/eboot.c | 112
-rw-r--r--  arch/x86/boot/compressed/head_64.S | 80
-rw-r--r--  arch/x86/boot/compressed/kaslr.c | 4
-rw-r--r--  arch/x86/boot/compressed/misc.h | 6
-rw-r--r--  arch/x86/boot/compressed/pgtable_64.c | 28
-rw-r--r--  arch/x86/entry/syscalls/syscall_32.tbl | 1
-rw-r--r--  arch/x86/entry/syscalls/syscall_64.tbl | 1
-rw-r--r--  arch/x86/entry/vdso/Makefile | 11
-rw-r--r--  arch/x86/entry/vdso/vdso32/vdso-fakesections.c | 1
-rw-r--r--  arch/x86/entry/vsyscall/vsyscall_64.c | 2
-rw-r--r--  arch/x86/events/amd/ibs.c | 2
-rw-r--r--  arch/x86/events/amd/uncore.c | 21
-rw-r--r--  arch/x86/events/core.c | 8
-rw-r--r--  arch/x86/events/intel/cstate.c | 2
-rw-r--r--  arch/x86/events/intel/pt.c | 4
-rw-r--r--  arch/x86/events/intel/uncore.c | 76
-rw-r--r--  arch/x86/events/intel/uncore.h | 127
-rw-r--r--  arch/x86/events/intel/uncore_nhmex.c | 2
-rw-r--r--  arch/x86/events/intel/uncore_snb.c | 101
-rw-r--r--  arch/x86/events/intel/uncore_snbep.c | 82
-rw-r--r--  arch/x86/events/msr.c | 9
-rw-r--r--  arch/x86/include/asm/cacheinfo.h | 7
-rw-r--r--  arch/x86/include/asm/cpufeature.h | 15
-rw-r--r--  arch/x86/include/asm/cpufeatures.h | 20
-rw-r--r--  arch/x86/include/asm/dma-mapping.h | 5
-rw-r--r--  arch/x86/include/asm/hardirq.h | 8
-rw-r--r--  arch/x86/include/asm/insn.h | 18
-rw-r--r--  arch/x86/include/asm/io.h | 8
-rw-r--r--  arch/x86/include/asm/kvm_host.h | 2
-rw-r--r--  arch/x86/include/asm/mmu_context.h | 2
-rw-r--r--  arch/x86/include/asm/msr-index.h | 9
-rw-r--r--  arch/x86/include/asm/nospec-branch.h | 43
-rw-r--r--  arch/x86/include/asm/page_64_types.h | 2
-rw-r--r--  arch/x86/include/asm/paravirt.h | 4
-rw-r--r--  arch/x86/include/asm/pci.h | 3
-rw-r--r--  arch/x86/include/asm/pgalloc.h | 4
-rw-r--r--  arch/x86/include/asm/pgtable.h | 12
-rw-r--r--  arch/x86/include/asm/pgtable_32_types.h | 2
-rw-r--r--  arch/x86/include/asm/pgtable_64.h | 2
-rw-r--r--  arch/x86/include/asm/pgtable_64_types.h | 25
-rw-r--r--  arch/x86/include/asm/pkeys.h | 18
-rw-r--r--  arch/x86/include/asm/processor.h | 9
-rw-r--r--  arch/x86/include/asm/qspinlock.h | 21
-rw-r--r--  arch/x86/include/asm/qspinlock_paravirt.h | 3
-rw-r--r--  arch/x86/include/asm/smp.h | 1
-rw-r--r--  arch/x86/include/asm/sparsemem.h | 4
-rw-r--r--  arch/x86/include/asm/spec-ctrl.h | 80
-rw-r--r--  arch/x86/include/asm/stacktrace.h | 2
-rw-r--r--  arch/x86/include/asm/string_64.h | 10
-rw-r--r--  arch/x86/include/asm/thread_info.h | 4
-rw-r--r--  arch/x86/include/asm/uaccess_64.h | 14
-rw-r--r--  arch/x86/include/uapi/asm/kvm_para.h | 2
-rw-r--r--  arch/x86/kernel/amd_nb.c | 6
-rw-r--r--  arch/x86/kernel/apic/x2apic_cluster.c | 1
-rw-r--r--  arch/x86/kernel/apm_32.c | 17
-rw-r--r--  arch/x86/kernel/cpu/Makefile | 2
-rw-r--r--  arch/x86/kernel/cpu/amd.c | 58
-rw-r--r--  arch/x86/kernel/cpu/bugs.c | 397
-rw-r--r--  arch/x86/kernel/cpu/cacheinfo.c (renamed from arch/x86/kernel/cpu/intel_cacheinfo.c) | 46
-rw-r--r--  arch/x86/kernel/cpu/centaur.c | 53
-rw-r--r--  arch/x86/kernel/cpu/common.c | 108
-rw-r--r--  arch/x86/kernel/cpu/cpu.h | 12
-rw-r--r--  arch/x86/kernel/cpu/intel.c | 37
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce-inject.c | 2
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce.c | 18
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce_amd.c | 44
-rw-r--r--  arch/x86/kernel/cpu/mtrr/Makefile | 2
-rw-r--r--  arch/x86/kernel/cpu/mtrr/mtrr.c (renamed from arch/x86/kernel/cpu/mtrr/main.c) | 37
-rw-r--r--  arch/x86/kernel/cpu/topology.c | 8
-rw-r--r--  arch/x86/kernel/dumpstack.c | 144
-rw-r--r--  arch/x86/kernel/e820.c | 32
-rw-r--r--  arch/x86/kernel/early-quirks.c | 8
-rw-r--r--  arch/x86/kernel/head64.c | 35
-rw-r--r--  arch/x86/kernel/hpet.c | 5
-rw-r--r--  arch/x86/kernel/kprobes/core.c | 4
-rw-r--r--  arch/x86/kernel/kvm.c | 8
-rw-r--r--  arch/x86/kernel/machine_kexec_32.c | 6
-rw-r--r--  arch/x86/kernel/machine_kexec_64.c | 8
-rw-r--r--  arch/x86/kernel/pci-dma.c | 58
-rw-r--r--  arch/x86/kernel/perf_regs.c | 10
-rw-r--r--  arch/x86/kernel/process.c | 146
-rw-r--r--  arch/x86/kernel/process_32.c | 8
-rw-r--r--  arch/x86/kernel/process_64.c | 1
-rw-r--r--  arch/x86/kernel/ptrace.c | 2
-rw-r--r--  arch/x86/kernel/signal_compat.c | 2
-rw-r--r--  arch/x86/kernel/smpboot.c | 12
-rw-r--r--  arch/x86/kernel/traps.c | 3
-rw-r--r--  arch/x86/kernel/umip.c | 1
-rw-r--r--  arch/x86/kernel/uprobes.c | 8
-rw-r--r--  arch/x86/kernel/vmlinux.lds.S | 4
-rw-r--r--  arch/x86/kvm/cpuid.c | 26
-rw-r--r--  arch/x86/kvm/hyperv.c | 23
-rw-r--r--  arch/x86/kvm/lapic.c | 53
-rw-r--r--  arch/x86/kvm/mmu.c | 1
-rw-r--r--  arch/x86/kvm/svm.c | 66
-rw-r--r--  arch/x86/kvm/vmx.c | 59
-rw-r--r--  arch/x86/kvm/x86.c | 32
-rw-r--r--  arch/x86/lib/memcpy_64.S | 102
-rw-r--r--  arch/x86/lib/usercopy_64.c | 30
-rw-r--r--  arch/x86/mm/dump_pagetables.c | 6
-rw-r--r--  arch/x86/mm/fault.c | 12
-rw-r--r--  arch/x86/mm/ident_map.c | 2
-rw-r--r--  arch/x86/mm/init_64.c | 8
-rw-r--r--  arch/x86/mm/kasan_init_64.c | 14
-rw-r--r--  arch/x86/mm/kaslr.c | 8
-rw-r--r--  arch/x86/mm/numa.c | 22
-rw-r--r--  arch/x86/mm/pkeys.c | 21
-rw-r--r--  arch/x86/mm/tlb.c | 2
-rw-r--r--  arch/x86/net/bpf_jit_comp.c | 251
-rw-r--r--  arch/x86/platform/efi/efi_64.c | 2
-rw-r--r--  arch/x86/power/hibernate_64.c | 2
-rw-r--r--  arch/x86/um/vdso/Makefile | 4
-rw-r--r--  arch/x86/xen/efi.c | 57
-rw-r--r--  arch/x86/xen/enlighten_hvm.c | 13
-rw-r--r--  arch/x86/xen/enlighten_pv.c | 86
-rw-r--r--  arch/x86/xen/mmu.c | 4
-rw-r--r--  arch/x86/xen/mmu_pv.c | 4
-rw-r--r--  arch/xtensa/Kconfig | 1
-rw-r--r--  arch/xtensa/include/asm/pci.h | 2
-rw-r--r--  arch/xtensa/kernel/pci-dma.c | 9
-rw-r--r--  arch/xtensa/kernel/traps.c | 9
-rw-r--r--  arch/xtensa/mm/fault.c | 18
-rw-r--r--  arch/xtensa/platforms/iss/console.c | 15
-rw-r--r--  block/bfq-cgroup.c | 40
-rw-r--r--  block/bfq-iosched.c | 478
-rw-r--r--  block/bfq-iosched.h | 30
-rw-r--r--  block/bio-integrity.c | 29
-rw-r--r--  block/bio.c | 189
-rw-r--r--  block/blk-core.c | 120
-rw-r--r--  block/blk-integrity.c | 12
-rw-r--r--  block/blk-lib.c | 12
-rw-r--r--  block/blk-merge.c | 29
-rw-r--r--  block/blk-mq-debugfs.c | 1
-rw-r--r--  block/blk-mq-sched.c | 46
-rw-r--r--  block/blk-mq-sched.h | 2
-rw-r--r--  block/blk-mq-sysfs.c | 6
-rw-r--r--  block/blk-mq-tag.c | 14
-rw-r--r--  block/blk-mq.c | 380
-rw-r--r--  block/blk-mq.h | 46
-rw-r--r--  block/blk-stat.c | 10
-rw-r--r--  block/blk-stat.h | 45
-rw-r--r--  block/blk-sysfs.c | 80
-rw-r--r--  block/blk-throttle.c | 35
-rw-r--r--  block/blk-timeout.c | 6
-rw-r--r--  block/blk-wbt.c | 129
-rw-r--r--  block/blk-wbt.h | 55
-rw-r--r--  block/blk-zoned.c | 8
-rw-r--r--  block/blk.h | 5
-rw-r--r--  block/bounce.c | 52
-rw-r--r--  block/bsg-lib.c | 6
-rw-r--r--  block/bsg.c | 44
-rw-r--r--  block/cfq-iosched.c | 66
-rw-r--r--  block/deadline-iosched.c | 3
-rw-r--r--  block/elevator.c | 101
-rw-r--r--  block/genhd.c | 77
-rw-r--r--  block/kyber-iosched.c | 199
-rw-r--r--  block/mq-deadline.c | 3
-rw-r--r--  block/partition-generic.c | 36
-rw-r--r--  block/scsi_ioctl.c | 10
-rw-r--r--  crypto/af_alg.c | 14
-rw-r--r--  crypto/algif_aead.c | 4
-rw-r--r--  crypto/algif_hash.c | 2
-rw-r--r--  crypto/algif_rng.c | 1
-rw-r--r--  crypto/algif_skcipher.c | 4
-rw-r--r--  crypto/proc.c | 14
-rw-r--r--  drivers/acpi/ac.c | 21
-rw-r--r--  drivers/acpi/acpica/acnamesp.h | 4
-rw-r--r--  drivers/acpi/acpica/exconfig.c | 14
-rw-r--r--  drivers/acpi/acpica/nsinit.c | 76
-rw-r--r--  drivers/acpi/battery.c | 121
-rw-r--r--  drivers/acpi/button.c | 19
-rw-r--r--  drivers/amba/bus.c | 5
-rw-r--r--  drivers/ata/ahci.c | 7
-rw-r--r--  drivers/ata/ahci.h | 8
-rw-r--r--  drivers/ata/ahci_mvebu.c | 56
-rw-r--r--  drivers/ata/ahci_qoriq.c | 2
-rw-r--r--  drivers/ata/ahci_xgene.c | 4
-rw-r--r--  drivers/ata/libahci.c | 20
-rw-r--r--  drivers/ata/libahci_platform.c | 24
-rw-r--r--  drivers/ata/libata-core.c | 13
-rw-r--r--  drivers/ata/libata-eh.c | 55
-rw-r--r--  drivers/ata/sata_highbank.c | 2
-rw-r--r--  drivers/ata/sata_sil24.c | 4
-rw-r--r--  drivers/atm/firestream.c | 2
-rw-r--r--  drivers/atm/zatm.c | 7
-rw-r--r--  drivers/base/cpu.c | 8
-rw-r--r--  drivers/base/dma-mapping.c | 31
-rw-r--r--  drivers/base/node.c | 5
-rw-r--r--  drivers/base/platform-msi.c | 3
-rw-r--r--  drivers/base/platform.c | 18
-rw-r--r--  drivers/base/power/main.c | 7
-rw-r--r--  drivers/base/regmap/regmap-mmio.c | 3
-rw-r--r--  drivers/base/regmap/regmap-slimbus.c | 2
-rw-r--r--  drivers/bcma/driver_mips.c | 2
-rw-r--r--  drivers/bcma/main.c | 2
-rw-r--r--  drivers/block/DAC960.c | 60
-rw-r--r--  drivers/block/DAC960.h | 1
-rw-r--r--  drivers/block/aoe/aoeblk.c | 11
-rw-r--r--  drivers/block/aoe/aoecmd.c | 3
-rw-r--r--  drivers/block/brd.c | 10
-rw-r--r--  drivers/block/drbd/drbd_bitmap.c | 5
-rw-r--r--  drivers/block/drbd/drbd_debugfs.c | 20
-rw-r--r--  drivers/block/drbd/drbd_int.h | 12
-rw-r--r--  drivers/block/drbd/drbd_main.c | 73
-rw-r--r--  drivers/block/drbd/drbd_proc.c | 34
-rw-r--r--  drivers/block/drbd/drbd_receiver.c | 6
-rw-r--r--  drivers/block/drbd/drbd_req.c | 4
-rw-r--r--  drivers/block/drbd/drbd_req.h | 2
-rw-r--r--  drivers/block/floppy.c | 2
-rw-r--r--  drivers/block/loop.c | 18
-rw-r--r--  drivers/block/loop.h | 1
-rw-r--r--  drivers/block/mtip32xx/mtip32xx.c | 29
-rw-r--r--  drivers/block/nbd.c | 77
-rw-r--r--  drivers/block/null_blk.c | 36
-rw-r--r--  drivers/block/paride/pd.c | 2
-rw-r--r--  drivers/block/pktcdvd.c | 74
-rw-r--r--  drivers/block/ps3disk.c | 2
-rw-r--r--  drivers/block/ps3vram.c | 17
-rw-r--r--  drivers/block/rbd.c | 48
-rw-r--r--  drivers/block/rsxx/core.c | 6
-rw-r--r--  drivers/block/sx8.c | 2
-rw-r--r--  drivers/block/virtio_blk.c | 8
-rw-r--r--  drivers/block/xen-blkback/blkback.c | 2
-rw-r--r--  drivers/block/xen-blkback/xenbus.c | 4
-rw-r--r--  drivers/block/xen-blkfront.c | 7
-rw-r--r--  drivers/bluetooth/btusb.c | 19
-rw-r--r--  drivers/bus/fsl-mc/fsl-mc-msi.c | 2
-rw-r--r--  drivers/cdrom/cdrom.c | 2
-rw-r--r--  drivers/char/agp/uninorth-agp.c | 4
-rw-r--r--  drivers/char/apm-emulation.c | 15
-rw-r--r--  drivers/char/ds1620.c | 14
-rw-r--r--  drivers/char/efirtc.c | 15
-rw-r--r--  drivers/char/misc.c | 15
-rw-r--r--  drivers/char/nvram.c | 15
-rw-r--r--  drivers/char/pcmcia/synclink_cs.c | 15
-rw-r--r--  drivers/char/random.c | 29
-rw-r--r--  drivers/char/rtc.c | 19
-rw-r--r--  drivers/char/toshiba.c | 15
-rw-r--r--  drivers/clk/Kconfig | 6
-rw-r--r--  drivers/clk/clk-cs2000-cp.c | 2
-rw-r--r--  drivers/clk/clk-mux.c | 10
-rw-r--r--  drivers/clk/clk-stm32mp1.c | 54
-rw-r--r--  drivers/clk/clk.c | 7
-rw-r--r--  drivers/clk/imx/clk-imx6ul.c | 2
-rw-r--r--  drivers/clk/meson/clk-regmap.c | 11
-rw-r--r--  drivers/clk/meson/gxbb-aoclk.h | 2
-rw-r--r--  drivers/clk/meson/meson8b.c | 5
-rw-r--r--  drivers/connector/connector.c | 15
-rw-r--r--  drivers/cpufreq/Kconfig.arm | 2
-rw-r--r--  drivers/cpufreq/cppc_cpufreq.c | 46
-rw-r--r--  drivers/crypto/inside-secure/safexcel.c | 4
-rw-r--r--  drivers/dma/qcom/bam_dma.c | 18
-rw-r--r--  drivers/dma/qcom/hidma_mgmt.c | 2
-rw-r--r--  drivers/firmware/arm_scmi/driver.c | 1
-rw-r--r--  drivers/firmware/efi/Kconfig | 5
-rw-r--r--  drivers/firmware/efi/Makefile | 1
-rw-r--r--  drivers/firmware/efi/capsule-loader.c | 14
-rw-r--r--  drivers/firmware/efi/cper-arm.c | 6
-rw-r--r--  drivers/firmware/efi/cper-x86.c | 356
-rw-r--r--  drivers/firmware/efi/cper.c | 16
-rw-r--r--  drivers/firmware/efi/libstub/arm64-stub.c | 10
-rw-r--r--  drivers/firmware/efi/libstub/secureboot.c | 3
-rw-r--r--  drivers/firmware/efi/libstub/tpm.c | 2
-rw-r--r--  drivers/firmware/qcom_scm-32.c | 8
-rw-r--r--  drivers/gpio/gpio-aspeed.c | 2
-rw-r--r--  drivers/gpio/gpio-pci-idio-16.c | 8
-rw-r--r--  drivers/gpio/gpio-pcie-idio-24.c | 22
-rw-r--r--  drivers/gpio/gpiolib.c | 7
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 6
-rw-r--r--  drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 44
-rw-r--r--  drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c | 20
-rw-r--r--  drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c | 86
-rw-r--r--  drivers/gpu/drm/amd/display/dc/core/dc_link_ddc.c | 10
-rw-r--r--  drivers/gpu/drm/amd/display/dc/core/dc_surface.c | 14
-rw-r--r--  drivers/gpu/drm/amd/display/dc/inc/dc_link_ddc.h | 5
-rw-r--r--  drivers/gpu/drm/amd/display/include/dal_asic_id.h | 9
-rw-r--r--  drivers/gpu/drm/amd/display/modules/color/color_gamma.c | 72
-rw-r--r--  drivers/gpu/drm/amd/include/atomfirmware.h | 170
-rw-r--r--  drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c | 52
-rw-r--r--  drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.h | 1
-rw-r--r--  drivers/gpu/drm/amd/powerplay/hwmgr/smu7_powertune.c | 4
-rw-r--r--  drivers/gpu/drm/bridge/Kconfig | 1
-rw-r--r--  drivers/gpu/drm/bridge/dumb-vga-dac.c | 4
-rw-r--r--  drivers/gpu/drm/bridge/synopsys/dw-hdmi.c | 15
-rw-r--r--  drivers/gpu/drm/drm_atomic.c | 8
-rw-r--r--  drivers/gpu/drm/drm_dp_helper.c | 1
-rw-r--r--  drivers/gpu/drm/drm_drv.c | 2
-rw-r--r--  drivers/gpu/drm/drm_dumb_buffers.c | 7
-rw-r--r--  drivers/gpu/drm/drm_file.c | 1
-rw-r--r--  drivers/gpu/drm/exynos/exynos_hdmi.c | 2
-rw-r--r--  drivers/gpu/drm/exynos/exynos_mixer.c | 22
-rw-r--r--  drivers/gpu/drm/exynos/regs-mixer.h | 1
-rw-r--r--  drivers/gpu/drm/i915/i915_gem_userptr.c | 3
-rw-r--r--  drivers/gpu/drm/i915/i915_query.c | 15
-rw-r--r--  drivers/gpu/drm/i915/i915_reg.h | 3
-rw-r--r--  drivers/gpu/drm/i915/intel_cdclk.c | 41
-rw-r--r--  drivers/gpu/drm/i915/intel_csr.c | 1
-rw-r--r--  drivers/gpu/drm/i915/intel_display.c | 2
-rw-r--r--  drivers/gpu/drm/i915/intel_dp.c | 20
-rw-r--r--  drivers/gpu/drm/i915/intel_engine_cs.c | 4
-rw-r--r--  drivers/gpu/drm/i915/intel_lrc.c | 1
-rw-r--r--  drivers/gpu/drm/i915/intel_lvds.c | 54
-rw-r--r--  drivers/gpu/drm/meson/meson_dw_hdmi.c | 2
-rw-r--r--  drivers/gpu/drm/nouveau/nouveau_bo.c | 1
-rw-r--r--  drivers/gpu/drm/nouveau/nouveau_bo.h | 2
-rw-r--r--  drivers/gpu/drm/nouveau/nouveau_ttm.c | 6
-rw-r--r--  drivers/gpu/drm/nouveau/nv50_display.c | 7
-rw-r--r--  drivers/gpu/drm/omapdrm/dss/dispc.c | 20
-rw-r--r--  drivers/gpu/drm/omapdrm/dss/hdmi4.c | 2
-rw-r--r--  drivers/gpu/drm/omapdrm/dss/hdmi4_core.c | 7
-rw-r--r--  drivers/gpu/drm/omapdrm/dss/hdmi5.c | 2
-rw-r--r--  drivers/gpu/drm/omapdrm/dss/sdi.c | 5
-rw-r--r--  drivers/gpu/drm/omapdrm/omap_connector.c | 10
-rw-r--r--  drivers/gpu/drm/omapdrm/omap_dmm_tiler.c | 6
-rw-r--r--  drivers/gpu/drm/omapdrm/tcm-sita.c | 2
-rw-r--r--  drivers/gpu/drm/rcar-du/rcar_lvds.c | 3
-rw-r--r--  drivers/gpu/drm/ttm/ttm_page_alloc.c | 11
-rw-r--r--  drivers/gpu/drm/ttm/ttm_page_alloc_dma.c | 3
-rw-r--r--  drivers/gpu/drm/vc4/vc4_crtc.c | 46
-rw-r--r--  drivers/gpu/drm/vc4/vc4_dpi.c | 25
-rw-r--r--  drivers/gpu/drm/vc4/vc4_drv.c | 1
-rw-r--r--  drivers/gpu/drm/vc4/vc4_plane.c | 2
-rw-r--r--  drivers/gpu/drm/vmwgfx/vmwgfx_drv.c | 5
-rw-r--r--  drivers/gpu/drm/vmwgfx/vmwgfx_drv.h | 1
-rw-r--r--  drivers/gpu/drm/vmwgfx/vmwgfx_fb.c | 53
-rw-r--r--  drivers/gpu/drm/vmwgfx/vmwgfx_kms.c | 14
-rw-r--r--  drivers/gpu/drm/vmwgfx/vmwgfx_msg.c | 48
-rw-r--r--  drivers/gpu/drm/vmwgfx/vmwgfx_msg.h | 25
-rw-r--r--  drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c | 2
-rw-r--r--  drivers/gpu/host1x/bus.c | 9
-rw-r--r--  drivers/hid/Kconfig | 7
-rw-r--r--  drivers/hid/hid-ids.h | 9
-rw-r--r--  drivers/hid/hid-lenovo.c | 36
-rw-r--r--  drivers/hid/i2c-hid/i2c-hid.c | 2
-rw-r--r--  drivers/hid/intel-ish-hid/ishtp-hid-client.c | 36
-rw-r--r--  drivers/hid/intel-ish-hid/ishtp/bus.c | 2
-rw-r--r--  drivers/hid/wacom_sys.c | 4
-rw-r--r--  drivers/hwmon/Kconfig | 9
-rw-r--r--  drivers/hwmon/asus_atk0110.c | 117
-rw-r--r--  drivers/hwmon/fschmd.c | 2
-rw-r--r--  drivers/hwmon/hwmon.c | 3
-rw-r--r--  drivers/hwmon/k10temp.c | 109
-rw-r--r--  drivers/hwmon/ltc2990.c | 212
-rw-r--r--  drivers/hwmon/mc13783-adc.c | 60
-rw-r--r--  drivers/hwtracing/intel_th/msu.c | 6
-rw-r--r--  drivers/hwtracing/stm/core.c | 7
-rw-r--r--  drivers/i2c/busses/i2c-designware-master.c | 5
-rw-r--r--  drivers/i2c/busses/i2c-ocores.c | 2
-rw-r--r--  drivers/i2c/busses/i2c-pmcmsp.c | 4
-rw-r--r--  drivers/i2c/busses/i2c-viperboard.c | 2
-rw-r--r--  drivers/i2c/i2c-core-acpi.c | 13
-rw-r--r--  drivers/ide/ide-atapi.c | 2
-rw-r--r--  drivers/ide/ide-cd.c | 17
-rw-r--r--  drivers/ide/ide-cd_ioctl.c | 2
-rw-r--r--  drivers/ide/ide-devsets.c | 2
-rw-r--r--  drivers/ide/ide-disk.c | 2
-rw-r--r--  drivers/ide/ide-disk_proc.c | 62
-rw-r--r--  drivers/ide/ide-dma.c | 2
-rw-r--r--  drivers/ide/ide-floppy_proc.c | 17
-rw-r--r--  drivers/ide/ide-ioctls.c | 4
-rw-r--r--  drivers/ide/ide-lib.c | 26
-rw-r--r--  drivers/ide/ide-park.c | 4
-rw-r--r--  drivers/ide/ide-pm.c | 5
-rw-r--r--  drivers/ide/ide-probe.c | 6
-rw-r--r--  drivers/ide/ide-proc.c | 182
-rw-r--r--  drivers/ide/ide-tape.c | 21
-rw-r--r--  drivers/ide/ide-taskfile.c | 4
-rw-r--r--  drivers/iio/adc/Kconfig | 1
-rw-r--r--  drivers/iio/adc/ad7793.c | 75
-rw-r--r--  drivers/iio/adc/at91-sama5d2_adc.c | 41
-rw-r--r--  drivers/iio/adc/stm32-dfsdm-adc.c | 17
-rw-r--r--  drivers/iio/buffer/industrialio-buffer-dma.c | 2
-rw-r--r--  drivers/iio/buffer/kfifo_buf.c | 11
-rw-r--r--  drivers/iio/common/hid-sensors/hid-sensor-trigger.c | 8
-rw-r--r--  drivers/infiniband/Kconfig | 5
-rw-r--r--  drivers/infiniband/core/cache.c | 57
-rw-r--r--  drivers/infiniband/core/cma.c | 60
-rw-r--r--  drivers/infiniband/core/iwpm_util.c | 5
-rw-r--r--  drivers/infiniband/core/mad.c | 4
-rw-r--r--  drivers/infiniband/core/roce_gid_mgmt.c | 26
-rw-r--r--  drivers/infiniband/core/ucma.c | 44
-rw-r--r--  drivers/infiniband/core/umem.c | 7
-rw-r--r--  drivers/infiniband/core/uverbs_cmd.c | 6
-rw-r--r--  drivers/infiniband/core/uverbs_ioctl.c | 9
-rw-r--r--  drivers/infiniband/core/uverbs_std_types_flow_action.c | 12
-rw-r--r--  drivers/infiniband/core/verbs.c | 1
-rw-r--r--  drivers/infiniband/hw/bnxt_re/main.c | 55
-rw-r--r--  drivers/infiniband/hw/bnxt_re/qplib_fp.c | 94
-rw-r--r--  drivers/infiniband/hw/bnxt_re/qplib_fp.h | 3
-rw-r--r--  drivers/infiniband/hw/bnxt_re/qplib_rcfw.c | 61
-rw-r--r--  drivers/infiniband/hw/bnxt_re/qplib_rcfw.h | 3
-rw-r--r--  drivers/infiniband/hw/cxgb4/cq.c | 11
-rw-r--r--  drivers/infiniband/hw/cxgb4/device.c | 9
-rw-r--r--  drivers/infiniband/hw/cxgb4/iw_cxgb4.h | 6
-rw-r--r--  drivers/infiniband/hw/cxgb4/mem.c | 4
-rw-r--r--  drivers/infiniband/hw/cxgb4/qp.c | 4
-rw-r--r--  drivers/infiniband/hw/cxgb4/resource.c | 26
-rw-r--r--  drivers/infiniband/hw/hfi1/affinity.c | 11
-rw-r--r--  drivers/infiniband/hw/hfi1/chip.c | 4
-rw-r--r--  drivers/infiniband/hw/hfi1/driver.c | 19
-rw-r--r--  drivers/infiniband/hw/hfi1/hfi.h | 8
-rw-r--r--  drivers/infiniband/hw/hfi1/init.c | 43
-rw-r--r--  drivers/infiniband/hw/hfi1/pcie.c | 3
-rw-r--r--  drivers/infiniband/hw/hfi1/platform.c | 1
-rw-r--r--  drivers/infiniband/hw/hfi1/qsfp.c | 2
-rw-r--r--  drivers/infiniband/hw/hfi1/ruc.c | 50
-rw-r--r--  drivers/infiniband/hw/hfi1/ud.c | 4
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_cq.c | 1
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_hem.c | 12
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_hw_v1.c | 3
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 81
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_main.c | 2
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_qp.c | 12
-rw-r--r--  drivers/infiniband/hw/i40iw/i40iw.h | 1
-rw-r--r--  drivers/infiniband/hw/i40iw/i40iw_cm.c | 2
-rw-r--r--  drivers/infiniband/hw/i40iw/i40iw_hw.c | 4
-rw-r--r--  drivers/infiniband/hw/i40iw/i40iw_main.c | 7
-rw-r--r--  drivers/infiniband/hw/i40iw/i40iw_verbs.c | 13
-rw-r--r--  drivers/infiniband/hw/i40iw/i40iw_verbs.h | 1
-rw-r--r--  drivers/infiniband/hw/mlx4/mr.c | 2
-rw-r--r--  drivers/infiniband/hw/mlx4/qp.c | 3
-rw-r--r--  drivers/infiniband/hw/mlx5/Kconfig | 1
-rw-r--r--  drivers/infiniband/hw/mlx5/main.c | 11
-rw-r--r--  drivers/infiniband/hw/mlx5/mr.c | 32
-rw-r--r--  drivers/infiniband/hw/mlx5/qp.c | 40
-rw-r--r--  drivers/infiniband/hw/nes/nes_nic.c | 2
-rw-r--r--  drivers/infiniband/hw/qedr/verbs.c | 60
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_opcode.c | 2
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_req.c | 1
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_resp.c | 6
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_verbs.c | 10
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_main.c | 2
-rw-r--r--  drivers/infiniband/ulp/srp/Kconfig | 2
-rw-r--r--  drivers/infiniband/ulp/srpt/Kconfig | 2
-rw-r--r--  drivers/input/input-leds.c | 8
-rw-r--r--  drivers/input/misc/hp_sdc_rtc.c | 14
-rw-r--r--  drivers/input/mouse/alps.c | 2
-rw-r--r--  drivers/input/mouse/elan_i2c_smbus.c | 22
-rw-r--r--  drivers/input/mouse/synaptics.c | 6
-rw-r--r--  drivers/input/rmi4/rmi_spi.c | 7
-rw-r--r--  drivers/input/touchscreen/Kconfig | 2
-rw-r--r--  drivers/input/touchscreen/atmel_mxt_ts.c | 200
-rw-r--r--  drivers/iommu/Kconfig | 1
-rw-r--r--  drivers/iommu/amd_iommu.c | 2
-rw-r--r--  drivers/iommu/dma-iommu.c | 54
-rw-r--r--  drivers/iommu/dmar.c | 2
-rw-r--r--  drivers/iommu/intel_irq_remapping.c | 2
-rw-r--r--  drivers/iommu/rockchip-iommu.c | 11
-rw-r--r--  drivers/irqchip/Makefile | 2
-rw-r--r--  drivers/irqchip/irq-gic-v3-mbi.c | 331
-rw-r--r--  drivers/irqchip/irq-gic-v3.c | 7
-rw-r--r--  drivers/irqchip/irq-meson-gpio.c | 5
-rw-r--r--  drivers/irqchip/irq-mvebu-gicp.c | 38
-rw-r--r--  drivers/irqchip/irq-mvebu-gicp.h | 12
-rw-r--r--  drivers/irqchip/irq-mvebu-icu.c | 33
-rw-r--r--  drivers/irqchip/irq-stm32-exti.c | 683
-rw-r--r--  drivers/irqchip/qcom-irq-combiner.c | 4
-rw-r--r--  drivers/isdn/capi/capi.c | 30
-rw-r--r--  drivers/isdn/capi/capidrv.c | 15
-rw-r--r--  drivers/isdn/capi/kcapi.c | 3
-rw-r--r--  drivers/isdn/capi/kcapi_proc.c | 80
-rw-r--r--  drivers/isdn/gigaset/capi.c | 15
-rw-r--r--  drivers/isdn/hardware/avm/avmcard.h | 4
-rw-r--r--  drivers/isdn/hardware/avm/b1.c | 17
-rw-r--r--  drivers/isdn/hardware/avm/b1dma.c | 17
-rw-r--r--  drivers/isdn/hardware/avm/b1isa.c | 2
-rw-r--r--  drivers/isdn/hardware/avm/b1pci.c | 4
-rw-r--r--  drivers/isdn/hardware/avm/b1pcmcia.c | 2
-rw-r--r--  drivers/isdn/hardware/avm/c4.c | 15
-rw-r--r--  drivers/isdn/hardware/avm/t1isa.c | 2
-rw-r--r--  drivers/isdn/hardware/avm/t1pci.c | 2
-rw-r--r--  drivers/isdn/hardware/eicon/capimain.c | 15
-rw-r--r--  drivers/isdn/hardware/eicon/diva.c | 22
-rw-r--r--  drivers/isdn/hardware/eicon/diva.h | 5
-rw-r--r--  drivers/isdn/hardware/eicon/diva_didd.c | 17
-rw-r--r--  drivers/isdn/hardware/eicon/divasi.c | 17
-rw-r--r--  drivers/isdn/hardware/eicon/divasmain.c | 18
-rw-r--r--  drivers/isdn/hysdn/hycapi.c | 15
-rw-r--r--  drivers/isdn/mISDN/socket.c | 3
-rw-r--r--  drivers/lightnvm/core.c | 10
-rw-r--r--  drivers/lightnvm/pblk-cache.c | 10
-rw-r--r--  drivers/lightnvm/pblk-core.c | 233
-rw-r--r--  drivers/lightnvm/pblk-gc.c | 112
-rw-r--r--  drivers/lightnvm/pblk-init.c | 172
-rw-r--r--  drivers/lightnvm/pblk-map.c | 33
-rw-r--r--  drivers/lightnvm/pblk-rb.c | 48
-rw-r--r--  drivers/lightnvm/pblk-read.c | 146
-rw-r--r--  drivers/lightnvm/pblk-recovery.c | 121
-rw-r--r--  drivers/lightnvm/pblk-rl.c | 29
-rw-r--r--  drivers/lightnvm/pblk-sysfs.c | 15
-rw-r--r--  drivers/lightnvm/pblk-write.c | 269
-rw-r--r--  drivers/lightnvm/pblk.h | 58
-rw-r--r--  drivers/macintosh/via-pmu.c | 57
-rw-r--r--  drivers/md/bcache/alloc.c | 5
-rw-r--r--  drivers/md/bcache/bcache.h | 18
-rw-r--r--  drivers/md/bcache/bset.c | 13
-rw-r--r--  drivers/md/bcache/bset.h | 2
-rw-r--r--  drivers/md/bcache/btree.c | 4
-rw-r--r--  drivers/md/bcache/debug.c | 7
-rw-r--r--  drivers/md/bcache/io.c | 12
-rw-r--r--  drivers/md/bcache/request.c | 23
-rw-r--r--  drivers/md/bcache/super.c | 184
-rw-r--r--  drivers/md/bcache/sysfs.c | 51
-rw-r--r--  drivers/md/bcache/util.c | 35
-rw-r--r--  drivers/md/bcache/util.h | 5
-rw-r--r--  drivers/md/bcache/writeback.c | 4
-rw-r--r--  drivers/md/dm-bio-prison-v1.c | 13
-rw-r--r--  drivers/md/dm-bio-prison-v2.c | 13
-rw-r--r--  drivers/md/dm-bufio.c | 5
-rw-r--r--  drivers/md/dm-cache-background-tracker.c | 2
-rw-r--r--  drivers/md/dm-cache-target.c | 25
-rw-r--r--  drivers/md/dm-core.h | 4
-rw-r--r--  drivers/md/dm-crypt.c | 59
-rw-r--r--  drivers/md/dm-integrity.c | 17
-rw-r--r--  drivers/md/dm-io.c | 29
-rw-r--r--  drivers/md/dm-kcopyd.c | 22
-rw-r--r--  drivers/md/dm-log-userspace-base.c | 19
-rw-r--r--  drivers/md/dm-mpath.c | 3
-rw-r--r--  drivers/md/dm-raid1.c | 10
-rw-r--r--  drivers/md/dm-region-hash.c | 23
-rw-r--r--  drivers/md/dm-rq.c | 4
-rw-r--r--  drivers/md/dm-snap.c | 17
-rw-r--r--  drivers/md/dm-thin.c | 32
-rw-r--r--  drivers/md/dm-verity-fec.c | 55
-rw-r--r--  drivers/md/dm-verity-fec.h | 8
-rw-r--r--  drivers/md/dm-zoned-target.c | 13
-rw-r--r--  drivers/md/dm.c | 62
-rw-r--r--  drivers/md/md-faulty.c | 2
-rw-r--r--  drivers/md/md-linear.c | 2
-rw-r--r--  drivers/md/md-multipath.c | 17
-rw-r--r--  drivers/md/md-multipath.h | 2
-rw-r--r--  drivers/md/md.c | 61
-rw-r--r--  drivers/md/md.h | 4
-rw-r--r--  drivers/md/raid0.c | 5
-rw-r--r--  drivers/md/raid1.c | 76
-rw-r--r--  drivers/md/raid1.h | 6
-rw-r--r--  drivers/md/raid10.c | 60
-rw-r--r--  drivers/md/raid10.h | 6
-rw-r--r--  drivers/md/raid5-cache.c | 43
-rw-r--r--  drivers/md/raid5-ppl.c | 42
-rw-r--r--  drivers/md/raid5.c | 12
-rw-r--r--  drivers/md/raid5.h | 2
-rw-r--r--  drivers/media/i2c/saa7115.c | 2
-rw-r--r--  drivers/media/i2c/saa711x_regs.h | 2
-rw-r--r--  drivers/media/i2c/tda7432.c | 2
-rw-r--r--  drivers/media/i2c/tvp5150.c | 2
-rw-r--r--  drivers/media/i2c/tvp5150_reg.h | 2
-rw-r--r--  drivers/media/i2c/tvp7002.c | 2
-rw-r--r--  drivers/media/i2c/tvp7002_reg.h | 2
-rw-r--r--  drivers/media/media-devnode.c | 2
-rw-r--r--  drivers/media/pci/bt8xx/bttv-audio-hook.c | 2
-rw-r--r--  drivers/media/pci/bt8xx/bttv-audio-hook.h | 2
-rw-r--r--  drivers/media/pci/bt8xx/bttv-cards.c | 4
-rw-r--r--  drivers/media/pci/bt8xx/bttv-driver.c | 2
-rw-r--r--  drivers/media/pci/bt8xx/bttv-i2c.c | 2
-rw-r--r--  drivers/media/pci/cx23885/cx23885-input.c | 2
-rw-r--r--  drivers/media/pci/cx88/cx88-alsa.c | 4
-rw-r--r--  drivers/media/pci/cx88/cx88-blackbird.c | 2
-rw-r--r--  drivers/media/pci/cx88/cx88-core.c | 2
-rw-r--r--  drivers/media/pci/cx88/cx88-i2c.c | 2
-rw-r--r--  drivers/media/pci/cx88/cx88-video.c | 2
-rw-r--r--  drivers/media/pci/saa7164/saa7164-core.c | 14
-rw-r--r--  drivers/media/pci/zoran/videocodec.c | 16
-rw-r--r--  drivers/media/radio/radio-aimslab.c | 2
-rw-r--r--  drivers/media/radio/radio-aztech.c | 2
-rw-r--r--  drivers/media/radio/radio-gemtek.c | 2
-rw-r--r--  drivers/media/radio/radio-maxiradio.c | 2
-rw-r--r--  drivers/media/radio/radio-rtrack2.c | 2
-rw-r--r--  drivers/media/radio/radio-sf16fmi.c | 2
-rw-r--r--  drivers/media/radio/radio-terratec.c | 2
-rw-r--r--  drivers/media/radio/radio-trust.c | 2
-rw-r--r--  drivers/media/radio/radio-typhoon.c | 2
-rw-r--r--  drivers/media/radio/radio-zoltrix.c | 2
-rw-r--r--  drivers/media/rc/keymaps/rc-avermedia-m135a.c | 2
-rw-r--r--  drivers/media/rc/keymaps/rc-encore-enltv-fm53.c | 2
-rw-r--r--  drivers/media/rc/keymaps/rc-encore-enltv2.c | 2
-rw-r--r--  drivers/media/rc/keymaps/rc-kaiomy.c | 2
-rw-r--r--  drivers/media/rc/keymaps/rc-kworld-plus-tv-analog.c | 2
-rw-r--r--  drivers/media/rc/keymaps/rc-pixelview-new.c | 2
-rw-r--r--  drivers/media/tuners/tea5761.c | 4
-rw-r--r--  drivers/media/tuners/tea5767.c | 4
-rw-r--r--  drivers/media/tuners/tuner-xc2028-types.h | 2
-rw-r--r--  drivers/media/tuners/tuner-xc2028.c | 4
-rw-r--r--  drivers/media/tuners/tuner-xc2028.h | 2
-rw-r--r--  drivers/media/usb/em28xx/em28xx-camera.c | 2
-rw-r--r--  drivers/media/usb/em28xx/em28xx-cards.c | 2
-rw-r--r--  drivers/media/usb/em28xx/em28xx-core.c | 4
-rw-r--r--  drivers/media/usb/em28xx/em28xx-dvb.c | 4
-rw-r--r--  drivers/media/usb/em28xx/em28xx-i2c.c | 2
-rw-r--r--  drivers/media/usb/em28xx/em28xx-input.c | 2
-rw-r--r--  drivers/media/usb/em28xx/em28xx-video.c | 4
-rw-r--r--  drivers/media/usb/em28xx/em28xx.h | 2
-rw-r--r--  drivers/media/usb/gspca/zc3xx-reg.h | 2
-rw-r--r--  drivers/media/usb/tm6000/tm6000-cards.c | 2
-rw-r--r--  drivers/media/usb/tm6000/tm6000-core.c | 2
-rw-r--r--  drivers/media/usb/tm6000/tm6000-i2c.c | 2
-rw-r--r--  drivers/media/usb/tm6000/tm6000-regs.h | 2
-rw-r--r--  drivers/media/usb/tm6000/tm6000-usb-isoc.h | 2
-rw-r--r--  drivers/media/usb/tm6000/tm6000-video.c | 2
-rw-r--r--  drivers/media/usb/tm6000/tm6000.h | 2
-rw-r--r--  drivers/media/v4l2-core/v4l2-dev.c | 4
-rw-r--r--  drivers/media/v4l2-core/v4l2-ioctl.c | 2
-rw-r--r--  drivers/media/v4l2-core/videobuf-core.c | 6
-rw-r--r--  drivers/media/v4l2-core/videobuf-dma-contig.c | 2
-rw-r--r--  drivers/media/v4l2-core/videobuf-dma-sg.c | 6
-rw-r--r--  drivers/media/v4l2-core/videobuf-vmalloc.c | 4
-rw-r--r--  drivers/memstick/core/ms_block.c | 6
-rw-r--r--  drivers/memstick/core/mspro_block.c | 6
-rw-r--r--  drivers/message/fusion/mptbase.c | 57
-rw-r--r--  drivers/message/fusion/mptsas.c | 2
-rw-r--r--  drivers/mfd/cros_ec_spi.c | 24
-rw-r--r--  drivers/mfd/mc13xxx-core.c | 15
-rw-r--r--  drivers/misc/cxl/cxl.h | 1
-rw-r--r--  drivers/misc/cxl/pci.c | 12
-rw-r--r--  drivers/misc/cxl/sysfs.c | 10
-rw-r--r--  drivers/misc/eeprom/at24.c | 2
-rw-r--r--  drivers/misc/sgi-gru/gruprocfs.c | 81
-rw-r--r--  drivers/mmc/core/block.c | 14
-rw-r--r--  drivers/mmc/core/queue.c | 5
-rw-r--r--  drivers/mmc/core/sdio_uart.c | 15
-rw-r--r--  drivers/mmc/host/sdhci-iproc.c | 33
-rw-r--r--  drivers/mtd/devices/Kconfig | 1
-rw-r--r--  drivers/mtd/devices/m25p80.c | 238
-rw-r--r--  drivers/mtd/mtd_blkdevs.c | 20
-rw-r--r--  drivers/mtd/mtdcore.c | 14
-rw-r--r--  drivers/mtd/nand/onenand/omap2.c | 105
-rw-r--r--  drivers/mtd/nand/raw/marvell_nand.c | 20
-rw-r--r--  drivers/mtd/nand/raw/nand_base.c | 5
-rw-r--r--  drivers/net/bonding/bond_alb.c | 15
-rw-r--r--  drivers/net/bonding/bond_main.c | 2
-rw-r--r--  drivers/net/bonding/bond_procfs.c | 36
-rw-r--r--  drivers/net/can/dev.c | 2
-rw-r--r--  drivers/net/can/flexcan.c | 26
-rw-r--r--  drivers/net/can/spi/hi311x.c | 11
-rw-r--r--  drivers/net/can/usb/kvaser_usb.c | 2
-rw-r--r--  drivers/net/dsa/b53/b53_common.c | 13
-rw-r--r--  drivers/net/dsa/b53/b53_mdio.c | 5
-rw-r--r--  drivers/net/dsa/b53/b53_priv.h | 1
-rw-r--r--  drivers/net/dsa/bcm_sf2_cfp.c | 36
-rw-r--r--  drivers/net/dsa/mv88e6xxx/chip.c | 26
-rw-r--r--  drivers/net/dsa/mv88e6xxx/chip.h | 1
-rw-r--r--  drivers/net/dsa/mv88e6xxx/global2.c | 2
-rw-r--r--  drivers/net/ethernet/3com/3c59x.c | 104
-rw-r--r--  drivers/net/ethernet/8390/ne.c | 4
-rw-r--r--  drivers/net/ethernet/amd/pcnet32.c | 10
-rw-r--r--  drivers/net/ethernet/aquantia/atlantic/aq_nic.c | 3
-rw-r--r--  drivers/net/ethernet/aquantia/atlantic/aq_nic.h | 1
-rw-r--r--  drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c | 20
-rw-r--r--  drivers/net/ethernet/broadcom/bcmsysport.c | 18
-rw-r--r--  drivers/net/ethernet/broadcom/tg3.c | 9
-rw-r--r--  drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h | 28
-rw-r--r--  drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c | 88
-rw-r--r--  drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 7
-rw-r--r--  drivers/net/ethernet/cisco/enic/enic_main.c | 8
-rw-r--r--  drivers/net/ethernet/emulex/benet/be_main.c | 4
-rw-r--r--  drivers/net/ethernet/freescale/fec_main.c | 1
-rw-r--r--  drivers/net/ethernet/freescale/fec_ptp.c | 14
-rw-r--r--  drivers/net/ethernet/freescale/ucc_geth_ethtool.c | 2
-rw-r--r--  drivers/net/ethernet/ibm/ibmvnic.c | 50
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_controlq.c | 2
-rw-r--r--  drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c | 2
-rw-r--r--  drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 9
-rw-r--r--  drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c | 3
-rw-r--r--  drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c | 2
-rw-r--r--  drivers/net/ethernet/marvell/mvpp2.c | 30
-rw-r--r--  drivers/net/ethernet/mellanox/mlx4/en_ethtool.c | 16
-rw-r--r--  drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 8
-rw-r--r--  drivers/net/ethernet/mellanox/mlx4/icm.c | 16
-rw-r--r--  drivers/net/ethernet/mellanox/mlx4/intf.c | 2
-rw-r--r--  drivers/net/ethernet/mellanox/mlx4/main.c | 6
-rw-r--r--  drivers/net/ethernet/mellanox/mlx4/mlx4_en.h | 7
-rw-r--r--  drivers/net/ethernet/mellanox/mlx4/qp.c | 4
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c | 8
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 5
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 42
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c | 2
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 7
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en_tx.c | 20
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/eq.c | 28
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 11
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c | 12
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 26
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/main.c | 8
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h | 2
-rw-r--r--  drivers/net/ethernet/mellanox/mlxsw/core.c | 4
-rw-r--r--  drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 5
-rw-r--r--  drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c | 12
-rw-r--r--  drivers/net/ethernet/natsemi/sonic.c | 2
-rw-r--r--  drivers/net/ethernet/netronome/nfp/bpf/main.c | 2
-rw-r--r--  drivers/net/ethernet/netronome/nfp/flower/action.c | 10
-rw-r--r--  drivers/net/ethernet/netronome/nfp/flower/cmsg.h | 5
-rw-r--r--  drivers/net/ethernet/netronome/nfp/flower/main.c | 21
-rw-r--r--  drivers/net/ethernet/netronome/nfp/nfp_app_nic.c | 2
-rw-r--r--  drivers/net/ethernet/netronome/nfp/nfp_main.h | 4
-rw-r--r--  drivers/net/ethernet/netronome/nfp/nfp_net_main.c | 31
-rw-r--r--  drivers/net/ethernet/ni/nixge.c | 10
-rw-r--r--  drivers/net/ethernet/qlogic/qed/qed_cxt.c | 2
-rw-r--r--  drivers/net/ethernet/qlogic/qed/qed_l2.c | 6
-rw-r--r--  drivers/net/ethernet/qlogic/qed/qed_ll2.c | 63
-rw-r--r--  drivers/net/ethernet/qlogic/qed/qed_main.c | 2
-rw-r--r--  drivers/net/ethernet/qlogic/qed/qed_roce.c | 2
-rw-r--r--  drivers/net/ethernet/qlogic/qede/qede_main.c | 3
-rw-r--r--  drivers/net/ethernet/qlogic/qede/qede_rdma.c | 2
-rw-r--r--  drivers/net/ethernet/realtek/8139too.c | 2
-rw-r--r--  drivers/net/ethernet/realtek/r8169.c | 3
-rw-r--r--  drivers/net/ethernet/renesas/sh_eth.h | 2
-rw-r--r--  drivers/net/ethernet/sfc/ef10.c | 5
-rw-r--r--  drivers/net/ethernet/sfc/efx.c | 5
-rw-r--r--  drivers/net/ethernet/sfc/falcon/efx.c | 5
-rw-r--r--  drivers/net/ethernet/sfc/rx.c | 2
-rw-r--r--  drivers/net/ethernet/socionext/netsec.c | 4
-rw-r--r--  drivers/net/ethernet/sun/niu.c | 5
-rw-r--r--  drivers/net/ethernet/ti/cpsw.c | 2
-rw-r--r--  drivers/net/ethernet/ti/davinci_emac.c | 22
-rw-r--r--  drivers/net/hamradio/bpqether.c | 16
-rw-r--r--  drivers/net/hamradio/scc.c | 17
-rw-r--r--  drivers/net/hamradio/yam.c | 16
-rw-r--r--  drivers/net/hyperv/netvsc_drv.c | 3
-rw-r--r--  drivers/net/hyperv/rndis_filter.c | 2
-rw-r--r--  drivers/net/ieee802154/atusb.c | 2
-rw-r--r--  drivers/net/ieee802154/mcr20a.c | 15
-rw-r--r--  drivers/net/ipvlan/ipvlan_main.c | 4
-rw-r--r--  drivers/net/phy/bcm-cygnus.c | 6
-rw-r--r--  drivers/net/phy/bcm-phy-lib.c | 2
-rw-r--r--  drivers/net/phy/bcm-phy-lib.h | 7
-rw-r--r--  drivers/net/phy/bcm7xxx.c | 4
-rw-r--r--  drivers/net/phy/broadcom.c | 10
-rw-r--r--  drivers/net/phy/micrel.c | 31
-rw-r--r--  drivers/net/phy/phy_device.c | 11
-rw-r--r--  drivers/net/phy/sfp-bus.c | 2
-rw-r--r--  drivers/net/ppp/ppp_generic.c | 27
-rw-r--r--  drivers/net/ppp/pppoe.c | 20
-rw-r--r--  drivers/net/ppp/pptp.c | 1
-rw-r--r--  drivers/net/tun.c | 61
-rw-r--r--  drivers/net/usb/cdc_mbim.c | 2
-rw-r--r--  drivers/net/usb/qmi_wwan.c | 14
-rw-r--r--  drivers/net/virtio_net.c | 21
-rw-r--r--  drivers/net/vmxnet3/vmxnet3_drv.c | 72
-rw-r--r--  drivers/net/vmxnet3/vmxnet3_int.h | 8
-rw-r--r--  drivers/net/wireless/atmel/atmel.c | 15
-rw-r--r--  drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.c | 36
-rw-r--r--  drivers/net/wireless/intel/iwlwifi/fw/api/scan.h | 13
-rw-r--r--  drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c | 111
-rw-r--r--  drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.h | 6
-rw-r--r--  drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c | 3
-rw-r--r--  drivers/net/wireless/intel/iwlwifi/pcie/trans.c | 10
-rw-r--r--  drivers/net/wireless/intersil/hostap/hostap_ap.c | 86
-rw-r--r--  drivers/net/wireless/intersil/hostap/hostap_hw.c | 17
-rw-r--r--  drivers/net/wireless/intersil/hostap/hostap_proc.c | 143
-rw-r--r--  drivers/net/wireless/mac80211_hwsim.c | 5
-rw-r--r--  drivers/net/wireless/ralink/rt2x00/rt2x00queue.c | 7
-rw-r--r--  drivers/net/wireless/ray_cs.c | 15
-rw-r--r--  drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtcoutsrc.c | 15
-rw-r--r--  drivers/net/wireless/realtek/rtlwifi/rtl8723be/hw.c | 11
-rw-r--r--  drivers/net/wireless/realtek/rtlwifi/wifi.h | 5
-rw-r--r--  drivers/nubus/proc.c | 51
-rw-r--r--  drivers/nvdimm/claim.c | 3
-rw-r--r--  drivers/nvdimm/pmem.c | 6
-rw-r--r--  drivers/nvme/host/Kconfig | 2
-rw-r--r--  drivers/nvme/host/core.c | 196
-rw-r--r--  drivers/nvme/host/fabrics.c | 106
-rw-r--r--  drivers/nvme/host/fabrics.h | 4
-rw-r--r--  drivers/nvme/host/fc.c | 15
-rw-r--r--  drivers/nvme/host/multipath.c | 24
-rw-r--r--  drivers/nvme/host/nvme.h | 26
-rw-r--r--  drivers/nvme/host/pci.c | 270
-rw-r--r--  drivers/nvme/host/rdma.c | 12
-rw-r--r--  drivers/nvme/host/trace.h | 4
-rw-r--r--  drivers/nvme/target/Kconfig | 2
-rw-r--r--  drivers/nvme/target/Makefile | 4
-rw-r--r--  drivers/nvme/target/admin-cmd.c | 143
-rw-r--r--  drivers/nvme/target/core.c | 128
-rw-r--r--  drivers/nvme/target/discovery.c | 2
-rw-r--r--  drivers/nvme/target/fabrics-cmd.c | 4
-rw-r--r--  drivers/nvme/target/fc.c | 2
-rw-r--r--drivers/nvme/target/io-cmd-bdev.c (renamed from drivers/nvme/target/io-cmd.c)77
-rw-r--r--drivers/nvme/target/io-cmd-file.c304
-rw-r--r--drivers/nvme/target/loop.c58
-rw-r--r--drivers/nvme/target/nvmet.h51
-rw-r--r--drivers/of/device.c6
-rw-r--r--drivers/of/of_reserved_mem.c2
-rw-r--r--drivers/of/overlay.c30
-rw-r--r--drivers/parisc/Kconfig5
-rw-r--r--drivers/parisc/ccio-dma.c40
-rw-r--r--drivers/parisc/sba_iommu.c34
-rw-r--r--drivers/pci/Kconfig4
-rw-r--r--drivers/pci/bus.c4
-rw-r--r--drivers/pci/msi.c3
-rw-r--r--drivers/pci/pci-driver.c33
-rw-r--r--drivers/pci/pci.c37
-rw-r--r--drivers/pci/proc.c17
-rw-r--r--drivers/pinctrl/intel/pinctrl-cherryview.c16
-rw-r--r--drivers/pinctrl/intel/pinctrl-sunrisepoint.c45
-rw-r--r--drivers/pinctrl/meson/pinctrl-meson-axg.c2
-rw-r--r--drivers/pinctrl/stm32/pinctrl-stm32.c13
-rw-r--r--drivers/platform/chrome/Kconfig11
-rw-r--r--drivers/platform/chrome/Makefile1
-rw-r--r--drivers/platform/chrome/chromeos_laptop.c315
-rw-r--r--drivers/platform/chrome/chromeos_tbmc.c111
-rw-r--r--drivers/platform/chrome/cros_ec_lightbar.c21
-rw-r--r--drivers/platform/chrome/cros_ec_lpc.c13
-rw-r--r--drivers/platform/chrome/cros_ec_proto.c2
-rw-r--r--drivers/platform/chrome/cros_ec_sysfs.c2
-rw-r--r--drivers/platform/chrome/cros_ec_vbc.c9
-rw-r--r--drivers/platform/x86/Kconfig4
-rw-r--r--drivers/platform/x86/asus-wireless.c4
-rw-r--r--drivers/platform/x86/asus-wmi.c23
-rw-r--r--drivers/platform/x86/toshiba_acpi.c17
-rw-r--r--drivers/pnp/pnpbios/proc.c78
-rw-r--r--drivers/remoteproc/qcom_q6v5_pil.c2
-rw-r--r--drivers/remoteproc/remoteproc_core.c4
-rw-r--r--drivers/reset/reset-uniphier.c6
-rw-r--r--drivers/rpmsg/rpmsg_char.c2
-rw-r--r--drivers/rtc/rtc-proc.c33
-rw-r--r--drivers/s390/block/dasd.c13
-rw-r--r--drivers/s390/block/dasd_proc.c17
-rw-r--r--drivers/s390/char/tape_proc.c19
-rw-r--r--drivers/s390/cio/qdio_setup.c12
-rw-r--r--drivers/s390/cio/vfio_ccw_cp.c13
-rw-r--r--drivers/s390/scsi/zfcp_dbf.c23
-rw-r--r--drivers/s390/scsi/zfcp_ext.h5
-rw-r--r--drivers/s390/scsi/zfcp_scsi.c14
-rw-r--r--drivers/sbus/char/Kconfig7
-rw-r--r--drivers/sbus/char/Makefile1
-rw-r--r--drivers/sbus/char/jsflash.c658
-rw-r--r--drivers/sbus/char/oradax.c2
-rw-r--r--drivers/scsi/Makefile2
-rw-r--r--drivers/scsi/aacraid/commsup.c8
-rw-r--r--drivers/scsi/gdth.c2
-rw-r--r--drivers/scsi/isci/port_config.c3
-rw-r--r--drivers/scsi/libiscsi.c6
-rw-r--r--drivers/scsi/megaraid.c140
-rw-r--r--drivers/scsi/megaraid.h12
-rw-r--r--drivers/scsi/megaraid/megaraid_sas_base.c2
-rw-r--r--drivers/scsi/mvumi.c2
-rw-r--r--drivers/scsi/osd/osd_initiator.c24
-rw-r--r--drivers/scsi/osst.c2
-rw-r--r--drivers/scsi/qla4xxx/ql4_os.c2
-rw-r--r--drivers/scsi/scsi_error.c10
-rw-r--r--drivers/scsi/scsi_lib.c28
-rw-r--r--drivers/scsi/scsi_transport_fc.c16
-rw-r--r--drivers/scsi/scsi_transport_iscsi.c2
-rw-r--r--drivers/scsi/scsi_transport_sas.c19
-rw-r--r--drivers/scsi/scsi_transport_srp.c26
-rw-r--r--drivers/scsi/sg.c128
-rw-r--r--drivers/scsi/sr_ioctl.c10
-rw-r--r--drivers/scsi/st.c2
-rw-r--r--drivers/scsi/storvsc_drv.c7
-rw-r--r--drivers/scsi/ufs/ufshcd.c6
-rw-r--r--drivers/scsi/vmw_pvscsi.c2
-rw-r--r--drivers/soc/lantiq/gphy.c36
-rw-r--r--drivers/spi/Kconfig27
-rw-r--r--drivers/spi/Makefile2
-rw-r--r--drivers/spi/internals.h43
-rw-r--r--drivers/spi/spi-bcm-qspi.c190
-rw-r--r--drivers/spi/spi-bcm2835aux.c5
-rw-r--r--drivers/spi/spi-bcm53xx.c360
-rw-r--r--drivers/spi/spi-bcm53xx.h73
-rw-r--r--drivers/spi/spi-bcm63xx-hsspi.c25
-rw-r--r--drivers/spi/spi-cadence.c14
-rw-r--r--drivers/spi/spi-fsl-lpspi.c21
-rw-r--r--drivers/spi/spi-imx.c24
-rw-r--r--drivers/spi/spi-mem.c410
-rw-r--r--drivers/spi/spi-meson-spicc.c11
-rw-r--r--drivers/spi/spi-mpc52xx.c2
-rw-r--r--drivers/spi/spi-mxs.c48
-rw-r--r--drivers/spi/spi-omap2-mcspi.c111
-rw-r--r--drivers/spi/spi-pxa2xx-dma.c28
-rw-r--r--drivers/spi/spi-pxa2xx.c257
-rw-r--r--drivers/spi/spi-pxa2xx.h19
-rw-r--r--drivers/spi/spi-s3c64xx.c160
-rw-r--r--drivers/spi/spi-sh-msiof.c72
-rw-r--r--drivers/spi/spi-stm32.c2
-rw-r--r--drivers/spi/spi-ti-qspi.c87
-rw-r--r--drivers/spi/spi-zynqmp-gqspi.c92
-rw-r--r--drivers/spi/spi.c145
-rw-r--r--drivers/ssb/Kconfig4
-rw-r--r--drivers/staging/comedi/drivers/serial2002.c4
-rw-r--r--drivers/staging/comedi/proc.c18
-rw-r--r--drivers/staging/fwserial/fwserial.c15
-rw-r--r--drivers/staging/ipx/af_ipx.c2
-rw-r--r--drivers/staging/ipx/ipx_proc.c45
-rw-r--r--drivers/staging/media/imx/imx-media-csi.c2
-rw-r--r--drivers/staging/ncpfs/dir.c42
-rw-r--r--drivers/staging/rtl8192u/r8192U_core.c67
-rw-r--r--drivers/target/target_core_iblock.c24
-rw-r--r--drivers/target/target_core_iblock.h2
-rw-r--r--drivers/target/target_core_pscsi.c3
-rw-r--r--drivers/target/target_core_user.c2
-rw-r--r--drivers/tee/tee_core.c11
-rw-r--r--drivers/tee/tee_shm.c5
-rw-r--r--drivers/thermal/int340x_thermal/int3403_thermal.c3
-rw-r--r--drivers/thermal/samsung/exynos_tmu.c14
-rw-r--r--drivers/thunderbolt/icm.c2
-rw-r--r--drivers/tty/amiserial.c15
-rw-r--r--drivers/tty/cyclades.c15
-rw-r--r--drivers/tty/serial/serial_core.c15
-rw-r--r--drivers/tty/synclink.c15
-rw-r--r--drivers/tty/synclink_gt.c15
-rw-r--r--drivers/tty/synclinkmp.c15
-rw-r--r--drivers/tty/tty_ldisc.c15
-rw-r--r--drivers/usb/core/config.c4
-rw-r--r--drivers/usb/dwc2/core.h2
-rw-r--r--drivers/usb/dwc2/gadget.c21
-rw-r--r--drivers/usb/dwc2/hcd.c13
-rw-r--r--drivers/usb/dwc2/pci.c4
-rw-r--r--drivers/usb/dwc3/gadget.c4
-rw-r--r--drivers/usb/gadget/function/f_phonet.c2
-rw-r--r--drivers/usb/gadget/udc/at91_udc.c16
-rw-r--r--drivers/usb/gadget/udc/fsl_udc_core.c18
-rw-r--r--drivers/usb/gadget/udc/goku_udc.c18
-rw-r--r--drivers/usb/gadget/udc/omap_udc.c15
-rw-r--r--drivers/usb/host/ehci-mem.c3
-rw-r--r--drivers/usb/host/ehci-sched.c6
-rw-r--r--drivers/usb/host/xhci-hub.c2
-rw-r--r--drivers/usb/host/xhci.c1
-rw-r--r--drivers/usb/musb/musb_gadget.c3
-rw-r--r--drivers/usb/musb/musb_host.c9
-rw-r--r--drivers/usb/musb/musb_host.h7
-rw-r--r--drivers/usb/musb/musb_virthub.c25
-rw-r--r--drivers/usb/serial/option.c5
-rw-r--r--drivers/usb/serial/usb-serial.c15
-rw-r--r--drivers/usb/serial/visor.c69
-rw-r--r--drivers/usb/typec/tcpm.c1
-rw-r--r--drivers/usb/typec/tps6598x.c47
-rw-r--r--drivers/usb/usbip/stub.h2
-rw-r--r--drivers/usb/usbip/stub_dev.c43
-rw-r--r--drivers/usb/usbip/stub_main.c105
-rw-r--r--drivers/vfio/vfio_iommu_type1.c25
-rw-r--r--drivers/vfio/virqfd.c2
-rw-r--r--drivers/vhost/net.c37
-rw-r--r--drivers/vhost/vhost.c5
-rw-r--r--drivers/video/fbdev/core/fbmem.c15
-rw-r--r--drivers/video/fbdev/via/viafbdev.c17
-rw-r--r--drivers/w1/w1_io.c1
-rw-r--r--drivers/xen/swiotlb-xen.c2
-rw-r--r--drivers/zorro/proc.c17
-rw-r--r--drivers/zorro/zorro.c3
-rw-r--r--fs/9p/vfs_inode.c35
-rw-r--r--fs/Kconfig2
-rw-r--r--fs/adfs/dir.c24
-rw-r--r--fs/affs/namei.c15
-rw-r--r--fs/affs/super.c1
-rw-r--r--fs/afs/addr_list.c25
-rw-r--r--fs/afs/callback.c84
-rw-r--r--fs/afs/cmservice.c67
-rw-r--r--fs/afs/dir.c54
-rw-r--r--fs/afs/file.c2
-rw-r--r--fs/afs/flock.c6
-rw-r--r--fs/afs/fsclient.c31
-rw-r--r--fs/afs/inode.c19
-rw-r--r--fs/afs/internal.h25
-rw-r--r--fs/afs/proc.c134
-rw-r--r--fs/afs/rotate.c20
-rw-r--r--fs/afs/rxrpc.c18
-rw-r--r--fs/afs/security.c17
-rw-r--r--fs/afs/server.c21
-rw-r--r--fs/afs/server_list.c7
-rw-r--r--fs/afs/super.c4
-rw-r--r--fs/afs/vlclient.c19
-rw-r--r--fs/afs/write.c2
-rw-r--r--fs/aio.c736
-rw-r--r--fs/attr.c36
-rw-r--r--fs/befs/linuxvfs.c17
-rw-r--r--fs/bfs/dir.c43
-rw-r--r--fs/block_dev.c24
-rw-r--r--fs/btrfs/btrfs_inode.h22
-rw-r--r--fs/btrfs/compression.c7
-rw-r--r--fs/btrfs/compression.h2
-rw-r--r--fs/btrfs/ctree.c145
-rw-r--r--fs/btrfs/ctree.h78
-rw-r--r--fs/btrfs/delayed-inode.c9
-rw-r--r--fs/btrfs/delayed-ref.c275
-rw-r--r--fs/btrfs/delayed-ref.h5
-rw-r--r--fs/btrfs/dev-replace.c150
-rw-r--r--fs/btrfs/disk-io.c417
-rw-r--r--fs/btrfs/extent-tree.c260
-rw-r--r--fs/btrfs/extent_io.c87
-rw-r--r--fs/btrfs/extent_io.h20
-rw-r--r--fs/btrfs/extent_map.c6
-rw-r--r--fs/btrfs/extent_map.h3
-rw-r--r--fs/btrfs/free-space-cache.c6
-rw-r--r--fs/btrfs/free-space-tree.c192
-rw-r--r--fs/btrfs/free-space-tree.h8
-rw-r--r--fs/btrfs/inode.c1377
-rw-r--r--fs/btrfs/ioctl.c1132
-rw-r--r--fs/btrfs/locking.c34
-rw-r--r--fs/btrfs/lzo.c76
-rw-r--r--fs/btrfs/ordered-data.c14
-rw-r--r--fs/btrfs/print-tree.c21
-rw-r--r--fs/btrfs/props.c12
-rw-r--r--fs/btrfs/qgroup.c69
-rw-r--r--fs/btrfs/raid56.c38
-rw-r--r--fs/btrfs/relocation.c10
-rw-r--r--fs/btrfs/scrub.c1
-rw-r--r--fs/btrfs/send.c50
-rw-r--r--fs/btrfs/super.c7
-rw-r--r--fs/btrfs/sysfs.c52
-rw-r--r--fs/btrfs/sysfs.h4
-rw-r--r--fs/btrfs/tests/btrfs-tests.c4
-rw-r--r--fs/btrfs/tests/btrfs-tests.h6
-rw-r--r--fs/btrfs/tests/extent-buffer-tests.c56
-rw-r--r--fs/btrfs/tests/extent-io-tests.c75
-rw-r--r--fs/btrfs/tests/extent-map-tests.c90
-rw-r--r--fs/btrfs/tests/free-space-tests.c177
-rw-r--r--fs/btrfs/tests/free-space-tree-tests.c129
-rw-r--r--fs/btrfs/tests/inode-tests.c312
-rw-r--r--fs/btrfs/tests/qgroup-tests.c100
-rw-r--r--fs/btrfs/transaction.c15
-rw-r--r--fs/btrfs/transaction.h1
-rw-r--r--fs/btrfs/tree-log.c172
-rw-r--r--fs/btrfs/uuid-tree.c10
-rw-r--r--fs/btrfs/volumes.c515
-rw-r--r--fs/btrfs/volumes.h24
-rw-r--r--fs/cachefiles/namei.c10
-rw-r--r--fs/cachefiles/proc.c19
-rw-r--r--fs/ceph/file.c205
-rw-r--r--fs/cifs/Kconfig2
-rw-r--r--fs/cifs/Makefile7
-rw-r--r--fs/cifs/cifs_debug.c58
-rw-r--r--fs/cifs/cifs_debug.h2
-rw-r--r--fs/cifs/cifs_fs_sb.h1
-rw-r--r--fs/cifs/cifsfs.c61
-rw-r--r--fs/cifs/cifsfs.h2
-rw-r--r--fs/cifs/cifsglob.h47
-rw-r--r--fs/cifs/cifsproto.h16
-rw-r--r--fs/cifs/cifssmb.c23
-rw-r--r--fs/cifs/connect.c101
-rw-r--r--fs/cifs/dir.c40
-rw-r--r--fs/cifs/file.c75
-rw-r--r--fs/cifs/inode.c13
-rw-r--r--fs/cifs/misc.c7
-rw-r--r--fs/cifs/netmisc.c2
-rw-r--r--fs/cifs/readdir.c6
-rw-r--r--fs/cifs/smb2glob.h5
-rw-r--r--fs/cifs/smb2inode.c43
-rw-r--r--fs/cifs/smb2maperror.c11
-rw-r--r--fs/cifs/smb2misc.c126
-rw-r--r--fs/cifs/smb2ops.c320
-rw-r--r--fs/cifs/smb2pdu.c413
-rw-r--r--fs/cifs/smb2pdu.h84
-rw-r--r--fs/cifs/smb2proto.h9
-rw-r--r--fs/cifs/smb2transport.c10
-rw-r--r--fs/cifs/trace.c18
-rw-r--r--fs/cifs/trace.h429
-rw-r--r--fs/cifs/transport.c4
-rw-r--r--fs/cramfs/inode.c7
-rw-r--r--fs/dax.c2
-rw-r--r--fs/dcache.c227
-rw-r--r--fs/direct-io.c4
-rw-r--r--fs/dlm/lowcomms.c16
-rw-r--r--fs/ecryptfs/inode.c3
-rw-r--r--fs/eventfd.c15
-rw-r--r--fs/eventpoll.c5
-rw-r--r--fs/exofs/ore.c10
-rw-r--r--fs/exofs/super.c2
-rw-r--r--fs/ext2/inode.c10
-rw-r--r--fs/ext2/namei.c6
-rw-r--r--fs/ext4/ext4.h2
-rw-r--r--fs/ext4/mballoc.c29
-rw-r--r--fs/ext4/namei.c6
-rw-r--r--fs/ext4/sysfs.c49
-rw-r--r--fs/f2fs/namei.c12
-rw-r--r--fs/f2fs/sysfs.c29
-rw-r--r--fs/fat/namei_msdos.c4
-rw-r--r--fs/fat/namei_vfat.c13
-rw-r--r--fs/fcntl.c15
-rw-r--r--fs/filesystems.c14
-rw-r--r--fs/freevxfs/vxfs_lookup.c8
-rw-r--r--fs/fs-writeback.c2
-rw-r--r--fs/fscache/histogram.c17
-rw-r--r--fs/fscache/internal.h5
-rw-r--r--fs/fscache/proc.c8
-rw-r--r--fs/fscache/stats.c17
-rw-r--r--fs/gfs2/aops.c69
-rw-r--r--fs/gfs2/bmap.c428
-rw-r--r--fs/gfs2/bmap.h6
-rw-r--r--fs/gfs2/file.c6
-rw-r--r--fs/gfs2/incore.h3
-rw-r--r--fs/gfs2/inode.c4
-rw-r--r--fs/gfs2/log.h7
-rw-r--r--fs/gfs2/ops_fstype.c19
-rw-r--r--fs/gfs2/quota.c5
-rw-r--r--fs/gfs2/rgrp.c2
-rw-r--r--fs/gfs2/trans.c27
-rw-r--r--fs/hfs/dir.c20
-rw-r--r--fs/hfs/inode.c4
-rw-r--r--fs/hfsplus/dir.c3
-rw-r--r--fs/hfsplus/super.c1
-rw-r--r--fs/inode.c1
-rw-r--r--fs/internal.h1
-rw-r--r--fs/ioctl.c4
-rw-r--r--fs/jffs2/dir.c12
-rw-r--r--fs/jfs/jfs_debug.c43
-rw-r--r--fs/jfs/jfs_debug.h10
-rw-r--r--fs/jfs/jfs_logmgr.c14
-rw-r--r--fs/jfs/jfs_metapage.c14
-rw-r--r--fs/jfs/jfs_txnmgr.c28
-rw-r--r--fs/jfs/jfs_xtree.c14
-rw-r--r--fs/jfs/namei.c12
-rw-r--r--fs/kernfs/mount.c1
-rw-r--r--fs/locks.c16
-rw-r--r--fs/minix/namei.c8
-rw-r--r--fs/namei.c35
-rw-r--r--fs/namespace.c4
-rw-r--r--fs/nfs/client.c43
-rw-r--r--fs/nfsd/blocklayout.c2
-rw-r--r--fs/nfsd/vfs.c22
-rw-r--r--fs/nilfs2/namei.c6
-rw-r--r--fs/ocfs2/cluster/heartbeat.c11
-rw-r--r--fs/ocfs2/refcounttree.c14
-rw-r--r--fs/omfs/dir.c7
-rw-r--r--fs/open.c44
-rw-r--r--fs/openpromfs/inode.c3
-rw-r--r--fs/orangefs/namei.c73
-rw-r--r--fs/pipe.c22
-rw-r--r--fs/proc/array.c48
-rw-r--r--fs/proc/base.c162
-rw-r--r--fs/proc/cmdline.c14
-rw-r--r--fs/proc/consoles.c14
-rw-r--r--fs/proc/devices.c14
-rw-r--r--fs/proc/fd.c138
-rw-r--r--fs/proc/generic.c151
-rw-r--r--fs/proc/internal.h17
-rw-r--r--fs/proc/interrupts.c14
-rw-r--r--fs/proc/kcore.c23
-rw-r--r--fs/proc/loadavg.c14
-rw-r--r--fs/proc/meminfo.c14
-rw-r--r--fs/proc/namespaces.c24
-rw-r--r--fs/proc/nommu.c14
-rw-r--r--fs/proc/proc_net.c104
-rw-r--r--fs/proc/proc_sysctl.c15
-rw-r--r--fs/proc/proc_tty.c22
-rw-r--r--fs/proc/self.c4
-rw-r--r--fs/proc/softirqs.c14
-rw-r--r--fs/proc/task_mmu.c4
-rw-r--r--fs/proc/thread_self.c4
-rw-r--r--fs/proc/uptime.c14
-rw-r--r--fs/proc/version.c14
-rw-r--r--fs/qnx4/namei.c8
-rw-r--r--fs/qnx6/namei.c8
-rw-r--r--fs/read_write.c6
-rw-r--r--fs/reiserfs/namei.c12
-rw-r--r--fs/reiserfs/procfs.c16
-rw-r--r--fs/romfs/super.c36
-rw-r--r--fs/select.c85
-rw-r--r--fs/seq_file.c5
-rw-r--r--fs/signalfd.c93
-rw-r--r--fs/super.c32
-rw-r--r--fs/sysfs/mount.c6
-rw-r--r--fs/sysv/namei.c9
-rw-r--r--fs/timerfd.c22
-rw-r--r--fs/ubifs/dir.c43
-rw-r--r--fs/udf/namei.c6
-rw-r--r--fs/ufs/namei.c6
-rw-r--r--fs/xattr.c4
-rw-r--r--fs/xfs/libxfs/xfs_attr.c9
-rw-r--r--fs/xfs/libxfs/xfs_bmap.c4
-rw-r--r--fs/xfs/libxfs/xfs_inode_buf.c21
-rw-r--r--fs/xfs/xfs_aops.c2
-rw-r--r--fs/xfs/xfs_aops.h2
-rw-r--r--fs/xfs/xfs_file.c24
-rw-r--r--fs/xfs/xfs_iops.c16
-rw-r--r--fs/xfs/xfs_stats.c33
-rw-r--r--fs/xfs/xfs_super.c11
-rw-r--r--include/asm-generic/atomic-long.h19
-rw-r--r--include/asm-generic/barrier.h27
-rw-r--r--include/asm-generic/dma-mapping.h9
-rw-r--r--include/asm-generic/pci.h8
-rw-r--r--include/asm-generic/qspinlock.h4
-rw-r--r--include/asm-generic/qspinlock_types.h32
-rw-r--r--include/crypto/if_alg.h3
-rw-r--r--include/drm/bridge/dw_hdmi.h2
-rw-r--r--include/dt-bindings/clock/stm32mp1-clks.h4
-rw-r--r--include/kvm/arm_vgic.h1
-rw-r--r--include/linux/aio.h2
-rw-r--r--include/linux/atalk.h7
-rw-r--r--include/linux/atomic.h2
-rw-r--r--include/linux/bio.h42
-rw-r--r--include/linux/blk-mq.h3
-rw-r--r--include/linux/blk_types.h51
-rw-r--r--include/linux/blkdev.h110
-rw-r--r--include/linux/bpf.h4
-rw-r--r--include/linux/bpf_verifier.h3
-rw-r--r--include/linux/brcmphy.h1
-rw-r--r--include/linux/bsg-lib.h3
-rw-r--r--include/linux/bsg.h6
-rw-r--r--include/linux/ceph/osd_client.h12
-rw-r--r--include/linux/clk-provider.h3
-rw-r--r--include/linux/compat.h7
-rw-r--r--include/linux/cper.h4
-rw-r--r--include/linux/cpu.h2
-rw-r--r--include/linux/dcache.h1
-rw-r--r--include/linux/delayacct.h2
-rw-r--r--include/linux/device.h11
-rw-r--r--include/linux/dma-debug.h6
-rw-r--r--include/linux/dma-direct.h7
-rw-r--r--include/linux/dma-iommu.h1
-rw-r--r--include/linux/dma-mapping.h19
-rw-r--r--include/linux/dma-noncoherent.h47
-rw-r--r--include/linux/efi.h14
-rw-r--r--include/linux/elevator.h2
-rw-r--r--include/linux/fs.h8
-rw-r--r--include/linux/genhd.h4
-rw-r--r--include/linux/gfp.h2
-rw-r--r--include/linux/hmm.h2
-rw-r--r--include/linux/ide.h8
-rw-r--r--include/linux/iio/buffer_impl.h6
-rw-r--r--include/linux/interrupt.h13
-rw-r--r--include/linux/iommu-helper.h13
-rw-r--r--include/linux/irq.h1
-rw-r--r--include/linux/irq_cpustat.h10
-rw-r--r--include/linux/irq_sim.h13
-rw-r--r--include/linux/irqchip/arm-gic-v3.h1
-rw-r--r--include/linux/irqdomain.h8
-rw-r--r--include/linux/isdn/capilli.h2
-rw-r--r--include/linux/kthread.h1
-rw-r--r--include/linux/kvm_host.h8
-rw-r--r--include/linux/libata.h2
-rw-r--r--include/linux/lightnvm.h2
-rw-r--r--include/linux/memory_hotplug.h3
-rw-r--r--include/linux/mempool.h34
-rw-r--r--include/linux/memremap.h4
-rw-r--r--include/linux/mfd/cros_ec.h2
-rw-r--r--include/linux/mfd/mc13xxx.h2
-rw-r--r--include/linux/mlx5/driver.h20
-rw-r--r--include/linux/mm.h9
-rw-r--r--include/linux/mmu_notifier.h2
-rw-r--r--include/linux/msi.h2
-rw-r--r--include/linux/mtd/map.h2
-rw-r--r--include/linux/mtd/rawnand.h16
-rw-r--r--include/linux/mutex.h3
-rw-r--r--include/linux/net.h1
-rw-r--r--include/linux/node.h8
-rw-r--r--include/linux/nospec.h10
-rw-r--r--include/linux/nvme.h16
-rw-r--r--include/linux/of_device.h8
-rw-r--r--include/linux/oom.h2
-rw-r--r--include/linux/pci.h2
-rw-r--r--include/linux/percpu-rwsem.h6
-rw-r--r--include/linux/perf_event.h10
-rw-r--r--include/linux/pktcdvd.h2
-rw-r--r--include/linux/platform_device.h2
-rw-r--r--include/linux/poll.h14
-rw-r--r--include/linux/proc_fs.h39
-rw-r--r--include/linux/ptrace.h1
-rw-r--r--include/linux/rbtree_augmented.h1
-rw-r--r--include/linux/rbtree_latch.h1
-rw-r--r--include/linux/rcupdate.h5
-rw-r--r--include/linux/rcutiny.h1
-rw-r--r--include/linux/rcutree.h2
-rw-r--r--include/linux/regmap.h19
-rw-r--r--include/linux/remoteproc.h2
-rw-r--r--include/linux/rwsem.h6
-rw-r--r--include/linux/sbitmap.h36
-rw-r--r--include/linux/sched.h69
-rw-r--r--include/linux/sched/mm.h42
-rw-r--r--include/linux/sched/signal.h2
-rw-r--r--include/linux/seccomp.h5
-rw-r--r--include/linux/seq_file_net.h19
-rw-r--r--include/linux/signal.h3
-rw-r--r--include/linux/skbuff.h3
-rw-r--r--include/linux/spi/spi-mem.h249
-rw-r--r--include/linux/spi/spi.h60
-rw-r--r--include/linux/spinlock.h18
-rw-r--r--include/linux/srcu.h36
-rw-r--r--include/linux/string.h4
-rw-r--r--include/linux/sunrpc/rpc_pipe_fs.h2
-rw-r--r--include/linux/swait.h15
-rw-r--r--include/linux/swap.h2
-rw-r--r--include/linux/syscalls.h6
-rw-r--r--include/linux/tracehook.h1
-rw-r--r--include/linux/tty.h3
-rw-r--r--include/linux/tty_driver.h2
-rw-r--r--include/linux/uio.h15
-rw-r--r--include/linux/usb/composite.h2
-rw-r--r--include/linux/wait_bit.h17
-rw-r--r--include/linux/xattr.h1
-rw-r--r--include/media/i2c/tvp7002.h2
-rw-r--r--include/media/videobuf-core.h4
-rw-r--r--include/media/videobuf-dma-sg.h4
-rw-r--r--include/media/videobuf-vmalloc.h2
-rw-r--r--include/net/ax25.h5
-rw-r--r--include/net/bluetooth/bluetooth.h2
-rw-r--r--include/net/bonding.h1
-rw-r--r--include/net/busy_poll.h15
-rw-r--r--include/net/flow_dissector.h2
-rw-r--r--include/net/ip6_fib.h10
-rw-r--r--include/net/ip_vs.h12
-rw-r--r--include/net/iucv/af_iucv.h2
-rw-r--r--include/net/mac80211.h2
-rw-r--r--include/net/netfilter/nf_tables.h5
-rw-r--r--include/net/netrom.h5
-rw-r--r--include/net/phonet/pn_dev.h4
-rw-r--r--include/net/ping.h11
-rw-r--r--include/net/raw.h4
-rw-r--r--include/net/rose.h6
-rw-r--r--include/net/sctp/sctp.h5
-rw-r--r--include/net/sock.h2
-rw-r--r--include/net/tcp.h14
-rw-r--r--include/net/tls.h4
-rw-r--r--include/net/udp.h22
-rw-r--r--include/net/xfrm.h1
-rw-r--r--include/rdma/ib_umem.h1
-rw-r--r--include/rdma/uverbs_ioctl.h10
-rw-r--r--include/scsi/osd_initiator.h6
-rw-r--r--include/scsi/scsi_host.h2
-rw-r--r--include/trace/events/afs.h42
-rw-r--r--include/trace/events/btrfs.h323
-rw-r--r--include/trace/events/initcall.h14
-rw-r--r--include/trace/events/rcu.h13
-rw-r--r--include/trace/events/rxrpc.h85
-rw-r--r--include/trace/events/sched.h4
-rw-r--r--include/trace/events/sunrpc.h16
-rw-r--r--include/trace/events/xen.h16
-rw-r--r--include/uapi/asm-generic/siginfo.h3
-rw-r--r--include/uapi/asm-generic/unistd.h4
-rw-r--r--include/uapi/linux/aio_abi.h12
-rw-r--r--include/uapi/linux/bpf.h2
-rw-r--r--include/uapi/linux/btrfs.h97
-rw-r--r--include/uapi/linux/if_infiniband.h2
-rw-r--r--include/uapi/linux/netfilter/nf_conntrack_tcp.h3
-rw-r--r--include/uapi/linux/nl80211.h2
-rw-r--r--include/uapi/linux/ppp-ioctl.h2
-rw-r--r--include/uapi/linux/prctl.h12
-rw-r--r--include/uapi/linux/rds.h2
-rw-r--r--include/uapi/linux/seccomp.h5
-rw-r--r--include/uapi/linux/signalfd.h6
-rw-r--r--include/uapi/linux/tls.h2
-rw-r--r--include/uapi/linux/types.h4
-rw-r--r--include/uapi/rdma/cxgb3-abi.h2
-rw-r--r--include/uapi/rdma/cxgb4-abi.h2
-rw-r--r--include/uapi/rdma/hns-abi.h2
-rw-r--r--include/uapi/rdma/ib_user_cm.h2
-rw-r--r--include/uapi/rdma/ib_user_ioctl_verbs.h2
-rw-r--r--include/uapi/rdma/ib_user_mad.h2
-rw-r--r--include/uapi/rdma/ib_user_sa.h2
-rw-r--r--include/uapi/rdma/ib_user_verbs.h2
-rw-r--r--include/uapi/rdma/mlx4-abi.h2
-rw-r--r--include/uapi/rdma/mlx5-abi.h2
-rw-r--r--include/uapi/rdma/mthca-abi.h2
-rw-r--r--include/uapi/rdma/nes-abi.h2
-rw-r--r--include/uapi/rdma/qedr-abi.h2
-rw-r--r--include/uapi/rdma/rdma_user_cm.h2
-rw-r--r--include/uapi/rdma/rdma_user_ioctl.h2
-rw-r--r--include/uapi/rdma/rdma_user_rxe.h2
-rw-r--r--init/Kconfig2
-rw-r--r--init/main.c10
-rw-r--r--ipc/shm.c19
-rw-r--r--kernel/bpf/arraymap.c3
-rw-r--r--kernel/bpf/core.c100
-rw-r--r--kernel/bpf/sockmap.c117
-rw-r--r--kernel/bpf/syscall.c23
-rw-r--r--kernel/bpf/verifier.c145
-rw-r--r--kernel/cgroup/cgroup-internal.h2
-rw-r--r--kernel/cgroup/cgroup-v1.c14
-rw-r--r--kernel/cgroup/cgroup.c2
-rw-r--r--kernel/compat.c1
-rw-r--r--kernel/delayacct.c17
-rw-r--r--kernel/dma.c14
-rw-r--r--kernel/events/core.c44
-rw-r--r--kernel/events/ring_buffer.c7
-rw-r--r--kernel/events/uprobes.c7
-rw-r--r--kernel/exec_domain.c14
-rw-r--r--kernel/irq/irq_sim.c7
-rw-r--r--kernel/irq/msi.c33
-rw-r--r--kernel/irq/proc.c82
-rw-r--r--kernel/kthread.c48
-rw-r--r--kernel/locking/lockdep.c70
-rw-r--r--kernel/locking/lockdep_proc.c45
-rw-r--r--kernel/locking/mcs_spinlock.h10
-rw-r--r--kernel/locking/mutex.c3
-rw-r--r--kernel/locking/qspinlock.c247
-rw-r--r--kernel/locking/qspinlock_paravirt.h49
-rw-r--r--kernel/locking/qspinlock_stat.h9
-rw-r--r--kernel/locking/rwsem-xadd.c40
-rw-r--r--kernel/locking/rwsem.c2
-rw-r--r--kernel/locking/rwsem.h30
-rw-r--r--kernel/module.c5
-rw-r--r--kernel/power/swap.c14
-rw-r--r--kernel/rcu/rcu.h12
-rw-r--r--kernel/rcu/rcu_segcblist.c18
-rw-r--r--kernel/rcu/rcu_segcblist.h2
-rw-r--r--kernel/rcu/rcuperf.c2
-rw-r--r--kernel/rcu/rcutorture.c15
-rw-r--r--kernel/rcu/srcutiny.c9
-rw-r--r--kernel/rcu/srcutree.c30
-rw-r--r--kernel/rcu/tree.c364
-rw-r--r--kernel/rcu/tree.h36
-rw-r--r--kernel/rcu/tree_exp.h235
-rw-r--r--kernel/rcu/tree_plugin.h98
-rw-r--r--kernel/rcu/update.c50
-rw-r--r--kernel/resource.c43
-rw-r--r--kernel/sched/autogroup.c7
-rw-r--r--kernel/sched/core.c154
-rw-r--r--kernel/sched/cpufreq_schedutil.c33
-rw-r--r--kernel/sched/deadline.c10
-rw-r--r--kernel/sched/debug.c28
-rw-r--r--kernel/sched/fair.c176
-rw-r--r--kernel/sched/rt.c2
-rw-r--r--kernel/sched/sched.h13
-rw-r--r--kernel/sched/stats.c15
-rw-r--r--kernel/sched/topology.c2
-rw-r--r--kernel/seccomp.c21
-rw-r--r--kernel/signal.c198
-rw-r--r--kernel/softirq.c7
-rw-r--r--kernel/stop_machine.c43
-rw-r--r--kernel/sys.c28
-rw-r--r--kernel/sys_ni.c2
-rw-r--r--kernel/time/tick-broadcast.c8
-rw-r--r--kernel/time/timer_list.c16
-rw-r--r--kernel/torture.c2
-rw-r--r--kernel/trace/ftrace.c4
-rw-r--r--kernel/trace/trace.c12
-rw-r--r--kernel/trace/trace.h11
-rw-r--r--kernel/trace/trace_benchmark.c4
-rw-r--r--kernel/trace/trace_events_filter.c3
-rw-r--r--kernel/trace/trace_events_hist.c12
-rw-r--r--kernel/trace/trace_events_trigger.c15
-rw-r--r--kernel/trace/trace_stack.c2
-rw-r--r--kernel/trace/trace_uprobe.c35
-rw-r--r--kernel/tracepoint.c4
-rw-r--r--lib/Kconfig43
-rw-r--r--lib/Kconfig.debug19
-rw-r--r--lib/Makefile3
-rw-r--r--lib/dma-debug.c65
-rw-r--r--lib/dma-direct.c29
-rw-r--r--lib/dma-noncoherent.c102
-rw-r--r--lib/errseq.c23
-rw-r--r--lib/find_bit_benchmark.c7
-rw-r--r--lib/iommu-helper.c14
-rw-r--r--lib/iov_iter.c65
-rw-r--r--lib/radix-tree.c10
-rw-r--r--lib/sbitmap.c113
-rw-r--r--lib/swiotlb.c15
-rw-r--r--lib/test_bitmap.c21
-rw-r--r--lib/vsprintf.c26
-rw-r--r--mm/Kconfig10
-rw-r--r--mm/backing-dev.c21
-rw-r--r--mm/cleancache.c2
-rw-r--r--mm/cma.c83
-rw-r--r--mm/compaction.c4
-rw-r--r--mm/frontswap.c2
-rw-r--r--mm/gup.c3
-rw-r--r--mm/hmm.c2
-rw-r--r--mm/huge_memory.c6
-rw-r--r--mm/hugetlb.c4
-rw-r--r--mm/internal.h4
-rw-r--r--mm/kasan/kasan.c66
-rw-r--r--mm/ksm.c23
-rw-r--r--mm/memcontrol.c2
-rw-r--r--mm/memory_hotplug.c2
-rw-r--r--mm/mempool.c108
-rw-r--r--mm/migrate.c4
-rw-r--r--mm/mmap.c78
-rw-r--r--mm/oom_kill.c81
-rw-r--r--mm/page_alloc.c99
-rw-r--r--mm/rmap.c6
-rw-r--r--mm/sparse.c2
-rw-r--r--mm/swapfile.c7
-rw-r--r--mm/util.c2
-rw-r--r--mm/vmalloc.c21
-rw-r--r--mm/vmscan.c2
-rw-r--r--mm/vmstat.c62
-rw-r--r--mm/z3fold.c42
-rw-r--r--net/8021q/vlanproc.c39
-rw-r--r--net/9p/trans_common.c2
-rw-r--r--net/9p/trans_fd.c22
-rw-r--r--net/9p/trans_rdma.c4
-rw-r--r--net/9p/trans_virtio.c5
-rw-r--r--net/9p/trans_xen.c2
-rw-r--r--net/appletalk/aarp.c20
-rw-r--r--net/appletalk/atalk_proc.c51
-rw-r--r--net/appletalk/ddp.c2
-rw-r--r--net/atm/br2684.c14
-rw-r--r--net/atm/clip.c17
-rw-r--r--net/atm/common.c11
-rw-r--r--net/atm/common.h2
-rw-r--r--net/atm/lec.c24
-rw-r--r--net/atm/proc.c131
-rw-r--r--net/atm/pvc.c2
-rw-r--r--net/atm/svc.c2
-rw-r--r--net/ax25/af_ax25.c23
-rw-r--r--net/ax25/ax25_route.c15
-rw-r--r--net/ax25/ax25_uid.c15
-rw-r--r--net/batman-adv/multicast.c2
-rw-r--r--net/batman-adv/translation-table.c84
-rw-r--r--net/bluetooth/af_bluetooth.c47
-rw-r--r--net/bluetooth/bnep/sock.c1
-rw-r--r--net/bluetooth/cmtp/capi.c14
-rw-r--r--net/bluetooth/cmtp/sock.c1
-rw-r--r--net/bluetooth/hci_sock.c2
-rw-r--r--net/bluetooth/hidp/sock.c1
-rw-r--r--net/bluetooth/l2cap_sock.c2
-rw-r--r--net/bluetooth/rfcomm/sock.c2
-rw-r--r--net/bluetooth/sco.c2
-rw-r--r--net/bridge/br_if.c4
-rw-r--r--net/bridge/netfilter/ebt_stp.c4
-rw-r--r--net/bridge/netfilter/ebtables.c3
-rw-r--r--net/caif/caif_socket.c12
-rw-r--r--net/can/bcm.c18
-rw-r--r--net/can/proc.c127
-rw-r--r--net/can/raw.c2
-rw-r--r--net/ceph/osd_client.c27
-rw-r--r--net/compat.c6
-rw-r--r--net/core/datagram.c13
-rw-r--r--net/core/dev.c22
-rw-r--r--net/core/ethtool.c5
-rw-r--r--net/core/filter.c12
-rw-r--r--net/core/neighbour.c31
-rw-r--r--net/core/net-procfs.c65
-rw-r--r--net/core/net-sysfs.c6
-rw-r--r--net/core/sock.c24
-rw-r--r--net/dccp/ccids/ccid2.c14
-rw-r--r--net/dccp/dccp.h3
-rw-r--r--net/dccp/ipv4.c2
-rw-r--r--net/dccp/ipv6.c2
-rw-r--r--net/dccp/proto.c15
-rw-r--r--net/dccp/timer.c2
-rw-r--r--net/decnet/af_decnet.c23
-rw-r--r--net/decnet/dn_dev.c15
-rw-r--r--net/decnet/dn_neigh.c18
-rw-r--r--net/decnet/dn_route.c19
-rw-r--r--net/dsa/dsa2.c9
-rw-r--r--net/ieee802154/6lowpan/6lowpan_i.h4
-rw-r--r--net/ieee802154/6lowpan/reassembly.c14
-rw-r--r--net/ieee802154/socket.c4
-rw-r--r--net/ipv4/af_inet.c8
-rw-r--r--net/ipv4/arp.c17
-rw-r--r--net/ipv4/fib_frontend.c9
-rw-r--r--net/ipv4/fib_trie.c48
-rw-r--r--net/ipv4/igmp.c33
-rw-r--r--net/ipv4/ip_gre.c4
-rw-r--r--net/ipv4/ip_output.c3
-rw-r--r--net/ipv4/ip_sockglue.c2
-rw-r--r--net/ipv4/ip_tunnel.c8
-rw-r--r--net/ipv4/ipconfig.c14
-rw-r--r--net/ipv4/ipmr.c32
-rw-r--r--net/ipv4/ipmr_base.c5
-rw-r--r--net/ipv4/netfilter/ip_tables.c1
-rw-r--r--net/ipv4/netfilter/ipt_rpfilter.c2
-rw-r--r--net/ipv4/ping.c59
-rw-r--r--net/ipv4/proc.c48
-rw-r--r--net/ipv4/raw.c45
-rw-r--r--net/ipv4/route.c141
-rw-r--r--net/ipv4/tcp.c30
-rw-r--r--net/ipv4/tcp_bbr.c4
-rw-r--r--net/ipv4/tcp_ipv4.c82
-rw-r--r--net/ipv4/tcp_output.c7
-rw-r--r--net/ipv4/udp.c115
-rw-r--r--net/ipv4/udplite.c21
-rw-r--r--net/ipv6/Kconfig9
-rw-r--r--net/ipv6/addrconf.c16
-rw-r--r--net/ipv6/af_inet6.c4
-rw-r--r--net/ipv6/anycast.c16
-rw-r--r--net/ipv6/ip6_fib.c18
-rw-r--r--net/ipv6/ip6_flowlabel.c42
-rw-r--r--net/ipv6/ip6_gre.c286
-rw-r--r--net/ipv6/ip6_output.c3
-rw-r--r--net/ipv6/ip6_tunnel.c11
-rw-r--r--net/ipv6/ip6_vti.c4
-rw-r--r--net/ipv6/ip6mr.c32
-rw-r--r--net/ipv6/mcast.c34
-rw-r--r--net/ipv6/netfilter/ip6_tables.c1
-rw-r--r--net/ipv6/ping.c23
-rw-r--r--net/ipv6/proc.c48
-rw-r--r--net/ipv6/raw.c19
-rw-r--r--net/ipv6/route.c33
-rw-r--r--net/ipv6/seg6_iptunnel.c4
-rw-r--r--net/ipv6/sit.c5
-rw-r--r--net/ipv6/tcp_ipv6.c22
-rw-r--r--net/ipv6/udp.c27
-rw-r--r--net/ipv6/udplite.c22
-rw-r--r--net/ipv6/xfrm6_policy.c2
-rw-r--r--net/ipv6/xfrm6_tunnel.c3
-rw-r--r--net/iucv/af_iucv.c7
-rw-r--r--net/kcm/kcmproc.c83
-rw-r--r--net/kcm/kcmsock.c12
-rw-r--r--net/key/af_key.c63
-rw-r--r--net/l2tp/l2tp_ip.c2
-rw-r--r--net/l2tp/l2tp_ip6.c2
-rw-r--r--net/l2tp/l2tp_ppp.c24
-rw-r--r--net/llc/af_llc.c5
-rw-r--r--net/llc/llc_proc.c28
-rw-r--r--net/mac80211/agg-tx.c4
-rw-r--r--net/mac80211/mesh_plink.c8
-rw-r--r--net/mac80211/mlme.c27
-rw-r--r--net/mac80211/tx.c3
-rw-r--r--net/ncsi/ncsi-netlink.c2
-rw-r--r--net/netfilter/core.c3
-rw-r--r--net/netfilter/ipvs/ip_vs_app.c16
-rw-r--r--net/netfilter/ipvs/ip_vs_conn.c52
-rw-r--r--net/netfilter/ipvs/ip_vs_core.c12
-rw-r--r--net/netfilter/ipvs/ip_vs_ctl.c68
-rw-r--r--net/netfilter/nf_conntrack_expect.c17
-rw-r--r--net/netfilter/nf_conntrack_proto_tcp.c11
-rw-r--r--net/netfilter/nf_conntrack_standalone.c33
-rw-r--r--net/netfilter/nf_log.c19
-rw-r--r--net/netfilter/nf_synproxy_core.c17
-rw-r--r--net/netfilter/nf_tables_api.c85
-rw-r--r--net/netfilter/nf_tables_core.c17
-rw-r--r--net/netfilter/nfnetlink_acct.c2
-rw-r--r--net/netfilter/nfnetlink_cthelper.c7
-rw-r--r--net/netfilter/nfnetlink_log.c18
-rw-r--r--net/netfilter/nfnetlink_queue.c18
-rw-r--r--net/netfilter/nft_compat.c201
-rw-r--r--net/netfilter/nft_ct.c20
-rw-r--r--net/netfilter/nft_immediate.c15
-rw-r--r--net/netfilter/nft_limit.c38
-rw-r--r--net/netfilter/nft_meta.c14
-rw-r--r--net/netfilter/x_tables.c101
-rw-r--r--net/netfilter/xt_hashlimit.c92
-rw-r--r--net/netlink/af_netlink.c26
-rw-r--r--net/netrom/af_netrom.c20
-rw-r--r--net/netrom/nr_route.c29
-rw-r--r--net/nfc/llcp_sock.c9
-rw-r--r--net/nfc/rawsock.c4
-rw-r--r--net/nsh/nsh.c4
-rw-r--r--net/openvswitch/flow_netlink.c9
-rw-r--r--net/packet/af_packet.c30
-rw-r--r--net/phonet/pn_dev.c6
-rw-r--r--net/phonet/socket.c39
-rw-r--r--net/qrtr/qrtr.c2
-rw-r--r--net/rds/ib_cm.c3
-rw-r--r--net/rds/recv.c1
-rw-r--r--net/rfkill/rfkill-gpio.c7
-rw-r--r--net/rose/af_rose.c28
-rw-r--r--net/rose/rose_route.c44
-rw-r--r--net/rxrpc/af_rxrpc.c12
-rw-r--r--net/rxrpc/ar-internal.h5
-rw-r--r--net/rxrpc/conn_event.c11
-rw-r--r--net/rxrpc/input.c2
-rw-r--r--net/rxrpc/local_event.c3
-rw-r--r--net/rxrpc/local_object.c57
-rw-r--r--net/rxrpc/net_ns.c7
-rw-r--r--net/rxrpc/output.c34
-rw-r--r--net/rxrpc/peer_event.c46
-rw-r--r--net/rxrpc/proc.c31
-rw-r--r--net/rxrpc/rxkad.c6
-rw-r--r--net/rxrpc/sendmsg.c10
-rw-r--r--net/sched/act_skbedit.c3
-rw-r--r--net/sched/act_skbmod.c5
-rw-r--r--net/sched/act_vlan.c2
-rw-r--r--net/sched/cls_api.c4
-rw-r--r--net/sched/cls_flower.c2
-rw-r--r--net/sched/sch_api.c14
-rw-r--r--net/sched/sch_fq.c37
-rw-r--r--net/sched/sch_red.c5
-rw-r--r--net/sched/sch_tbf.c5
-rw-r--r--net/sctp/associola.c30
-rw-r--r--net/sctp/inqueue.c2
-rw-r--r--net/sctp/ipv6.c7
-rw-r--r--net/sctp/objcnt.c16
-rw-r--r--net/sctp/proc.c71
-rw-r--r--net/sctp/protocol.c4
-rw-r--r--net/sctp/sm_make_chunk.c2
-rw-r--r--net/sctp/sm_statefuns.c94
-rw-r--r--net/sctp/socket.c55
-rw-r--r--net/sctp/stream.c2
-rw-r--r--net/sctp/ulpevent.c1
-rw-r--r--net/smc/af_smc.c61
-rw-r--r--net/smc/smc_core.c22
-rw-r--r--net/smc/smc_core.h3
-rw-r--r--net/smc/smc_pnet.c71
-rw-r--r--net/socket.c55
-rw-r--r--net/sunrpc/rpc_pipe.c16
-rw-r--r--net/sunrpc/xprtrdma/fmr_ops.c5
-rw-r--r--net/sunrpc/xprtrdma/frwr_ops.c9
-rw-r--r--net/sunrpc/xprtrdma/verbs.c5
-rw-r--r--net/sunrpc/xprtrdma/xprt_rdma.h2
-rw-r--r--net/tipc/node.c17
-rw-r--r--net/tipc/socket.c17
-rw-r--r--net/tls/tls_main.c19
-rw-r--r--net/tls/tls_sw.c9
-rw-r--r--net/unix/af_unix.c47
-rw-r--r--net/vmw_vsock/af_vsock.c19
-rw-r--r--net/wireless/core.c3
-rw-r--r--net/wireless/nl80211.c4
-rw-r--r--net/wireless/reg.c4
-rw-r--r--net/wireless/wext-proc.c17
-rw-r--r--net/x25/af_x25.c2
-rw-r--r--net/x25/x25_proc.c48
-rw-r--r--net/xfrm/xfrm_policy.c5
-rw-r--r--net/xfrm/xfrm_proc.c16
-rw-r--r--net/xfrm/xfrm_state.c6
-rw-r--r--samples/bpf/Makefile2
-rw-r--r--samples/sockmap/Makefile7
-rw-r--r--scripts/Makefile.gcc-plugins2
-rw-r--r--scripts/Makefile.lib2
-rwxr-xr-xscripts/checkpatch.pl2
-rwxr-xr-xscripts/documentation-file-ref-check125
-rw-r--r--scripts/dtc/checks.c5
-rwxr-xr-xscripts/extract_xc3028.pl2
-rwxr-xr-xscripts/faddr2line5
-rw-r--r--scripts/genksyms/Makefile4
-rw-r--r--scripts/mod/sumversion.c9
-rwxr-xr-xscripts/spdxcheck.py284
-rwxr-xr-xscripts/split-man.pl2
-rw-r--r--security/commoncap.c8
-rw-r--r--security/integrity/evm/evm_crypto.c3
-rw-r--r--security/keys/proc.c34
-rw-r--r--security/selinux/hooks.c77
-rw-r--r--security/selinux/ss/services.c2
-rw-r--r--sound/core/control_compat.c3
-rw-r--r--sound/core/pcm_compat.c2
-rw-r--r--sound/core/seq/seq_virmidi.c4
-rw-r--r--sound/core/timer.c4
-rw-r--r--sound/drivers/aloop.c17
-rw-r--r--sound/firewire/amdtp-stream.c5
-rw-r--r--sound/pci/hda/hda_intel.c2
-rw-r--r--sound/pci/hda/hda_local.h6
-rw-r--r--sound/pci/hda/patch_realtek.c3
-rw-r--r--sound/usb/mixer.c8
-rw-r--r--sound/usb/stream.c9
-rw-r--r--tools/arch/arm/include/uapi/asm/kvm.h6
-rw-r--r--tools/arch/arm64/include/uapi/asm/kvm.h6
-rw-r--r--tools/arch/x86/include/asm/cpufeatures.h21
-rw-r--r--tools/bpf/Makefile2
-rw-r--r--tools/bpf/bpf_dbg.c7
-rw-r--r--tools/include/linux/compiler-gcc.h3
-rw-r--r--tools/include/linux/spinlock.h3
-rw-r--r--tools/include/uapi/linux/bpf.h2
-rw-r--r--tools/include/uapi/linux/kvm.h7
-rw-r--r--tools/include/uapi/linux/prctl.h12
-rw-r--r--tools/lib/api/fs/tracing_path.c40
-rw-r--r--tools/lib/api/fs/tracing_path.h9
-rw-r--r--tools/lib/bpf/libbpf.c2
-rw-r--r--tools/lib/symbol/kallsyms.c6
-rw-r--r--tools/lib/symbol/kallsyms.h2
-rw-r--r--tools/memory-model/Documentation/cheatsheet.txt7
-rw-r--r--tools/memory-model/Documentation/explanation.txt221
-rw-r--r--tools/memory-model/Documentation/references.txt17
-rw-r--r--tools/memory-model/README2
-rw-r--r--tools/memory-model/linux-kernel.bell4
-rw-r--r--tools/memory-model/linux-kernel.cat41
-rw-r--r--tools/memory-model/linux-kernel.def34
-rw-r--r--tools/memory-model/litmus-tests/.gitignore1
-rw-r--r--tools/memory-model/litmus-tests/IRIW+mbonceonces+OnceOnce.litmus2
-rw-r--r--tools/memory-model/litmus-tests/MP+polockmbonce+poacquiresilsil.litmus35
-rw-r--r--tools/memory-model/litmus-tests/MP+polockonce+poacquiresilsil.litmus34
-rw-r--r--tools/memory-model/litmus-tests/README19
-rw-r--r--tools/memory-model/litmus-tests/WRC+pooncerelease+rmbonceonce+Once.litmus4
-rw-r--r--tools/memory-model/lock.cat107
-rw-r--r--tools/memory-model/scripts/checkalllitmus.sh73
-rw-r--r--tools/memory-model/scripts/checklitmus.sh86
-rw-r--r--tools/objtool/arch/x86/include/asm/insn.h18
-rw-r--r--tools/objtool/check.c167
-rw-r--r--tools/objtool/elf.c42
-rw-r--r--tools/objtool/elf.h2
-rw-r--r--tools/perf/Documentation/Makefile29
-rw-r--r--tools/perf/Documentation/asciidoctor-extensions.rb29
-rw-r--r--tools/perf/Documentation/perf-buildid-cache.txt7
-rw-r--r--tools/perf/Documentation/perf-stat.txt16
-rw-r--r--tools/perf/Documentation/perf.data-file-format.txt10
-rw-r--r--tools/perf/Makefile.config14
-rw-r--r--tools/perf/Makefile.perf10
-rw-r--r--tools/perf/arch/arm/tests/dwarf-unwind.c2
-rw-r--r--tools/perf/arch/arm64/tests/dwarf-unwind.c2
-rw-r--r--tools/perf/arch/powerpc/tests/dwarf-unwind.c2
-rw-r--r--tools/perf/arch/powerpc/util/skip-callchain-idx.c3
-rw-r--r--tools/perf/arch/x86/tests/dwarf-unwind.c2
-rw-r--r--tools/perf/arch/x86/util/Build2
-rw-r--r--tools/perf/arch/x86/util/event.c76
-rw-r--r--tools/perf/arch/x86/util/machine.c103
-rw-r--r--tools/perf/bench/numa.c2
-rw-r--r--tools/perf/builtin-annotate.c9
-rw-r--r--tools/perf/builtin-buildid-cache.c81
-rw-r--r--tools/perf/builtin-inject.c4
-rw-r--r--tools/perf/builtin-kallsyms.c2
-rw-r--r--tools/perf/builtin-kmem.c6
-rw-r--r--tools/perf/builtin-report.c27
-rw-r--r--tools/perf/builtin-script.c56
-rw-r--r--tools/perf/builtin-stat.c92
-rw-r--r--tools/perf/builtin-timechart.c8
-rw-r--r--tools/perf/builtin-top.c9
-rw-r--r--tools/perf/builtin-trace.c11
-rwxr-xr-xtools/perf/check-headers.sh30
-rw-r--r--tools/perf/examples/bpf/5sec.c49
-rw-r--r--tools/perf/examples/bpf/empty.c3
-rw-r--r--tools/perf/include/bpf/bpf.h13
-rw-r--r--tools/perf/perf.c24
-rw-r--r--tools/perf/pmu-events/arch/x86/mapfile.csv1
-rw-r--r--tools/perf/tests/builtin-test.c9
-rw-r--r--tools/perf/tests/code-reading.c5
-rw-r--r--tools/perf/tests/hists_common.c6
-rw-r--r--tools/perf/tests/mmap-thread-lookup.c7
-rw-r--r--tools/perf/tests/parse-events.c22
-rwxr-xr-xtools/perf/tests/shell/record+probe_libc_inet_pton.sh12
-rw-r--r--tools/perf/tests/topology.c30
-rw-r--r--tools/perf/tests/vmlinux-kallsyms.c20
-rwxr-xr-xtools/perf/trace/beauty/prctl_option.sh2
-rw-r--r--tools/perf/ui/browsers/annotate.c8
-rw-r--r--tools/perf/ui/browsers/map.c2
-rw-r--r--tools/perf/ui/stdio/hist.c3
-rw-r--r--tools/perf/util/Build2
-rw-r--r--tools/perf/util/annotate.c60
-rw-r--r--tools/perf/util/annotate.h11
-rw-r--r--tools/perf/util/auxtrace.c12
-rw-r--r--tools/perf/util/bpf-loader.c6
-rw-r--r--tools/perf/util/build-id.c4
-rw-r--r--tools/perf/util/config.c16
-rw-r--r--tools/perf/util/config.h1
-rw-r--r--tools/perf/util/cs-etm-decoder/cs-etm-decoder.c12
-rw-r--r--tools/perf/util/cs-etm.c32
-rw-r--r--tools/perf/util/db-export.c7
-rw-r--r--tools/perf/util/dso.c34
-rw-r--r--tools/perf/util/dso.h37
-rw-r--r--tools/perf/util/env.c31
-rw-r--r--tools/perf/util/env.h3
-rw-r--r--tools/perf/util/event.c73
-rw-r--r--tools/perf/util/event.h8
-rw-r--r--tools/perf/util/evlist.c15
-rw-r--r--tools/perf/util/evlist.h3
-rw-r--r--tools/perf/util/evsel.c2
-rw-r--r--tools/perf/util/evsel.h1
-rw-r--r--tools/perf/util/genelf.c2
-rw-r--r--tools/perf/util/intel-bts.c3
-rw-r--r--tools/perf/util/intel-pt-decoder/insn.h18
-rw-r--r--tools/perf/util/intel-pt.c8
-rw-r--r--tools/perf/util/llvm-utils.c19
-rw-r--r--tools/perf/util/machine.c355
-rw-r--r--tools/perf/util/machine.h72
-rw-r--r--tools/perf/util/map.c121
-rw-r--r--tools/perf/util/map.h74
-rw-r--r--tools/perf/util/parse-events.c205
-rw-r--r--tools/perf/util/parse-events.h7
-rw-r--r--tools/perf/util/parse-events.y14
-rw-r--r--tools/perf/util/probe-event.c29
-rw-r--r--tools/perf/util/probe-file.c3
-rw-r--r--tools/perf/util/scripting-engines/trace-event-python.c2
-rw-r--r--tools/perf/util/session.c13
-rw-r--r--tools/perf/util/sort.c10
-rw-r--r--tools/perf/util/sort.h4
-rw-r--r--tools/perf/util/srcline.c1
-rw-r--r--tools/perf/util/stat.h3
-rw-r--r--tools/perf/util/symbol-elf.c494
-rw-r--r--tools/perf/util/symbol-minimal.c3
-rw-r--r--tools/perf/util/symbol.c264
-rw-r--r--tools/perf/util/symbol.h24
-rw-r--r--tools/perf/util/symbol_fprintf.c4
-rw-r--r--tools/perf/util/thread.c35
-rw-r--r--tools/perf/util/thread.h13
-rw-r--r--tools/perf/util/trace-event-info.c11
-rw-r--r--tools/perf/util/trace-event.c8
-rw-r--r--tools/perf/util/unwind-libdw.c23
-rw-r--r--tools/perf/util/unwind-libunwind-local.c19
-rw-r--r--tools/perf/util/util.c34
-rw-r--r--tools/perf/util/util.h4
-rw-r--r--tools/perf/util/vdso.c6
-rw-r--r--tools/power/acpi/Makefile.config1
-rw-r--r--tools/testing/radix-tree/Makefile6
-rw-r--r--tools/testing/radix-tree/idr-test.c7
-rw-r--r--tools/testing/radix-tree/multiorder.c63
-rw-r--r--tools/testing/radix-tree/test.c19
-rw-r--r--tools/testing/radix-tree/test.h3
-rw-r--r--tools/testing/selftests/bpf/config2
-rw-r--r--tools/testing/selftests/bpf/test_progs.c4
-rw-r--r--tools/testing/selftests/bpf/test_verifier.c5
-rw-r--r--tools/testing/selftests/kvm/Makefile2
-rw-r--r--tools/testing/selftests/kvm/include/test_util.h1
-rw-r--r--tools/testing/selftests/kvm/lib/kvm_util.c16
-rw-r--r--tools/testing/selftests/kvm/sync_regs_test.c40
-rw-r--r--tools/testing/selftests/kvm/vmx_tsc_adjust_test.c4
-rw-r--r--tools/testing/selftests/lib.mk8
-rw-r--r--tools/testing/selftests/net/Makefile3
-rw-r--r--tools/testing/selftests/net/config5
-rw-r--r--tools/testing/selftests/net/reuseport_bpf_numa.c4
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-find-errors.sh56
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh12
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-recheck.sh4
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh1
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/parse-console.sh115
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/parse-torture.sh105
-rw-r--r--tools/testing/selftests/seccomp/seccomp_bpf.c22
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json11
-rw-r--r--tools/testing/selftests/x86/Makefile2
-rw-r--r--tools/testing/selftests/x86/mov_ss_trap.c285
-rw-r--r--tools/testing/selftests/x86/mpx-mini-test.c7
-rw-r--r--tools/testing/selftests/x86/pkey-helpers.h20
-rw-r--r--tools/testing/selftests/x86/protection_keys.c254
-rw-r--r--tools/virtio/linux/dma-mapping.h2
-rw-r--r--virt/kvm/arm/mmu.c1
-rw-r--r--virt/kvm/arm/vgic/vgic-debug.c5
-rw-r--r--virt/kvm/arm/vgic/vgic-init.c2
-rw-r--r--virt/kvm/arm/vgic/vgic-its.c34
-rw-r--r--virt/kvm/arm/vgic/vgic-mmio.c10
-rw-r--r--virt/kvm/arm/vgic/vgic-v2.c38
-rw-r--r--virt/kvm/arm/vgic/vgic-v3.c53
-rw-r--r--virt/kvm/arm/vgic/vgic.c52
-rw-r--r--virt/kvm/arm/vgic/vgic.h14
-rw-r--r--virt/kvm/eventfd.c2
2441 files changed, 39440 insertions, 30415 deletions
diff --git a/Documentation/00-INDEX b/Documentation/00-INDEX
index 708dc4c166e4..2754fe83f0d4 100644
--- a/Documentation/00-INDEX
+++ b/Documentation/00-INDEX
@@ -64,8 +64,6 @@ auxdisplay/
- misc. LCD driver documentation (cfag12864b, ks0108).
backlight/
- directory with info on controlling backlights in flat panel displays
-bcache.txt
- - Block-layer cache on fast SSDs to improve slow (raid) I/O performance.
block/
- info on the Block I/O (BIO) layer.
blockdev/
@@ -78,18 +76,10 @@ bus-devices/
- directory with info on TI GPMC (General Purpose Memory Controller)
bus-virt-phys-mapping.txt
- how to access I/O mapped memory from within device drivers.
-cachetlb.txt
- - describes the cache/TLB flushing interfaces Linux uses.
cdrom/
- directory with information on the CD-ROM drivers that Linux has.
cgroup-v1/
- cgroups v1 features, including cpusets and memory controller.
-cgroup-v2.txt
- - cgroups v2 features, including cpusets and memory controller.
-circular-buffers.txt
- - how to make use of the existing circular buffer infrastructure
-clk.txt
- - info on the common clock framework
cma/
- Continuous Memory Area (CMA) debugfs interface.
conf.py
diff --git a/Documentation/ABI/stable/sysfs-devices-node b/Documentation/ABI/stable/sysfs-devices-node
index 5b2d0f08867c..3e90e1f3bf0a 100644
--- a/Documentation/ABI/stable/sysfs-devices-node
+++ b/Documentation/ABI/stable/sysfs-devices-node
@@ -90,4 +90,4 @@ Date: December 2009
Contact: Lee Schermerhorn <lee.schermerhorn@hp.com>
Description:
The node's huge page size control/query attributes.
- See Documentation/vm/hugetlbpage.txt
\ No newline at end of file
+ See Documentation/admin-guide/mm/hugetlbpage.rst
\ No newline at end of file
diff --git a/Documentation/ABI/testing/sysfs-class-cxl b/Documentation/ABI/testing/sysfs-class-cxl
index 640f65e79ef1..8e69345c37cc 100644
--- a/Documentation/ABI/testing/sysfs-class-cxl
+++ b/Documentation/ABI/testing/sysfs-class-cxl
@@ -244,3 +244,11 @@ Description: read only
Returns 1 if the psl timebase register is synchronized
with the core timebase register, 0 otherwise.
Users: https://github.com/ibm-capi/libcxl
+
+What: /sys/class/cxl/<card>/tunneled_ops_supported
+Date: May 2018
+Contact: linuxppc-dev@lists.ozlabs.org
+Description: read only
+ Returns 1 if tunneled operations are supported in capi mode,
+ 0 otherwise.
+Users: https://github.com/ibm-capi/libcxl
diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu
index 025b7cf3768d..bd4975e132d3 100644
--- a/Documentation/ABI/testing/sysfs-devices-system-cpu
+++ b/Documentation/ABI/testing/sysfs-devices-system-cpu
@@ -478,6 +478,7 @@ What: /sys/devices/system/cpu/vulnerabilities
/sys/devices/system/cpu/vulnerabilities/meltdown
/sys/devices/system/cpu/vulnerabilities/spectre_v1
/sys/devices/system/cpu/vulnerabilities/spectre_v2
+ /sys/devices/system/cpu/vulnerabilities/spec_store_bypass
Date: January 2018
Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
Description: Information about CPU vulnerabilities
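
The spec_store_bypass entry added above is, like the existing vulnerability
attributes, a read-only text file. A minimal sketch of querying it follows;
the exact mitigation string depends on the CPU and kernel configuration, so
the output shown is indicative only:

    $ cat /sys/devices/system/cpu/vulnerabilities/spec_store_bypass
    Mitigation: Speculative Store Bypass disabled via prctl and seccomp
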
diff --git a/Documentation/ABI/testing/sysfs-kernel-mm-hugepages b/Documentation/ABI/testing/sysfs-kernel-mm-hugepages
index e21c00571cf4..fdaa2162fae1 100644
--- a/Documentation/ABI/testing/sysfs-kernel-mm-hugepages
+++ b/Documentation/ABI/testing/sysfs-kernel-mm-hugepages
@@ -12,4 +12,4 @@ Description:
free_hugepages
surplus_hugepages
resv_hugepages
- See Documentation/vm/hugetlbpage.txt for details.
+ See Documentation/admin-guide/mm/hugetlbpage.rst for details.
diff --git a/Documentation/ABI/testing/sysfs-kernel-mm-ksm b/Documentation/ABI/testing/sysfs-kernel-mm-ksm
index 73e653ee2481..dfc13244cda3 100644
--- a/Documentation/ABI/testing/sysfs-kernel-mm-ksm
+++ b/Documentation/ABI/testing/sysfs-kernel-mm-ksm
@@ -40,7 +40,7 @@ Description: Kernel Samepage Merging daemon sysfs interface
sleep_millisecs: how many milliseconds ksm should sleep between
scans.
- See Documentation/vm/ksm.txt for more information.
+ See Documentation/vm/ksm.rst for more information.
What: /sys/kernel/mm/ksm/merge_across_nodes
Date: January 2013
diff --git a/Documentation/ABI/testing/sysfs-kernel-slab b/Documentation/ABI/testing/sysfs-kernel-slab
index 2cc0a72b64be..29601d93a1c2 100644
--- a/Documentation/ABI/testing/sysfs-kernel-slab
+++ b/Documentation/ABI/testing/sysfs-kernel-slab
@@ -37,7 +37,7 @@ Description:
The alloc_calls file is read-only and lists the kernel code
locations from which allocations for this cache were performed.
The alloc_calls file only contains information if debugging is
- enabled for that cache (see Documentation/vm/slub.txt).
+ enabled for that cache (see Documentation/vm/slub.rst).
What: /sys/kernel/slab/cache/alloc_fastpath
Date: February 2008
@@ -219,7 +219,7 @@ Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
Description:
The free_calls file is read-only and lists the locations of
object frees if slab debugging is enabled (see
- Documentation/vm/slub.txt).
+ Documentation/vm/slub.rst).
What: /sys/kernel/slab/cache/free_fastpath
Date: February 2008
diff --git a/Documentation/RCU/whatisRCU.txt b/Documentation/RCU/whatisRCU.txt
index a27fbfb0efb8..65eb856526b7 100644
--- a/Documentation/RCU/whatisRCU.txt
+++ b/Documentation/RCU/whatisRCU.txt
@@ -1,3 +1,5 @@
+What is RCU? -- "Read, Copy, Update"
+
Please note that the "What is RCU?" LWN series is an excellent place
to start learning about RCU:
diff --git a/Documentation/bcache.txt b/Documentation/admin-guide/bcache.rst
index c0ce64d75bbf..c0ce64d75bbf 100644
--- a/Documentation/bcache.txt
+++ b/Documentation/admin-guide/bcache.rst
diff --git a/Documentation/cgroup-v2.txt b/Documentation/admin-guide/cgroup-v2.rst
index 74cdeaed9f7a..74cdeaed9f7a 100644
--- a/Documentation/cgroup-v2.txt
+++ b/Documentation/admin-guide/cgroup-v2.rst
diff --git a/Documentation/admin-guide/index.rst b/Documentation/admin-guide/index.rst
index 5bb9161dbe6a..48d70af11652 100644
--- a/Documentation/admin-guide/index.rst
+++ b/Documentation/admin-guide/index.rst
@@ -48,6 +48,7 @@ configure specific aspects of kernel behavior to your liking.
:maxdepth: 1
initrd
+ cgroup-v2
serial-console
braille-console
parport
@@ -60,9 +61,11 @@ configure specific aspects of kernel behavior to your liking.
mono
java
ras
+ bcache
pm/index
thunderbolt
LSM/index
+ mm/index
.. only:: subproject and html
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 11fc28ecdb6d..9d699c85d8fe 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -106,11 +106,11 @@
use by PCI
Format: <irq>,<irq>...
- acpi_mask_gpe= [HW,ACPI]
+ acpi_mask_gpe= [HW,ACPI]
Due to the existence of _Lxx/_Exx, some GPEs triggered
by unsupported hardware/firmware features can result in
- GPE floodings that cannot be automatically disabled by
- the GPE dispatcher.
+ GPE floodings that cannot be automatically disabled by
+ the GPE dispatcher.
This facility can be used to prevent such uncontrolled
GPE floodings.
Format: <int>
@@ -472,10 +472,10 @@
for platform specific values (SB1, Loongson3 and
others).
- ccw_timeout_log [S390]
+ ccw_timeout_log [S390]
See Documentation/s390/CommonIO for details.
- cgroup_disable= [KNL] Disable a particular controller
+ cgroup_disable= [KNL] Disable a particular controller
Format: {name of the controller(s) to disable}
The effects of cgroup_disable=foo are:
- foo isn't auto-mounted if you mount all cgroups in
@@ -518,7 +518,7 @@
those clocks in any way. This parameter is useful for
debug and development, but should not be needed on a
platform with proper driver support. For more
- information, see Documentation/clk.txt.
+ information, see Documentation/driver-api/clk.rst.
clock= [BUGS=X86-32, HW] gettimeofday clocksource override.
[Deprecated]
@@ -587,11 +587,6 @@
Sets the size of memory pool for coherent, atomic dma
allocations, by default set to 256K.
- code_bytes [X86] How many bytes of object code to print
- in an oops report.
- Range: 0 - 8192
- Default: 64
-
com20020= [HW,NET] ARCnet - COM20020 chipset
Format:
<io>[,<irq>[,<nodeID>[,<backplane>[,<ckp>[,<timeout>]]]]]
@@ -641,8 +636,8 @@
hvc<n> Use the hypervisor console device <n>. This is for
both Xen and PowerPC hypervisors.
- If the device connected to the port is not a TTY but a braille
- device, prepend "brl," before the device type, for instance
+ If the device connected to the port is not a TTY but a braille
+ device, prepend "brl," before the device type, for instance
console=brl,ttyS0
For now, only VisioBraille is supported.
@@ -662,7 +657,7 @@
consoleblank= [KNL] The console blank (screen saver) timeout in
seconds. A value of 0 disables the blank timer.
- Defaults to 0.
+ Defaults to 0.
coredump_filter=
[KNL] Change the default value for
@@ -730,7 +725,7 @@
or memory reserved is below 4G.
cryptomgr.notests
- [KNL] Disable crypto self-tests
+ [KNL] Disable crypto self-tests
cs89x0_dma= [HW,NET]
Format: <dma>
@@ -746,7 +741,7 @@
Format: <port#>,<type>
See also Documentation/input/devices/joystick-parport.rst
- ddebug_query= [KNL,DYNAMIC_DEBUG] Enable debug messages at early boot
+ ddebug_query= [KNL,DYNAMIC_DEBUG] Enable debug messages at early boot
time. See
Documentation/admin-guide/dynamic-debug-howto.rst for
details. Deprecated, see dyndbg.
@@ -833,7 +828,7 @@
causing system reset or hang due to sending
INIT from AP to BSP.
- disable_ddw [PPC/PSERIES]
+ disable_ddw [PPC/PSERIES]
Disable Dynamic DMA Window support. Use this
to work around buggy firmware.
@@ -1188,7 +1183,7 @@
parameter will force ia64_sal_cache_flush to call
ia64_pal_cache_flush instead of SAL_CACHE_FLUSH.
- forcepae [X86-32]
+ forcepae [X86-32]
Forcefully enable Physical Address Extension (PAE).
Many Pentium M systems disable PAE but may have a
functionally usable PAE implementation.
@@ -1247,7 +1242,7 @@
gamma= [HW,DRM]
- gart_fix_e820= [X86_64] disable the fix e820 for K8 GART
+ gart_fix_e820= [X86_64] disable the fix e820 for K8 GART
Format: off | on
default: on
@@ -1341,23 +1336,32 @@
x86-64 are 2M (when the CPU supports "pse") and 1G
(when the CPU supports the "pdpe1gb" cpuinfo flag).
- hvc_iucv= [S390] Number of z/VM IUCV hypervisor console (HVC)
- terminal devices. Valid values: 0..8
- hvc_iucv_allow= [S390] Comma-separated list of z/VM user IDs.
- If specified, z/VM IUCV HVC accepts connections
- from listed z/VM user IDs only.
+ hung_task_panic=
+ [KNL] Should the hung task detector generate panics.
+ Format: <integer>
+ A nonzero value instructs the kernel to panic when a
+ hung task is detected. The default value is controlled
+ by the CONFIG_BOOTPARAM_HUNG_TASK_PANIC build-time
+ option. The value selected by this boot parameter can
+ be changed later by the kernel.hung_task_panic sysctl.
+
+ hvc_iucv= [S390] Number of z/VM IUCV hypervisor console (HVC)
+ terminal devices. Valid values: 0..8
+ hvc_iucv_allow= [S390] Comma-separated list of z/VM user IDs.
+ If specified, z/VM IUCV HVC accepts connections
+ from listed z/VM user IDs only.
keep_bootcon [KNL]
Do not unregister boot console at start. This is only
useful for debugging when something happens in the window
between unregistering the boot console and initializing
the real console.
- i2c_bus= [HW] Override the default board specific I2C bus speed
- or register an additional I2C bus that is not
- registered from board initialization code.
- Format:
- <bus_id>,<clkrate>
+ i2c_bus= [HW] Override the default board specific I2C bus speed
+ or register an additional I2C bus that is not
+ registered from board initialization code.
+ Format:
+ <bus_id>,<clkrate>
i8042.debug [HW] Toggle i8042 debug mode
i8042.unmask_kbd_data
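
The hung_task_panic= documentation added in this hunk names a matching runtime
knob, the kernel.hung_task_panic sysctl. A brief sketch of flipping it after
boot, assuming procfs is mounted at /proc:

    # Equivalent to booting with hung_task_panic=1; takes effect immediately.
    sysctl -w kernel.hung_task_panic=1

    # The same toggle through procfs:
    echo 1 > /proc/sys/kernel/hung_task_panic
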
@@ -1386,7 +1390,7 @@
Default: only on s2r transitions on x86; most other
architectures force reset to be always executed
i8042.unlock [HW] Unlock (ignore) the keylock
- i8042.kbdreset [HW] Reset device connected to KBD port
+ i8042.kbdreset [HW] Reset device connected to KBD port
i810= [HW,DRM]
@@ -1548,13 +1552,13 @@
programs exec'd, files mmap'd for exec, and all files
opened for read by uid=0.
- ima_template= [IMA]
+ ima_template= [IMA]
Select one of defined IMA measurements template formats.
Formats: { "ima" | "ima-ng" | "ima-sig" }
Default: "ima-ng"
ima_template_fmt=
- [IMA] Define a custom template format.
+ [IMA] Define a custom template format.
Format: { "field1|...|fieldN" }
ima.ahash_minsize= [IMA] Minimum file size for asynchronous hash usage
@@ -1597,7 +1601,7 @@
inport.irq= [HW] Inport (ATI XL and Microsoft) busmouse driver
Format: <irq>
- int_pln_enable [x86] Enable power limit notification interrupt
+ int_pln_enable [x86] Enable power limit notification interrupt
integrity_audit=[IMA]
Format: { "0" | "1" }
@@ -1650,39 +1654,39 @@
0 disables intel_idle and fall back on acpi_idle.
1 to 9 specify maximum depth of C-state.
- intel_pstate= [X86]
- disable
- Do not enable intel_pstate as the default
- scaling driver for the supported processors
- passive
- Use intel_pstate as a scaling driver, but configure it
- to work with generic cpufreq governors (instead of
- enabling its internal governor). This mode cannot be
- used along with the hardware-managed P-states (HWP)
- feature.
- force
- Enable intel_pstate on systems that prohibit it by default
- in favor of acpi-cpufreq. Forcing the intel_pstate driver
- instead of acpi-cpufreq may disable platform features, such
- as thermal controls and power capping, that rely on ACPI
- P-States information being indicated to OSPM and therefore
- should be used with caution. This option does not work with
- processors that aren't supported by the intel_pstate driver
- or on platforms that use pcc-cpufreq instead of acpi-cpufreq.
- no_hwp
- Do not enable hardware P state control (HWP)
- if available.
- hwp_only
- Only load intel_pstate on systems which support
- hardware P state control (HWP) if available.
- support_acpi_ppc
- Enforce ACPI _PPC performance limits. If the Fixed ACPI
- Description Table, specifies preferred power management
- profile as "Enterprise Server" or "Performance Server",
- then this feature is turned on by default.
- per_cpu_perf_limits
- Allow per-logical-CPU P-State performance control limits using
- cpufreq sysfs interface
+ intel_pstate= [X86]
+ disable
+ Do not enable intel_pstate as the default
+ scaling driver for the supported processors
+ passive
+ Use intel_pstate as a scaling driver, but configure it
+ to work with generic cpufreq governors (instead of
+ enabling its internal governor). This mode cannot be
+ used along with the hardware-managed P-states (HWP)
+ feature.
+ force
+ Enable intel_pstate on systems that prohibit it by default
+ in favor of acpi-cpufreq. Forcing the intel_pstate driver
+ instead of acpi-cpufreq may disable platform features, such
+ as thermal controls and power capping, that rely on ACPI
+ P-States information being indicated to OSPM and therefore
+ should be used with caution. This option does not work with
+ processors that aren't supported by the intel_pstate driver
+ or on platforms that use pcc-cpufreq instead of acpi-cpufreq.
+ no_hwp
+ Do not enable hardware P state control (HWP)
+ if available.
+ hwp_only
+ Only load intel_pstate on systems which support
+ hardware P state control (HWP) if available.
+ support_acpi_ppc
+ Enforce ACPI _PPC performance limits. If the Fixed ACPI
+			Description Table specifies the preferred power management
+ profile as "Enterprise Server" or "Performance Server",
+ then this feature is turned on by default.
+ per_cpu_perf_limits
+ Allow per-logical-CPU P-State performance control limits using
+ cpufreq sysfs interface
intremap= [X86-64, Intel-IOMMU]
on enable Interrupt Remapping (default)
@@ -1705,7 +1709,6 @@
nopanic
merge
nomerge
- forcesac
soft
pt [x86, IA-64]
nobypass [PPC/POWERNV]
@@ -2027,7 +2030,7 @@
* [no]ncqtrim: Turn off queued DSM TRIM.
* nohrst, nosrst, norst: suppress hard, soft
- and both resets.
+ and both resets.
* rstonce: only attempt one reset during
hot-unplug link recovery
@@ -2215,7 +2218,7 @@
[KNL,SH] Allow user to override the default size for
per-device physically contiguous DMA buffers.
- memhp_default_state=online/offline
+ memhp_default_state=online/offline
[KNL] Set the initial state for the memory hotplug
onlining policy. If not specified, the default value is
set according to the
@@ -2600,6 +2603,9 @@
emulation library even if a 387 maths coprocessor
is present.
+ no5lvl [X86-64] Disable 5-level paging mode. Forces
+ kernel to use 4-level paging instead.
+
no_console_suspend
[HW] Never suspend the console
Disable suspending of consoles during suspend and
@@ -2680,6 +2686,9 @@
allow data leaks with this option, which is equivalent
to spectre_v2=off.
+ nospec_store_bypass_disable
+ [HW] Disable all mitigations for the Speculative Store Bypass vulnerability
+
noxsave [BUGS=X86] Disables x86 extended register state save
and restore using xsave. The kernel will fallback to
enabling legacy floating-point and sse state.
@@ -2762,7 +2771,7 @@
[X86,PV_OPS] Disable paravirtualized VMware scheduler
clock and use the default one.
- no-steal-acc [X86,KVM] Disable paravirtualized steal time accounting.
+ no-steal-acc [X86,KVM] Disable paravirtualized steal time accounting.
steal time is computed, but won't influence scheduler
behaviour
@@ -2823,7 +2832,7 @@
notsc [BUGS=X86-32] Disable Time Stamp Counter
nowatchdog [KNL] Disable both lockup detectors, i.e.
- soft-lockup and NMI watchdog (hard-lockup).
+ soft-lockup and NMI watchdog (hard-lockup).
nowb [ARM]
@@ -2843,7 +2852,7 @@
If the dependencies are under your control, you can
turn on cpu0_hotplug.
- nps_mtm_hs_ctr= [KNL,ARC]
+ nps_mtm_hs_ctr= [KNL,ARC]
This parameter sets the maximum duration, in
cycles, each HW thread of the CTOP can run
without interruptions, before HW switches it.
@@ -2984,7 +2993,7 @@
pci=option[,option...] [PCI] various PCI subsystem options:
earlydump [X86] dump PCI config space before the kernel
- changes anything
+ changes anything
off [X86] don't probe for the PCI bus
bios [X86-32] force use of PCI BIOS, don't access
the hardware directly. Use this if your machine
@@ -3072,7 +3081,7 @@
is enabled by default. If you need to use this,
please report a bug.
nocrs [X86] Ignore PCI host bridge windows from ACPI.
- If you need to use this, please report a bug.
+ If you need to use this, please report a bug.
routeirq Do IRQ routing for all PCI devices.
This is normally done in pci_enable_device(),
so this option is a temporary workaround
@@ -3915,7 +3924,7 @@
cache (risks via metadata attacks are mostly
unchanged). Debug options disable merging on their
own.
- For more information see Documentation/vm/slub.txt.
+ For more information see Documentation/vm/slub.rst.
slab_max_order= [MM, SLAB]
Determines the maximum allowed order for slabs.
@@ -3929,7 +3938,7 @@
slub_debug can create guard zones around objects and
may poison objects when not in use. Also tracks the
last alloc / free. For more information see
- Documentation/vm/slub.txt.
+ Documentation/vm/slub.rst.
slub_memcg_sysfs= [MM, SLUB]
Determines whether to enable sysfs directories for
@@ -3943,7 +3952,7 @@
Determines the maximum allowed order for slabs.
A high setting may cause OOMs due to memory
fragmentation. For more information see
- Documentation/vm/slub.txt.
+ Documentation/vm/slub.rst.
slub_min_objects= [MM, SLUB]
The minimum number of objects per slab. SLUB will
@@ -3952,12 +3961,12 @@
the number of objects indicated. The higher the number
of objects the smaller the overhead of tracking slabs
and the less frequently locks need to be acquired.
- For more information see Documentation/vm/slub.txt.
+ For more information see Documentation/vm/slub.rst.
slub_min_order= [MM, SLUB]
Determines the minimum page order for slabs. Must be
lower than slub_max_order.
- For more information see Documentation/vm/slub.txt.
+ For more information see Documentation/vm/slub.rst.
slub_nomerge [MM, SLUB]
Same with slab_nomerge. This is supported for legacy.
@@ -4025,6 +4034,48 @@
Not specifying this option is equivalent to
spectre_v2=auto.
+ spec_store_bypass_disable=
+ [HW] Control Speculative Store Bypass (SSB) Disable mitigation
+ (Speculative Store Bypass vulnerability)
+
+			Certain CPUs are vulnerable to an exploit against
+			a common industry-wide performance optimization known
+			as "Speculative Store Bypass" in which recent stores
+			to the same memory location may not be observed by
+			later loads during speculative execution. The idea
+			is that such stores are unlikely and that they can
+			be detected prior to instruction retirement at the
+			end of a particular speculative execution window.
+
+ In vulnerable processors, the speculatively forwarded
+ store can be used in a cache side channel attack, for
+ example to read memory to which the attacker does not
+ directly have access (e.g. inside sandboxed code).
+
+ This parameter controls whether the Speculative Store
+ Bypass optimization is used.
+
+ on - Unconditionally disable Speculative Store Bypass
+ off - Unconditionally enable Speculative Store Bypass
+ auto - Kernel detects whether the CPU model contains an
+ implementation of Speculative Store Bypass and
+ picks the most appropriate mitigation. If the
+ CPU is not vulnerable, "off" is selected. If the
+ CPU is vulnerable the default mitigation is
+ architecture and Kconfig dependent. See below.
+ prctl - Control Speculative Store Bypass per thread
+ via prctl. Speculative Store Bypass is enabled
+ for a process by default. The state of the control
+ is inherited on fork.
+ seccomp - Same as "prctl" above, but all seccomp threads
+ will disable SSB unless they explicitly opt out.
+
+ Not specifying this option is equivalent to
+ spec_store_bypass_disable=auto.
+
+ Default mitigations:
+ X86: If CONFIG_SECCOMP=y "seccomp", otherwise "prctl"
+
spia_io_base= [HW,MTD]
spia_fio_base=
spia_pedr=
@@ -4313,7 +4364,8 @@
Format: [always|madvise|never]
Can be used to control the default behavior of the system
with respect to transparent hugepages.
- See Documentation/vm/transhuge.txt for more details.
+ See Documentation/admin-guide/mm/transhuge.rst
+ for more details.
tsc= Disable clocksource stability checks for TSC.
Format: <string>
@@ -4391,7 +4443,7 @@
usbcore.initial_descriptor_timeout=
[USB] Specifies timeout for the initial 64-byte
- USB_REQ_GET_DESCRIPTOR request in milliseconds
+ USB_REQ_GET_DESCRIPTOR request in milliseconds
(default 5000 = 5.0 seconds).
usbcore.nousb [USB] Disable the USB subsystem
diff --git a/Documentation/admin-guide/mm/concepts.rst b/Documentation/admin-guide/mm/concepts.rst
new file mode 100644
index 000000000000..291699c810d4
--- /dev/null
+++ b/Documentation/admin-guide/mm/concepts.rst
@@ -0,0 +1,222 @@
+.. _mm_concepts:
+
+=================
+Concepts overview
+=================
+
+Memory management in Linux is a complex system that has evolved over
+the years, gaining more and more functionality to support a variety
+of systems from MMU-less microcontrollers to supercomputers. Memory
+management for systems without an MMU is called ``nommu`` and it
+definitely deserves a dedicated document, which hopefully will
+eventually be written. Yet, although some of the concepts are the
+same, here we assume that an MMU is available and that the CPU can
+translate a virtual address to a physical address.
+
+.. contents:: :local:
+
+Virtual Memory Primer
+=====================
+
+The physical memory in a computer system is a limited resource and
+even for systems that support memory hotplug there is a hard limit on
+the amount of memory that can be installed. The physical memory is
+not necessarily contiguous; it might be accessible as a set of
+distinct address ranges. Besides, different CPU architectures, and
+even different implementations of the same architecture, have
+different views of how these address ranges are defined.
+
+All this makes dealing directly with physical memory quite complex,
+and to avoid this complexity the concept of virtual memory was
+developed.
+
+Virtual memory abstracts the details of physical memory from the
+application software, allows keeping only the needed information in
+the physical memory (demand paging) and provides a mechanism for the
+protection and controlled sharing of data between processes.
+
+With virtual memory, each and every memory access uses a virtual
+address. When the CPU decodes an instruction that reads (or
+writes) from (or to) the system memory, it translates the `virtual`
+address encoded in that instruction to a `physical` address that the
+memory controller can understand.
+
+The physical system memory is divided into page frames, or pages. The
+size of each page is architecture specific. Some architectures allow
+selection of the page size from several supported values; this
+selection is performed at the kernel build time by setting an
+appropriate kernel configuration option.
+
+Each physical memory page can be mapped as one or more virtual
+pages. These mappings are described by page tables that allow
+translation from the virtual address used by programs to the real
+address in physical memory. The page tables are organized
+hierarchically.
+
+The tables at the lowest level of the hierarchy contain physical
+addresses of actual pages used by the software. The tables at higher
+levels contain physical addresses of the pages belonging to the lower
+levels. The pointer to the top level page table resides in a
+register. When the CPU performs the address translation, it uses this
+register to access the top level page table. The high bits of the
+virtual address are used to index an entry in the top level page
+table. That entry is then used to access the next level in the
+hierarchy, with the next bits of the virtual address as the index
+into that level's page table. The lowest bits in the virtual address define
+the offset inside the actual page.
+
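+As a rough illustration of this walk, the following standalone user
+space sketch (assumptions: the common x86-64 layout with a 48-bit
+virtual address, four translation levels, 4KB pages and 9 index bits
+per level; the address value is arbitrary) splits a virtual address
+into the per-level table indices and the page offset::
+
+  #include <stdio.h>
+  #include <stdint.h>
+
+  int main(void)
+  {
+          uint64_t vaddr = 0x00007f0123456789ULL; /* arbitrary example */
+
+          /* 9 index bits per level, 12 offset bits for 4KB pages */
+          unsigned pgd = (vaddr >> 39) & 0x1ff;   /* top level    */
+          unsigned pud = (vaddr >> 30) & 0x1ff;
+          unsigned pmd = (vaddr >> 21) & 0x1ff;
+          unsigned pte = (vaddr >> 12) & 0x1ff;   /* lowest level */
+          unsigned off = vaddr & 0xfff;           /* offset in page */
+
+          printf("pgd=%u pud=%u pmd=%u pte=%u offset=0x%x\n",
+                 pgd, pud, pmd, pte, off);
+          return 0;
+  }
+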
+Huge Pages
+==========
+
+Address translation requires several memory accesses, and memory
+accesses are slow relative to CPU speed. To avoid spending precious
+processor cycles on the address translation, CPUs maintain a cache of
+such translations called the Translation Lookaside Buffer (or
+TLB). Usually the TLB is a pretty scarce resource, and applications
+with a large memory working set will experience a performance hit
+because of TLB misses.
+
+Many modern CPU architectures allow mapping of the memory pages
+directly by the higher levels in the page table. For instance, on x86,
+it is possible to map 2M and even 1G pages using entries in the second
+and the third level page tables. In Linux such pages are called
+`huge`. Usage of huge pages significantly reduces pressure on the TLB,
+improves the TLB hit rate and thus improves overall system performance.
+
+There are two mechanisms in Linux that enable mapping of the physical
+memory with the huge pages. The first one is `HugeTLB filesystem`, or
+hugetlbfs. It is a pseudo filesystem that uses RAM as its backing
+store. For the files created in this filesystem the data resides in
+memory and is mapped using huge pages. The hugetlbfs is described at
+:ref:`Documentation/admin-guide/mm/hugetlbpage.rst <hugetlbpage>`.
+
+Another, more recent, mechanism that enables use of the huge pages is
+called `Transparent HugePages`, or THP. Unlike the hugetlbfs that
+requires users and/or system administrators to configure what parts of
+the system memory should and can be mapped by the huge pages, THP
+manages such mappings transparently to the user and hence the
+name. See
+:ref:`Documentation/admin-guide/mm/transhuge.rst <admin_guide_transhuge>`
+for more details about THP.
+
+Zones
+=====
+
+Often hardware poses restrictions on how different physical memory
+ranges can be accessed. In some cases, devices cannot perform DMA to
+all the addressable memory. In other cases, the size of the physical
+memory exceeds the maximal addressable size of virtual memory and
+special actions are required to access portions of the memory. Linux
+groups memory pages into `zones` according to their possible
+usage. For example, ZONE_DMA will contain memory that can be used by
+devices for DMA, ZONE_HIGHMEM will contain memory that is not
+permanently mapped into the kernel's address space and ZONE_NORMAL will
+contain normally addressed pages.
+
+The actual layout of the memory zones is hardware dependent as not all
+architectures define all zones, and requirements for DMA are different
+for different platforms.
+
+Nodes
+=====
+
+Many multi-processor machines are NUMA - Non-Uniform Memory Access -
+systems. In such systems the memory is arranged into banks that have
+different access latency depending on the "distance" from the
+processor. Each bank is referred to as a `node` and for each node
+Linux constructs an independent memory management subsystem. A node
+has its own set of zones, lists of free and used pages and various
+statistics
+counters. You can find more details about NUMA in
+:ref:`Documentation/vm/numa.rst <numa>` and in
+:ref:`Documentation/admin-guide/mm/numa_memory_policy.rst <numa_memory_policy>`.
+
+Page cache
+==========
+
+The physical memory is volatile and the common case for getting data
+into memory is to read it from files. Whenever a file is read, the
+data is put into the `page cache` to avoid expensive disk access on
+the subsequent reads. Similarly, when one writes to a file, the data
+is placed in the page cache and eventually gets into the backing
+storage device. The written pages are marked as `dirty` and when Linux
+decides to reuse them for other purposes, it makes sure to synchronize
+the file contents on the device with the updated data.
+
+Anonymous Memory
+================
+
+The `anonymous memory` or `anonymous mappings` represent memory that
+is not backed by a filesystem. Such mappings are implicitly created
+for a program's stack and heap, or by explicit calls to the mmap(2)
+system call. Usually, the anonymous mappings only define virtual
+memory areas that the program is allowed to access. Read accesses
+will result in the creation of a page table entry that references a
+special physical page filled with zeroes. When the program performs a
+write, a regular physical page will be allocated to hold the written
+data. The page will be marked dirty and, if the kernel decides to
+repurpose it, the dirty page will be swapped out.
+
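+As a minimal sketch of the behaviour described above (illustrative
+only; error handling shortened)::
+
+  #include <sys/mman.h>
+  #include <stdio.h>
+
+  int main(void)
+  {
+          /* Defines a virtual area only; no physical pages yet. */
+          char *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
+                         MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+          if (p == MAP_FAILED)
+                  return 1;
+
+          char c = p[0];  /* read fault: references the zero page  */
+          p[0] = 1;       /* write fault: allocates a regular page */
+          printf("initial value: %d\n", c);
+          return 0;
+  }
+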
+Reclaim
+=======
+
+Throughout the system lifetime, a physical page can be used for storing
+different types of data. It can be kernel internal data structures,
+DMA'able buffers for use by device drivers, data read from a filesystem,
+memory allocated by user space processes etc.
+
+Depending on the page usage it is treated differently by the Linux
+memory management. The pages that can be freed at any time, either
+because they cache the data available elsewhere, for instance, on a
+hard disk, or because they can be swapped out, again, to the hard
+disk, are called `reclaimable`. The most notable categories of the
+reclaimable pages are page cache and anonymous memory.
+
+In most cases, the pages holding internal kernel data and used as DMA
+buffers cannot be repurposed, and they remain pinned until freed by
+their user. Such pages are called `unreclaimable`. However, in certain
+circumstances, even pages occupied with kernel data structures can be
+reclaimed. For instance, in-memory caches of filesystem metadata can
+be re-read from the storage device and therefore it is possible to
+discard them from the main memory when the system is under memory
+pressure.
+
+The process of freeing the reclaimable physical memory pages and
+repurposing them is called (surprise!) `reclaim`. Linux can reclaim
+pages either asynchronously or synchronously, depending on the state
+of the system. When the system is not loaded, most of the memory is
+free and an allocation request will be satisfied immediately from the
+free pages supply. As the load increases, the amount of free pages
+goes down and when it reaches a certain threshold (the low watermark),
+an allocation request will awaken the ``kswapd`` daemon. It will
+asynchronously scan memory pages and either just free them if the
+data they contain is available elsewhere, or evict them to the
+backing storage device (remember those dirty pages?). As memory usage
+increases even more and reaches another threshold - the min watermark -
+an allocation will trigger `direct reclaim`. In this case allocation
+is stalled until enough memory pages are reclaimed to satisfy the
+request.
+
+Compaction
+==========
+
+As the system runs, tasks allocate and free the memory and it becomes
+fragmented. Although with virtual memory it is possible to present
+scattered physical pages as virtually contiguous range, sometimes it is
+necessary to allocate large physically contiguous memory areas. Such
+need may arise, for instance, when a device driver requires a large
+buffer for DMA, or when THP allocates a huge page. Memory `compaction`
+addresses the fragmentation issue. This mechanism moves occupied pages
+from the lower part of a memory zone to free pages in the upper part
+of the zone. When a compaction scan is finished, free pages are
+grouped together at the beginning of the zone and allocations of large
+physically contiguous areas become possible.
+
+Like reclaim, compaction may happen asynchronously in the
+``kcompactd`` daemon or synchronously as a result of a memory
+allocation request.
+
+OOM killer
+==========
+
+It may happen that, on a loaded machine, memory will be exhausted.
+When the kernel detects that the system is running out of memory (OOM)
+it invokes the `OOM killer`. Its mission is simple: all it has to do
+is select a task to sacrifice for the sake of the overall system
+health. The selected task is killed in the hope that after it exits
+enough memory will be freed to continue normal operation.
diff --git a/Documentation/vm/hugetlbpage.txt b/Documentation/admin-guide/mm/hugetlbpage.rst
index faf077d50d42..1cc0bc78d10e 100644
--- a/Documentation/vm/hugetlbpage.txt
+++ b/Documentation/admin-guide/mm/hugetlbpage.rst
@@ -1,3 +1,11 @@
+.. _hugetlbpage:
+
+=============
+HugeTLB Pages
+=============
+
+Overview
+========
The intent of this file is to give a brief summary of hugetlbpage support in
the Linux kernel. This support is built on top of multiple page size support
@@ -18,53 +26,59 @@ First the Linux kernel needs to be built with the CONFIG_HUGETLBFS
automatically when CONFIG_HUGETLBFS is selected) configuration
options.
-The /proc/meminfo file provides information about the total number of
+The ``/proc/meminfo`` file provides information about the total number of
persistent hugetlb pages in the kernel's huge page pool. It also displays
default huge page size and information about the number of free, reserved
and surplus huge pages in the pool of huge pages of default size.
The huge page size is needed for generating the proper alignment and
size of the arguments to system calls that map huge page regions.
-The output of "cat /proc/meminfo" will include lines like:
+The output of ``cat /proc/meminfo`` will include lines like::
-.....
-HugePages_Total: uuu
-HugePages_Free: vvv
-HugePages_Rsvd: www
-HugePages_Surp: xxx
-Hugepagesize: yyy kB
-Hugetlb: zzz kB
+ HugePages_Total: uuu
+ HugePages_Free: vvv
+ HugePages_Rsvd: www
+ HugePages_Surp: xxx
+ Hugepagesize: yyy kB
+ Hugetlb: zzz kB
where:
-HugePages_Total is the size of the pool of huge pages.
-HugePages_Free is the number of huge pages in the pool that are not yet
- allocated.
-HugePages_Rsvd is short for "reserved," and is the number of huge pages for
- which a commitment to allocate from the pool has been made,
- but no allocation has yet been made. Reserved huge pages
- guarantee that an application will be able to allocate a
- huge page from the pool of huge pages at fault time.
-HugePages_Surp is short for "surplus," and is the number of huge pages in
- the pool above the value in /proc/sys/vm/nr_hugepages. The
- maximum number of surplus huge pages is controlled by
- /proc/sys/vm/nr_overcommit_hugepages.
-Hugepagesize is the default hugepage size (in Kb).
-Hugetlb is the total amount of memory (in kB), consumed by huge
- pages of all sizes.
- If huge pages of different sizes are in use, this number
- will exceed HugePages_Total * Hugepagesize. To get more
- detailed information, please, refer to
- /sys/kernel/mm/hugepages (described below).
-
-
-/proc/filesystems should also show a filesystem of type "hugetlbfs" configured
-in the kernel.
-
-/proc/sys/vm/nr_hugepages indicates the current number of "persistent" huge
+
+HugePages_Total
+ is the size of the pool of huge pages.
+HugePages_Free
+ is the number of huge pages in the pool that are not yet
+ allocated.
+HugePages_Rsvd
+ is short for "reserved," and is the number of huge pages for
+ which a commitment to allocate from the pool has been made,
+ but no allocation has yet been made. Reserved huge pages
+ guarantee that an application will be able to allocate a
+ huge page from the pool of huge pages at fault time.
+HugePages_Surp
+ is short for "surplus," and is the number of huge pages in
+ the pool above the value in ``/proc/sys/vm/nr_hugepages``. The
+ maximum number of surplus huge pages is controlled by
+ ``/proc/sys/vm/nr_overcommit_hugepages``.
+Hugepagesize
+	is the default hugepage size (in kB).
+Hugetlb
+	is the total amount of memory (in kB) consumed by huge
+ pages of all sizes.
+ If huge pages of different sizes are in use, this number
+ will exceed HugePages_Total \* Hugepagesize. To get more
+ detailed information, please, refer to
+ ``/sys/kernel/mm/hugepages`` (described below).
+
+
+``/proc/filesystems`` should also show a filesystem of type "hugetlbfs"
+configured in the kernel.
+
+``/proc/sys/vm/nr_hugepages`` indicates the current number of "persistent" huge
pages in the kernel's huge page pool. "Persistent" huge pages will be
returned to the huge page pool when freed by a task. A user with root
privileges can dynamically allocate more or free some persistent huge pages
-by increasing or decreasing the value of 'nr_hugepages'.
+by increasing or decreasing the value of ``nr_hugepages``.
Pages that are used as huge pages are reserved inside the kernel and cannot
be used for other purposes. Huge pages cannot be swapped out under
@@ -73,7 +87,7 @@ memory pressure.
Once a number of huge pages have been pre-allocated to the kernel huge page
pool, a user with appropriate privilege can use either the mmap system call
or shared memory system calls to use the huge pages. See the discussion of
-Using Huge Pages, below.
+:ref:`Using Huge Pages <using_huge_pages>`, below.
The administrator can allocate persistent huge pages on the kernel boot
command line by specifying the "hugepages=N" parameter, where 'N' = the
@@ -86,10 +100,10 @@ with a huge page size selection parameter "hugepagesz=<size>". <size> must
be specified in bytes with optional scale suffix [kKmMgG]. The default huge
page size may be selected with the "default_hugepagesz=<size>" boot parameter.
-When multiple huge page sizes are supported, /proc/sys/vm/nr_hugepages
+When multiple huge page sizes are supported, ``/proc/sys/vm/nr_hugepages``
indicates the current number of pre-allocated huge pages of the default size.
Thus, one can use the following command to dynamically allocate/deallocate
-default sized persistent huge pages:
+default sized persistent huge pages::
echo 20 > /proc/sys/vm/nr_hugepages
@@ -98,11 +112,12 @@ huge page pool to 20, allocating or freeing huge pages, as required.
On a NUMA platform, the kernel will attempt to distribute the huge page pool
over all the set of allowed nodes specified by the NUMA memory policy of the
-task that modifies nr_hugepages. The default for the allowed nodes--when the
+task that modifies ``nr_hugepages``. The default for the allowed nodes--when the
task has default memory policy--is all on-line nodes with memory. Allowed
nodes with insufficient available, contiguous memory for a huge page will be
-silently skipped when allocating persistent huge pages. See the discussion
-below of the interaction of task memory policy, cpusets and per node attributes
+silently skipped when allocating persistent huge pages. See the
+:ref:`discussion below <mem_policy_and_hp_alloc>`
+of the interaction of task memory policy, cpusets and per node attributes
with the allocation and freeing of persistent huge pages.
The success or failure of huge page allocation depends on the amount of
@@ -117,51 +132,52 @@ init files. This will enable the kernel to allocate huge pages early in
the boot process when the possibility of getting physical contiguous pages
is still very high. Administrators can verify the number of huge pages
actually allocated by checking the sysctl or meminfo. To check the per node
-distribution of huge pages in a NUMA system, use:
+distribution of huge pages in a NUMA system, use::
cat /sys/devices/system/node/node*/meminfo | fgrep Huge
-/proc/sys/vm/nr_overcommit_hugepages specifies how large the pool of
-huge pages can grow, if more huge pages than /proc/sys/vm/nr_hugepages are
+``/proc/sys/vm/nr_overcommit_hugepages`` specifies how large the pool of
+huge pages can grow, if more huge pages than ``/proc/sys/vm/nr_hugepages`` are
requested by applications. Writing any non-zero value into this file
indicates that the hugetlb subsystem is allowed to try to obtain that
number of "surplus" huge pages from the kernel's normal page pool, when the
persistent huge page pool is exhausted. As these surplus huge pages become
unused, they are freed back to the kernel's normal page pool.
-When increasing the huge page pool size via nr_hugepages, any existing surplus
-pages will first be promoted to persistent huge pages. Then, additional
+When increasing the huge page pool size via ``nr_hugepages``, any existing
+surplus pages will first be promoted to persistent huge pages. Then, additional
huge pages will be allocated, if necessary and if possible, to fulfill
the new persistent huge page pool size.
The administrator may shrink the pool of persistent huge pages for
-the default huge page size by setting the nr_hugepages sysctl to a
+the default huge page size by setting the ``nr_hugepages`` sysctl to a
smaller value. The kernel will attempt to balance the freeing of huge pages
-across all nodes in the memory policy of the task modifying nr_hugepages.
+across all nodes in the memory policy of the task modifying ``nr_hugepages``.
Any free huge pages on the selected nodes will be freed back to the kernel's
normal page pool.
-Caveat: Shrinking the persistent huge page pool via nr_hugepages such that
+Caveat: Shrinking the persistent huge page pool via ``nr_hugepages`` such that
it becomes less than the number of huge pages in use will convert the balance
of the in-use huge pages to surplus huge pages. This will occur even if
-the number of surplus pages it would exceed the overcommit value. As long as
-this condition holds--that is, until nr_hugepages+nr_overcommit_hugepages is
+the number of surplus pages would exceed the overcommit value. As long as
+this condition holds--that is, until ``nr_hugepages+nr_overcommit_hugepages`` is
increased sufficiently, or the surplus huge pages go out of use and are freed--
no more surplus huge pages will be allowed to be allocated.
With support for multiple huge page pools at run-time available, much of
-the huge page userspace interface in /proc/sys/vm has been duplicated in sysfs.
-The /proc interfaces discussed above have been retained for backwards
-compatibility. The root huge page control directory in sysfs is:
+the huge page userspace interface in ``/proc/sys/vm`` has been duplicated in
+sysfs.
+The ``/proc`` interfaces discussed above have been retained for backwards
+compatibility. The root huge page control directory in sysfs is::
/sys/kernel/mm/hugepages
For each huge page size supported by the running kernel, a subdirectory
-will exist, of the form:
+will exist, of the form::
hugepages-${size}kB
-Inside each of these directories, the same set of files will exist:
+Inside each of these directories, the same set of files will exist::
nr_hugepages
nr_hugepages_mempolicy
@@ -172,37 +188,39 @@ Inside each of these directories, the same set of files will exist:
which function as described above for the default huge page-sized case.
+.. _mem_policy_and_hp_alloc:
Interaction of Task Memory Policy with Huge Page Allocation/Freeing
===================================================================
-Whether huge pages are allocated and freed via the /proc interface or
-the /sysfs interface using the nr_hugepages_mempolicy attribute, the NUMA
-nodes from which huge pages are allocated or freed are controlled by the
-NUMA memory policy of the task that modifies the nr_hugepages_mempolicy
-sysctl or attribute. When the nr_hugepages attribute is used, mempolicy
+Whether huge pages are allocated and freed via the ``/proc`` interface or
+the ``/sysfs`` interface using the ``nr_hugepages_mempolicy`` attribute, the
+NUMA nodes from which huge pages are allocated or freed are controlled by the
+NUMA memory policy of the task that modifies the ``nr_hugepages_mempolicy``
+sysctl or attribute. When the ``nr_hugepages`` attribute is used, mempolicy
is ignored.
The recommended method to allocate or free huge pages to/from the kernel
-huge page pool, using the nr_hugepages example above, is:
+huge page pool, using the ``nr_hugepages`` example above, is::
numactl --interleave <node-list> echo 20 \
>/proc/sys/vm/nr_hugepages_mempolicy
-or, more succinctly:
+or, more succinctly::
numactl -m <node-list> echo 20 >/proc/sys/vm/nr_hugepages_mempolicy
-This will allocate or free abs(20 - nr_hugepages) to or from the nodes
+This will allocate or free ``abs(20 - nr_hugepages)`` to or from the nodes
specified in <node-list>, depending on whether number of persistent huge pages
is initially less than or greater than 20, respectively. No huge pages will be
allocated nor freed on any node not included in the specified <node-list>.
-When adjusting the persistent hugepage count via nr_hugepages_mempolicy, any
+When adjusting the persistent hugepage count via ``nr_hugepages_mempolicy``, any
memory policy mode--bind, preferred, local or interleave--may be used. The
resulting effect on persistent huge page allocation is as follows:
-1) Regardless of mempolicy mode [see Documentation/vm/numa_memory_policy.txt],
+#. Regardless of mempolicy mode [see
+ :ref:`Documentation/admin-guide/mm/numa_memory_policy.rst <numa_memory_policy>`],
persistent huge pages will be distributed across the node or nodes
specified in the mempolicy as if "interleave" had been specified.
However, if a node in the policy does not contain sufficient contiguous
@@ -212,7 +230,7 @@ resulting effect on persistent huge page allocation is as follows:
possibly, allocation of persistent huge pages on nodes not allowed by
the task's memory policy.
-2) One or more nodes may be specified with the bind or interleave policy.
+#. One or more nodes may be specified with the bind or interleave policy.
If more than one node is specified with the preferred policy, only the
lowest numeric id will be used. Local policy will select the node where
the task is running at the time the nodes_allowed mask is constructed.
@@ -222,20 +240,20 @@ resulting effect on persistent huge page allocation is as follows:
indeterminate. Thus, local policy is not very useful for this purpose.
Any of the other mempolicy modes may be used to specify a single node.
-3) The nodes allowed mask will be derived from any non-default task mempolicy,
+#. The nodes allowed mask will be derived from any non-default task mempolicy,
whether this policy was set explicitly by the task itself or one of its
ancestors, such as numactl. This means that if the task is invoked from a
shell with non-default policy, that policy will be used. One can specify a
node list of "all" with numactl --interleave or --membind [-m] to achieve
interleaving over all nodes in the system or cpuset.
-4) Any task mempolicy specified--e.g., using numactl--will be constrained by
+#. Any task mempolicy specified--e.g., using numactl--will be constrained by
the resource limits of any cpuset in which the task runs. Thus, there will
be no way for a task with non-default policy running in a cpuset with a
subset of the system nodes to allocate huge pages outside the cpuset
without first moving to a cpuset that contains all of the desired nodes.
-5) Boot-time huge page allocation attempts to distribute the requested number
+#. Boot-time huge page allocation attempts to distribute the requested number
   of huge pages over all on-line nodes with memory.
Per Node Hugepages Attributes
@@ -243,22 +261,22 @@ Per Node Hugepages Attributes
A subset of the contents of the root huge page control directory in sysfs,
described above, will be replicated under the system device of each
-NUMA node with memory in:
+NUMA node with memory in::
/sys/devices/system/node/node[0-9]*/hugepages/
Under this directory, the subdirectory for each supported huge page size
-contains the following attribute files:
+contains the following attribute files::
nr_hugepages
free_hugepages
surplus_hugepages
-The free_' and surplus_' attribute files are read-only. They return the number
+The ``free_`` and ``surplus_`` attribute files are read-only. They return the number
of free and surplus [overcommitted] huge pages, respectively, on the parent
node.
-The nr_hugepages attribute returns the total number of huge pages on the
+The ``nr_hugepages`` attribute returns the total number of huge pages on the
specified node. When this attribute is written, the number of persistent huge
pages on the parent node will be adjusted to the specified value, if sufficient
resources exist, regardless of the task's mempolicy or cpuset constraints.
@@ -267,43 +285,58 @@ Note that the number of overcommit and reserve pages remain global quantities,
as we don't know until fault time, when the faulting task's mempolicy is
applied, from which node the huge page allocation will be attempted.
+.. _using_huge_pages:
Using Huge Pages
================
If the user applications are going to request huge pages using mmap system
call, then it is required that system administrator mount a file system of
-type hugetlbfs:
+type hugetlbfs::
mount -t hugetlbfs \
-o uid=<value>,gid=<value>,mode=<value>,pagesize=<value>,size=<value>,\
min_size=<value>,nr_inodes=<value> none /mnt/huge
This command mounts a (pseudo) filesystem of type hugetlbfs on the directory
-/mnt/huge. Any files created on /mnt/huge uses huge pages. The uid and gid
-options sets the owner and group of the root of the file system. By default
-the uid and gid of the current process are taken. The mode option sets the
-mode of root of file system to value & 01777. This value is given in octal.
-By default the value 0755 is picked. If the platform supports multiple huge
-page sizes, the pagesize option can be used to specify the huge page size and
-associated pool. pagesize is specified in bytes. If pagesize is not specified
-the platform's default huge page size and associated pool will be used. The
-size option sets the maximum value of memory (huge pages) allowed for that
-filesystem (/mnt/huge). The size option can be specified in bytes, or as a
-percentage of the specified huge page pool (nr_hugepages). The size is
-rounded down to HPAGE_SIZE boundary. The min_size option sets the minimum
-value of memory (huge pages) allowed for the filesystem. min_size can be
-specified in the same way as size, either bytes or a percentage of the
-huge page pool. At mount time, the number of huge pages specified by
-min_size are reserved for use by the filesystem. If there are not enough
-free huge pages available, the mount will fail. As huge pages are allocated
-to the filesystem and freed, the reserve count is adjusted so that the sum
-of allocated and reserved huge pages is always at least min_size. The option
-nr_inodes sets the maximum number of inodes that /mnt/huge can use. If the
-size, min_size or nr_inodes option is not provided on command line then
-no limits are set. For pagesize, size, min_size and nr_inodes options, you
-can use [G|g]/[M|m]/[K|k] to represent giga/mega/kilo. For example, size=2K
-has the same meaning as size=2048.
+``/mnt/huge``. Any file created on ``/mnt/huge`` uses huge pages.
+
+The ``uid`` and ``gid`` options set the owner and group of the root of the
+file system. By default the ``uid`` and ``gid`` of the current process
+are taken.
+
+The ``mode`` option sets the mode of the root of the file system to value & 01777.
+This value is given in octal. By default the value 0755 is picked.
+
+If the platform supports multiple huge page sizes, the ``pagesize`` option can
+be used to specify the huge page size and associated pool. ``pagesize``
+is specified in bytes. If ``pagesize`` is not specified the platform's
+default huge page size and associated pool will be used.
+
+The ``size`` option sets the maximum value of memory (huge pages) allowed
+for that filesystem (``/mnt/huge``). The ``size`` option can be specified
+in bytes, or as a percentage of the specified huge page pool (``nr_hugepages``).
+The size is rounded down to the HPAGE_SIZE boundary.
+
+The ``min_size`` option sets the minimum value of memory (huge pages) allowed
+for the filesystem. ``min_size`` can be specified in the same way as ``size``,
+either bytes or a percentage of the huge page pool.
+At mount time, the number of huge pages specified by ``min_size`` is reserved
+for use by the filesystem.
+If there are not enough free huge pages available, the mount will fail.
+As huge pages are allocated to the filesystem and freed, the reserve count
+is adjusted so that the sum of allocated and reserved huge pages is always
+at least ``min_size``.
+
+The option ``nr_inodes`` sets the maximum number of inodes that ``/mnt/huge``
+can use.
+
+If the ``size``, ``min_size`` or ``nr_inodes`` option is not provided on
+command line then no limits are set.
+
+For ``pagesize``, ``size``, ``min_size`` and ``nr_inodes`` options, you can
+use [G|g]/[M|m]/[K|k] to represent giga/mega/kilo.
+For example, size=2K has the same meaning as size=2048.
While read system calls are supported on files that reside on hugetlb
file systems, write system calls are not.
@@ -313,12 +346,12 @@ used to change the file attributes on hugetlbfs.
Also, it is important to note that no such mount command is required if
applications are going to use only shmat/shmget system calls or mmap with
-MAP_HUGETLB. For an example of how to use mmap with MAP_HUGETLB see map_hugetlb
-below.
+MAP_HUGETLB. For an example of how to use mmap with MAP_HUGETLB see
+:ref:`map_hugetlb <map_hugetlb>` below.
-Users who wish to use hugetlb memory via shared memory segment should be a
-member of a supplementary group and system admin needs to configure that gid
-into /proc/sys/vm/hugetlb_shm_group. It is possible for same or different
+Users who wish to use hugetlb memory via shared memory segments should be
+members of a supplementary group, and the system admin needs to configure
+that gid into ``/proc/sys/vm/hugetlb_shm_group``. It is possible for the
+same or different applications to use any combination of mmaps and shm*
+calls, though the mount of the filesystem will be required for using mmap
+calls without MAP_HUGETLB.
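+
+The core of such a MAP_HUGETLB mapping looks like the sketch below (a
+simplified illustration, not the full map_hugetlb.c selftest; it
+assumes a 2MB default huge page size and at least one free huge page
+in the pool)::
+
+  #define _GNU_SOURCE     /* for MAP_HUGETLB on older glibc */
+  #include <sys/mman.h>
+  #include <stdio.h>
+
+  #define LENGTH (2UL * 1024 * 1024)  /* one 2MB huge page */
+
+  int main(void)
+  {
+          /* No hugetlbfs mount is needed when MAP_HUGETLB is used. */
+          void *addr = mmap(NULL, LENGTH, PROT_READ | PROT_WRITE,
+                            MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB,
+                            -1, 0);
+          if (addr == MAP_FAILED) {
+                  perror("mmap");  /* e.g. ENOMEM if the pool is empty */
+                  return 1;
+          }
+          *(char *)addr = 0;  /* touch the huge page */
+          munmap(addr, LENGTH);
+          return 0;
+  }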
@@ -332,20 +365,18 @@ a hugetlb page and the length is smaller than the hugepage size.
Examples
========
-1) map_hugetlb: see tools/testing/selftests/vm/map_hugetlb.c
+.. _map_hugetlb:
-2) hugepage-shm: see tools/testing/selftests/vm/hugepage-shm.c
+``map_hugetlb``
+ see tools/testing/selftests/vm/map_hugetlb.c
-3) hugepage-mmap: see tools/testing/selftests/vm/hugepage-mmap.c
+``hugepage-shm``
+ see tools/testing/selftests/vm/hugepage-shm.c
-4) The libhugetlbfs (https://github.com/libhugetlbfs/libhugetlbfs) library
- provides a wide range of userspace tools to help with huge page usability,
- environment setup, and control.
+``hugepage-mmap``
+ see tools/testing/selftests/vm/hugepage-mmap.c
-Kernel development regression testing
-=====================================
+The `libhugetlbfs`_ library provides a wide range of userspace tools
+to help with huge page usability, environment setup, and control.
-The most complete set of hugetlb tests are in the libhugetlbfs repository.
-If you modify any hugetlb related code, use the libhugetlbfs test suite
-to check for regressions. In addition, if you add any new hugetlb
-functionality, please add appropriate tests to libhugetlbfs.
+.. _libhugetlbfs: https://github.com/libhugetlbfs/libhugetlbfs
diff --git a/Documentation/vm/idle_page_tracking.txt b/Documentation/admin-guide/mm/idle_page_tracking.rst
index 85dcc3bb85dc..6f7b7ca1add3 100644
--- a/Documentation/vm/idle_page_tracking.txt
+++ b/Documentation/admin-guide/mm/idle_page_tracking.rst
@@ -1,4 +1,11 @@
-MOTIVATION
+.. _idle_page_tracking:
+
+==================
+Idle Page Tracking
+==================
+
+Motivation
+==========
The idle page tracking feature allows tracking which memory pages are being
accessed by a workload and which are idle. This information can be useful for
@@ -8,10 +15,14 @@ or deciding where to place the workload within a compute cluster.
It is enabled by CONFIG_IDLE_PAGE_TRACKING=y.
-USER API
+.. _user_api:
-The idle page tracking API is located at /sys/kernel/mm/page_idle. Currently,
-it consists of the only read-write file, /sys/kernel/mm/page_idle/bitmap.
+User API
+========
+
+The idle page tracking API is located at ``/sys/kernel/mm/page_idle``.
+Currently, it consists of a single read-write file,
+``/sys/kernel/mm/page_idle/bitmap``.
The file implements a bitmap where each bit corresponds to a memory page. The
bitmap is represented by an array of 8-byte integers, and the page at PFN #i is
@@ -19,8 +30,9 @@ mapped to bit #i%64 of array element #i/64, byte order is native. When a bit is
set, the corresponding page is idle.
A page is considered idle if it has not been accessed since it was marked idle
-(for more details on what "accessed" actually means see the IMPLEMENTATION
-DETAILS section). To mark a page idle one has to set the bit corresponding to
+(for more details on what "accessed" actually means see the :ref:`Implementation
+Details <impl_details>` section).
+To mark a page idle one has to set the bit corresponding to
the page by writing to the file. A value written to the file is OR-ed with the
current bitmap value.
@@ -30,9 +42,9 @@ page types (e.g. SLAB pages) an attempt to mark a page idle is silently ignored,
and hence such pages are never reported idle.
For huge pages the idle flag is set only on the head page, so one has to read
-/proc/kpageflags in order to correctly count idle huge pages.
+``/proc/kpageflags`` in order to correctly count idle huge pages.
-Reading from or writing to /sys/kernel/mm/page_idle/bitmap will return
+Reading from or writing to ``/sys/kernel/mm/page_idle/bitmap`` will return
-EINVAL if you are not starting the read/write on an 8-byte boundary, or
if the size of the read/write is not a multiple of 8 bytes. Writing to
this file beyond max PFN will return -ENXIO.
@@ -41,21 +53,26 @@ That said, in order to estimate the amount of pages that are not used by a
workload one should:
1. Mark all the workload's pages as idle by setting corresponding bits in
- /sys/kernel/mm/page_idle/bitmap. The pages can be found by reading
- /proc/pid/pagemap if the workload is represented by a process, or by
- filtering out alien pages using /proc/kpagecgroup in case the workload is
- placed in a memory cgroup.
+ ``/sys/kernel/mm/page_idle/bitmap``. The pages can be found by reading
+ ``/proc/pid/pagemap`` if the workload is represented by a process, or by
+ filtering out alien pages using ``/proc/kpagecgroup`` in case the workload
+ is placed in a memory cgroup.
2. Wait until the workload accesses its working set.
- 3. Read /sys/kernel/mm/page_idle/bitmap and count the number of bits set. If
- one wants to ignore certain types of pages, e.g. mlocked pages since they
- are not reclaimable, he or she can filter them out using /proc/kpageflags.
+ 3. Read ``/sys/kernel/mm/page_idle/bitmap`` and count the number of bits set.
+ If one wants to ignore certain types of pages, e.g. mlocked pages since they
+ are not reclaimable, he or she can filter them out using
+ ``/proc/kpageflags``.
+
+See :ref:`Documentation/admin-guide/mm/pagemap.rst <pagemap>` for more
+information about ``/proc/pid/pagemap``, ``/proc/kpageflags``, and
+``/proc/kpagecgroup``.
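+
+For instance, step 1 above could look like the following sketch for a
+single 8-byte word of the bitmap (illustrative only; it assumes root
+privileges and a hypothetical ``start_pfn``)::
+
+  #include <fcntl.h>
+  #include <stdint.h>
+  #include <unistd.h>
+
+  int main(void)
+  {
+          uint64_t start_pfn = 0x80000;   /* hypothetical PFN        */
+          uint64_t all_ones = UINT64_MAX; /* marks 64 pages as idle  */
+
+          int fd = open("/sys/kernel/mm/page_idle/bitmap", O_WRONLY);
+          if (fd < 0)
+                  return 1;
+
+          /* Each 8-byte word covers 64 pages and accesses must be
+             8-byte aligned, so seek to the word holding start_pfn. */
+          off_t off = start_pfn / 64 * 8;
+          if (pwrite(fd, &all_ones, sizeof(all_ones), off) < 0)
+                  return 1;
+
+          close(fd);
+          return 0;
+  }
+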
-See Documentation/vm/pagemap.txt for more information about /proc/pid/pagemap,
-/proc/kpageflags, and /proc/kpagecgroup.
+.. _impl_details:
-IMPLEMENTATION DETAILS
+Implementation Details
+======================
The kernel internally keeps track of accesses to user memory pages in order to
reclaim unreferenced pages first on memory shortage conditions. A page is
@@ -77,7 +94,8 @@ When a dirty page is written to swap or disk as a result of memory reclaim or
exceeding the dirty memory limit, it is not marked referenced.
The idle memory tracking feature adds a new page flag, the Idle flag. This flag
-is set manually, by writing to /sys/kernel/mm/page_idle/bitmap (see the USER API
+is set manually, by writing to ``/sys/kernel/mm/page_idle/bitmap`` (see the
+:ref:`User API <user_api>`
section), and cleared automatically whenever a page is referenced as defined
above.
diff --git a/Documentation/admin-guide/mm/index.rst b/Documentation/admin-guide/mm/index.rst
new file mode 100644
index 000000000000..ceead68c2df7
--- /dev/null
+++ b/Documentation/admin-guide/mm/index.rst
@@ -0,0 +1,36 @@
+=================
+Memory Management
+=================
+
+The Linux memory management subsystem is responsible, as the name
+implies, for managing the memory in the system. This includes the
+implementation of virtual memory and demand paging, memory allocation
+both for kernel internal structures and user space programs, mapping
+of files into processes' address space and many other cool things.
+
+Linux memory management is a complex system with many configurable
+settings. Most of these settings are available via the ``/proc``
+filesystem and can be queried and adjusted using ``sysctl``. These APIs
+are described in Documentation/sysctl/vm.txt and in `man 5 proc`_.
+
+.. _man 5 proc: http://man7.org/linux/man-pages/man5/proc.5.html
+
+Linux memory management has its own jargon and if you are not yet
+familiar with it, consider reading
+:ref:`Documentation/admin-guide/mm/concepts.rst <mm_concepts>`.
+
+Here we document in detail how to interact with various mechanisms in
+the Linux memory management.
+
+.. toctree::
+ :maxdepth: 1
+
+ concepts
+ hugetlbpage
+ idle_page_tracking
+ ksm
+ numa_memory_policy
+ pagemap
+ soft-dirty
+ transhuge
+ userfaultfd
diff --git a/Documentation/admin-guide/mm/ksm.rst b/Documentation/admin-guide/mm/ksm.rst
new file mode 100644
index 000000000000..9303786632d1
--- /dev/null
+++ b/Documentation/admin-guide/mm/ksm.rst
@@ -0,0 +1,189 @@
+.. _admin_guide_ksm:
+
+=======================
+Kernel Samepage Merging
+=======================
+
+Overview
+========
+
+KSM is a memory-saving de-duplication feature, enabled by CONFIG_KSM=y,
+added to the Linux kernel in 2.6.32. See ``mm/ksm.c`` for its implementation,
+and http://lwn.net/Articles/306704/ and http://lwn.net/Articles/330589/
+
+KSM was originally developed for use with KVM (where it was known as
+Kernel Shared Memory), to fit more virtual machines into physical memory,
+by sharing the data common between them. But it can be useful to any
+application which generates many instances of the same data.
+
+The KSM daemon ksmd periodically scans those areas of user memory
+which have been registered with it, looking for pages of identical
+content which can be replaced by a single write-protected page (which
+is automatically copied if a process later wants to update its
+content). The number of pages that the KSM daemon scans in a single
+pass and the time between the passes are configured using the
+:ref:`sysfs interface <ksm_sysfs>`.
+
+KSM only merges anonymous (private) pages, never pagecache (file) pages.
+KSM's merged pages were originally locked into kernel memory, but can now
+be swapped out just like other user pages (but sharing is broken when they
+are swapped back in: ksmd must rediscover their identity and merge again).
+
+Controlling KSM with madvise
+============================
+
+KSM only operates on those areas of address space which an application
+has advised to be likely candidates for merging, by using the madvise(2)
+system call::
+
+ int madvise(addr, length, MADV_MERGEABLE)
+
+The app may call
+
+::
+
+ int madvise(addr, length, MADV_UNMERGEABLE)
+
+to cancel that advice and restore unshared pages: whereupon KSM
+unmerges whatever it merged in that range. Note: this unmerging call
+may suddenly require more memory than is available - possibly failing
+with EAGAIN, but more probably arousing the Out-Of-Memory killer.
+
+If KSM is not configured into the running kernel, madvise MADV_MERGEABLE
+and MADV_UNMERGEABLE simply fail with EINVAL. If the running kernel was
+built with CONFIG_KSM=y, those calls will normally succeed: even if
+the KSM daemon is not currently running, MADV_MERGEABLE still registers
+the range for whenever the KSM daemon is started; even if the range
+cannot contain any pages which KSM could actually merge; even if
+MADV_UNMERGEABLE is applied to a range which was never MADV_MERGEABLE.
+
+If a region of memory must be split into at least one new MADV_MERGEABLE
+or MADV_UNMERGEABLE region, the madvise may return ENOMEM if the process
+would exceed ``vm.max_map_count`` (see Documentation/sysctl/vm.txt).
+
+Like other madvise calls, they are intended for use on mapped areas of
+the user address space: they will report ENOMEM if the specified range
+includes unmapped gaps (though working on the intervening mapped areas),
+and might fail with EAGAIN if there is not enough memory for internal structures.
+
+Applications should be considerate in their use of MADV_MERGEABLE,
+restricting its use to areas likely to benefit. KSM's scans may use a lot
+of processing power: some installations will disable KSM for that reason.
+
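+As an illustration of the calls above, a merge candidate area could be
+registered with the sketch below (a minimal standalone example, not
+part of the kernel sources)::
+
+  #include <sys/mman.h>
+  #include <stdio.h>
+
+  int main(void)
+  {
+          size_t len = 2 * 1024 * 1024;
+          char *buf = mmap(NULL, len, PROT_READ | PROT_WRITE,
+                           MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+          if (buf == MAP_FAILED)
+                  return 1;
+
+          /* Advise the kernel that this area is a merge candidate;
+             ksmd will scan it once KSM is running. */
+          if (madvise(buf, len, MADV_MERGEABLE) != 0) {
+                  perror("madvise");  /* EINVAL without CONFIG_KSM */
+                  return 1;
+          }
+          return 0;
+  }
+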
+.. _ksm_sysfs:
+
+KSM daemon sysfs interface
+==========================
+
+The KSM daemon is controlled by sysfs files in ``/sys/kernel/mm/ksm/``,
+readable by all but writable only by root:
+
+pages_to_scan
+ how many pages to scan before ksmd goes to sleep
+ e.g. ``echo 100 > /sys/kernel/mm/ksm/pages_to_scan``.
+
+ Default: 100 (chosen for demonstration purposes)
+
+sleep_millisecs
+ how many milliseconds ksmd should sleep before next scan
+ e.g. ``echo 20 > /sys/kernel/mm/ksm/sleep_millisecs``
+
+ Default: 20 (chosen for demonstration purposes)
+
+merge_across_nodes
+ specifies if pages from different NUMA nodes can be merged.
+ When set to 0, ksm merges only pages which physically reside
+	in the memory area of the same NUMA node. That brings lower
+	latency when accessing shared pages. Systems with more nodes, at
+ significant NUMA distances, are likely to benefit from the
+ lower latency of setting 0. Smaller systems, which need to
+ minimize memory usage, are likely to benefit from the greater
+ sharing of setting 1 (default). You may wish to compare how
+ your system performs under each setting, before deciding on
+	which to use. The ``merge_across_nodes`` setting can be changed only
+	when there are no ksm shared pages in the system: set run to 2 to
+	unmerge pages first, then set run to 1 after changing
+ ``merge_across_nodes``, to remerge according to the new setting.
+
+ Default: 1 (merging across nodes as in earlier releases)
+
+run
+ * set to 0 to stop ksmd from running but keep merged pages,
+ * set to 1 to run ksmd e.g. ``echo 1 > /sys/kernel/mm/ksm/run``,
+ * set to 2 to stop ksmd and unmerge all pages currently merged, but
+ leave mergeable areas registered for next run.
+
+ Default: 0 (must be changed to 1 to activate KSM, except if
+ CONFIG_SYSFS is disabled)
+
+use_zero_pages
+ specifies whether empty pages (i.e. allocated pages that only
+ contain zeroes) should be treated specially. When set to 1,
+ empty pages are merged with the kernel zero page(s) instead of
+ with each other as it would happen normally. This can improve
+ the performance on architectures with coloured zero pages,
+ depending on the workload. Care should be taken when enabling
+ this setting, as it can potentially degrade the performance of
+ KSM for some workloads, for example if the checksums of pages
+	that are candidates for merging match the checksum of an empty
+	page. This setting can be changed at any time; it is only
+ effective for pages merged after the change.
+
+ Default: 0 (normal KSM behaviour as in earlier releases)
+
+max_page_sharing
+ Maximum sharing allowed for each KSM page. This enforces a
+ deduplication limit to avoid high latency for virtual memory
+ operations that involve traversal of the virtual mappings that
+ share the KSM page. The minimum value is 2 as a newly created
+ KSM page will have at least two sharers. The higher this value
+ the faster KSM will merge the memory and the higher the
+ deduplication factor will be, but the slower the worst case
+ virtual mappings traversal could be for any given KSM
+ page. Slowing down this traversal means there will be higher
+ latency for certain virtual memory operations happening during
+ swapping, compaction, NUMA balancing and page migration, in
+ turn decreasing responsiveness for the caller of those virtual
+ memory operations. The scheduler latency of other tasks not
+ involved with the VM operations doing the virtual mappings
+ traversal is not affected by this parameter as these
+ traversals are always schedule friendly themselves.
+
+stable_node_chains_prune_millisecs
+ specifies how frequently KSM checks the metadata of the pages
+ that hit the deduplication limit for stale information.
+ Smaller millisecs values will free up the KSM metadata with
+ lower latency, but they will make ksmd use more CPU during the
+ scan. It's a noop if not a single KSM page has hit the
+ ``max_page_sharing`` limit yet.
+
+The effectiveness of KSM and MADV_MERGEABLE is shown in ``/sys/kernel/mm/ksm/``:
+
+pages_shared
+ how many shared pages are being used
+pages_sharing
+ how many more sites are sharing them i.e. how much memory is saved
+pages_unshared
+ how many pages are unique but repeatedly checked for merging
+pages_volatile
+ how many pages are changing too fast to be placed in a tree
+full_scans
+ how many times all mergeable areas have been scanned
+stable_node_chains
+ the number of KSM pages that hit the ``max_page_sharing`` limit
+stable_node_dups
+ number of duplicated KSM pages
+
+A high ratio of ``pages_sharing`` to ``pages_shared`` indicates good
+sharing, but a high ratio of ``pages_unshared`` to ``pages_sharing``
+indicates wasted effort. ``pages_volatile`` embraces several
+different kinds of activity, but a high proportion there would also
+indicate poor use of madvise MADV_MERGEABLE.
+
+The maximum possible ``pages_sharing/pages_shared`` ratio is limited by the
+``max_page_sharing`` tunable. To increase the ratio ``max_page_sharing`` must
+be increased accordingly.
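+
+As an illustrative sketch (the 64M size is an arbitrary example and
+error handling is trimmed), an application might mark an anonymous
+region mergeable and then watch the counters above::
+
+	#include <string.h>
+	#include <sys/mman.h>
+	#include <unistd.h>
+
+	int main(void)
+	{
+		size_t len = 64 << 20;	/* arbitrary example size */
+		/* KSM merges only anonymous pages registered via madvise() */
+		char *buf = mmap(NULL, len, PROT_READ | PROT_WRITE,
+				 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+
+		if (buf == MAP_FAILED)
+			return 1;
+		memset(buf, 0x5a, len);	/* duplicate content ksmd can merge */
+		if (madvise(buf, len, MADV_MERGEABLE))
+			return 1;
+		pause();	/* keep the mapping alive while ksmd scans */
+		return 0;
+	}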
+
+--
+Izik Eidus,
+Hugh Dickins, 17 Nov 2009
diff --git a/Documentation/admin-guide/mm/numa_memory_policy.rst b/Documentation/admin-guide/mm/numa_memory_policy.rst
new file mode 100644
index 000000000000..d78c5b315f72
--- /dev/null
+++ b/Documentation/admin-guide/mm/numa_memory_policy.rst
@@ -0,0 +1,495 @@
+.. _numa_memory_policy:
+
+==================
+NUMA Memory Policy
+==================
+
+What is NUMA Memory Policy?
+============================
+
+In the Linux kernel, "memory policy" determines from which node the kernel will
+allocate memory in a NUMA system or in an emulated NUMA system. Linux has
+supported platforms with Non-Uniform Memory Access architectures since 2.4.?.
+The current memory policy support was added to Linux 2.6 around May 2004. This
+document attempts to describe the concepts and APIs of the 2.6 memory policy
+support.
+
+Memory policies should not be confused with cpusets
+(``Documentation/cgroup-v1/cpusets.txt``),
+which is an administrative mechanism for restricting the nodes from which
+memory may be allocated by a set of processes. Memory policies are a
+programming interface that a NUMA-aware application can take advantage of. When
+both cpusets and policies are applied to a task, the restrictions of the cpuset
+takes priority. See :ref:`Memory Policies and cpusets <mem_pol_and_cpusets>`
+below for more details.
+
+Memory Policy Concepts
+======================
+
+Scope of Memory Policies
+------------------------
+
+The Linux kernel supports *scopes* of memory policy, described here from
+most general to most specific:
+
+System Default Policy
+ this policy is "hard coded" into the kernel. It is the policy
+ that governs all page allocations that aren't controlled by
+ one of the more specific policy scopes discussed below. When
+ the system is "up and running", the system default policy will
+ use "local allocation" described below. However, during boot
+ up, the system default policy will be set to interleave
+ allocations across all nodes with "sufficient" memory, so as
+ not to overload the initial boot node with boot-time
+ allocations.
+
+Task/Process Policy
+ this is an optional, per-task policy. When defined for a
+ specific task, this policy controls all page allocations made
+ by or on behalf of the task that aren't controlled by a more
+ specific scope. If a task does not define a task policy, then
+ all page allocations that would have been controlled by the
+ task policy "fall back" to the System Default Policy.
+
+ The task policy applies to the entire address space of a task. Thus,
+ it is inheritable, and indeed is inherited, across both fork()
+ [clone() w/o the CLONE_VM flag] and exec*(). This allows a parent task
+ to establish the task policy for a child task exec()'d from an
+ executable image that has no awareness of memory policy. See the
+ :ref:`Memory Policy APIs <memory_policy_apis>` section,
+ below, for an overview of the system call
+ that a task may use to set/change its task/process policy.
+
+ In a multi-threaded task, task policies apply only to the thread
+ [Linux kernel task] that installs the policy and any threads
+ subsequently created by that thread. Any sibling threads existing
+ at the time a new task policy is installed retain their current
+ policy.
+
+ A task policy applies only to pages allocated after the policy is
+ installed. Any pages already faulted in by the task when the task
+ changes its task policy remain where they were allocated based on
+ the policy at the time they were allocated.
+
+.. _vma_policy:
+
+VMA Policy
+ A "VMA" or "Virtual Memory Area" refers to a range of a task's
+ virtual address space. A task may define a specific policy for a range
+ of its virtual address space. See the
+ :ref:`Memory Policy APIs <memory_policy_apis>` section,
+ below, for an overview of the mbind() system call used to set a VMA
+ policy.
+
+ A VMA policy will govern the allocation of pages that back
+ this region of the address space. Any regions of the task's
+ address space that don't have an explicit VMA policy will fall
+ back to the task policy, which may itself fall back to the
+ System Default Policy.
+
+ VMA policies have a few complicating details:
+
+ * VMA policy applies ONLY to anonymous pages. These include
+ pages allocated for anonymous segments, such as the task
+ stack and heap, and any regions of the address space
+ mmap()ed with the MAP_ANONYMOUS flag. If a VMA policy is
+ applied to a file mapping, it will be ignored if the mapping
+ used the MAP_SHARED flag. If the file mapping used the
+ MAP_PRIVATE flag, the VMA policy will only be applied when
+ an anonymous page is allocated on an attempt to write to the
+ mapping-- i.e., at Copy-On-Write.
+
+ * VMA policies are shared between all tasks that share a
+ virtual address space--a.k.a. threads--independent of when
+ the policy is installed; and they are inherited across
+ fork(). However, because VMA policies refer to a specific
+ region of a task's address space, and because the address
+ space is discarded and recreated on exec*(), VMA policies
+ are NOT inheritable across exec(). Thus, only NUMA-aware
+ applications may use VMA policies.
+
+ * A task may install a new VMA policy on a sub-range of a
+ previously mmap()ed region. When this happens, Linux splits
+ the existing virtual memory area into 2 or 3 VMAs, each with
+ its own policy.
+
+ * By default, VMA policy applies only to pages allocated after
+ the policy is installed. Any pages already faulted into the
+ VMA range remain where they were allocated based on the
+ policy at the time they were allocated. However, since
+ 2.6.16, Linux supports page migration via the mbind() system
+ call, so that page contents can be moved to match a newly
+ installed policy.
+
+Shared Policy
+ Conceptually, shared policies apply to "memory objects" mapped
+ shared into one or more tasks' distinct address spaces. An
+ application installs shared policies the same way as VMA
+ policies--using the mbind() system call specifying a range of
+ virtual addresses that map the shared object. However, unlike
+ VMA policies, which can be considered to be an attribute of a
+ range of a task's address space, shared policies apply
+ directly to the shared object. Thus, all tasks that attach to
+ the object share the policy, and all pages allocated for the
+ shared object, by any task, will obey the shared policy.
+
+ As of 2.6.22, only shared memory segments, created by shmget() or
+ mmap(MAP_ANONYMOUS|MAP_SHARED), support shared policy. When shared
+ policy support was added to Linux, the associated data structures were
+ added to hugetlbfs shmem segments. At the time, hugetlbfs did not
+ support allocation at fault time--a.k.a lazy allocation--so hugetlbfs
+ shmem segments were never "hooked up" to the shared policy support.
+ Although hugetlbfs segments now support lazy allocation, their support
+ for shared policy has not been completed.
+
+ As mentioned above in :ref:`VMA policies <vma_policy>` section,
+ allocations of page cache pages for regular files mmap()ed
+ with MAP_SHARED ignore any VMA policy installed on the virtual
+ address range backed by the shared file mapping. Rather,
+ shared page cache pages, including pages backing private
+ mappings that have not yet been written by the task, follow
+ task policy, if any, else System Default Policy.
+
+ The shared policy infrastructure supports different policies on subset
+ ranges of the shared object. However, Linux still splits the VMA of
+ the task that installs the policy for each range of distinct policy.
+ Thus, different tasks that attach to a shared memory segment can have
+ different VMA configurations mapping that one shared object. This
+ can be seen by examining the /proc/<pid>/numa_maps of tasks sharing
+ a shared memory region, when one task has installed shared policy on
+ one or more ranges of the region.
+
+Components of Memory Policies
+-----------------------------
+
+A NUMA memory policy consists of a "mode", optional mode flags, and
+an optional set of nodes. The mode determines the behavior of the
+policy, the optional mode flags determine the behavior of the mode,
+and the optional set of nodes can be viewed as the arguments to the
+policy behavior.
+
+Internally, memory policies are implemented by a reference counted
+structure, struct mempolicy. Details of this structure will be
+discussed in context, below, as required to explain the behavior.
+
+NUMA memory policy supports the following 4 behavioral modes:
+
+Default Mode--MPOL_DEFAULT
+ This mode is only used in the memory policy APIs. Internally,
+ MPOL_DEFAULT is converted to the NULL memory policy in all
+ policy scopes. Any existing non-default policy will simply be
+ removed when MPOL_DEFAULT is specified. As a result,
+ MPOL_DEFAULT means "fall back to the next most specific policy
+ scope."
+
+ For example, a NULL or default task policy will fall back to the
+ system default policy. A NULL or default vma policy will fall
+ back to the task policy.
+
+ When specified in one of the memory policy APIs, the Default mode
+ does not use the optional set of nodes.
+
+ It is an error for the set of nodes specified for this policy to
+ be non-empty.
+
+MPOL_BIND
+ This mode specifies that memory must come from the set of
+ nodes specified by the policy. Memory will be allocated from
+ the node in the set with sufficient free memory that is
+ closest to the node where the allocation takes place.
+
+MPOL_PREFERRED
+ This mode specifies that the allocation should be attempted
+ from the single node specified in the policy. If that
+ allocation fails, the kernel will search other nodes, in order
+ of increasing distance from the preferred node based on
+ information provided by the platform firmware.
+
+ Internally, the Preferred policy uses a single node--the
+ preferred_node member of struct mempolicy. When the internal
+ mode flag MPOL_F_LOCAL is set, the preferred_node is ignored
+ and the policy is interpreted as local allocation. "Local"
+ allocation policy can be viewed as a Preferred policy that
+ starts at the node containing the cpu where the allocation
+ takes place.
+
+ It is possible for the user to specify that local allocation
+ is always preferred by passing an empty nodemask with this
+ mode. If an empty nodemask is passed, the policy cannot use
+ the MPOL_F_STATIC_NODES or MPOL_F_RELATIVE_NODES flags
+ described below.
+
+MPOL_INTERLEAVED
+ This mode specifies that page allocations be interleaved, on a
+ page granularity, across the nodes specified in the policy.
+ This mode also behaves slightly differently, based on the
+ context where it is used:
+
+ For allocation of anonymous pages and shared memory pages,
+ Interleave mode indexes the set of nodes specified by the
+ policy using the page offset of the faulting address into the
+ segment [VMA] containing the address modulo the number of
+ nodes specified by the policy. It then attempts to allocate a
+ page, starting at the selected node, as if the node had been
+ specified by a Preferred policy or had been selected by a
+ local allocation. That is, allocation will follow the per
+ node zonelist.
+
+ For allocation of page cache pages, Interleave mode indexes
+ the set of nodes specified by the policy using a node counter
+ maintained per task. This counter wraps around to the lowest
+ specified node after it reaches the highest specified node.
+ This will tend to spread the pages out over the nodes
+ specified by the policy based on the order in which they are
+ allocated, rather than based on any page offset into an
+ address range or file. During system boot up, the temporary
+ interleaved system default policy works in this mode.
+
+NUMA memory policy supports the following optional mode flags:
+
+MPOL_F_STATIC_NODES
+ This flag specifies that the nodemask passed by
+ the user should not be remapped if the task or VMA's set of allowed
+ nodes changes after the memory policy has been defined.
+
+ Without this flag, any time a mempolicy is rebound because of a
+ change in the set of allowed nodes, the node (Preferred) or
+ nodemask (Bind, Interleave) is remapped to the new set of
+ allowed nodes. This may result in nodes being used that were
+ previously undesired.
+
+ With this flag, if the user-specified nodes overlap with the
+ nodes allowed by the task's cpuset, then the memory policy is
+ applied to their intersection. If the two sets of nodes do not
+ overlap, the Default policy is used.
+
+ For example, consider a task that is attached to a cpuset with
+ mems 1-3 that sets an Interleave policy over the same set. If
+ the cpuset's mems change to 3-5, the Interleave will now occur
+ over nodes 3, 4, and 5. With this flag, however, since only node
+ 3 is allowed from the user's nodemask, the "interleave" only
+ occurs over that node. If no nodes from the user's nodemask are
+ now allowed, the Default behavior is used.
+
+ MPOL_F_STATIC_NODES cannot be combined with the
+ MPOL_F_RELATIVE_NODES flag. It also cannot be used for
+ MPOL_PREFERRED policies that were created with an empty nodemask
+ (local allocation).
+
+MPOL_F_RELATIVE_NODES
+ This flag specifies that the nodemask passed
+ by the user will be mapped relative to the set of the task or VMA's
+ set of allowed nodes. The kernel stores the user-passed nodemask,
+ and if the allowed nodes changes, then that original nodemask will
+ be remapped relative to the new set of allowed nodes.
+
+ Without this flag (and without MPOL_F_STATIC_NODES), anytime a
+ mempolicy is rebound because of a change in the set of allowed
+ nodes, the node (Preferred) or nodemask (Bind, Interleave) is
+ remapped to the new set of allowed nodes. That remap may not
+ preserve the relative nature of the user's passed nodemask to its
+ set of allowed nodes upon successive rebinds: a nodemask of
+ 1,3,5 may be remapped to 7-9 and then to 1-3 if the set of
+ allowed nodes is restored to its original state.
+
+ With this flag, the remap is done so that the node numbers from
+ the user's passed nodemask are relative to the set of allowed
+ nodes. In other words, if nodes 0, 2, and 4 are set in the user's
+ nodemask, the policy will be effected over the first (and in the
+ Bind or Interleave case, the third and fifth) nodes in the set of
+ allowed nodes. The nodemask passed by the user represents nodes
+ relative to task or VMA's set of allowed nodes.
+
+ If the user's nodemask includes nodes that are outside the range
+ of the new set of allowed nodes (for example, node 5 is set in
+ the user's nodemask when the set of allowed nodes is only 0-3),
+ then the remap wraps around to the beginning of the nodemask and,
+ if not already set, sets the node in the mempolicy nodemask.
+
+ For example, consider a task that is attached to a cpuset with
+ mems 2-5 that sets an Interleave policy over the same set with
+ MPOL_F_RELATIVE_NODES. If the cpuset's mems change to 3-7, the
+ interleave now occurs over nodes 3,5-7. If the cpuset's mems
+ then change to 0,2-3,5, then the interleave occurs over nodes
+ 0,2-3,5.
+
+ Thanks to the consistent remapping, applications preparing
+ nodemasks to specify memory policies using this flag should
+ disregard their current, actual cpuset imposed memory placement
+ and prepare the nodemask as if they were always located on
+ memory nodes 0 to N-1, where N is the number of memory nodes the
+ policy is intended to manage. Let the kernel then remap to the
+ set of memory nodes allowed by the task's cpuset, as that may
+ change over time.
+
+ MPOL_F_RELATIVE_NODES cannot be combined with the
+ MPOL_F_STATIC_NODES flag. It also cannot be used for
+ MPOL_PREFERRED policies that were created with an empty nodemask
+ (local allocation).
+
+Memory Policy Reference Counting
+================================
+
+To resolve use/free races, struct mempolicy contains an atomic reference
+count field. Internal interfaces, mpol_get()/mpol_put() increment and
+decrement this reference count, respectively. mpol_put() will only free
+the structure back to the mempolicy kmem cache when the reference count
+goes to zero.
+
+When a new memory policy is allocated, its reference count is initialized
+to '1', representing the reference held by the task that is installing the
+new policy. When a pointer to a memory policy structure is stored in another
+structure, another reference is added, as the task's reference will be dropped
+on completion of the policy installation.
+
+During run-time "usage" of the policy, we attempt to minimize atomic operations
+on the reference count, as this can lead to cache lines bouncing between cpus
+and NUMA nodes. "Usage" here means one of the following:
+
+1) querying of the policy, either by the task itself [using the get_mempolicy()
+ API discussed below] or by another task using the /proc/<pid>/numa_maps
+ interface.
+
+2) examination of the policy to determine the policy mode and associated node
+ or node lists, if any, for page allocation. This is considered a "hot
+ path". Note that for MPOL_BIND, the "usage" extends across the entire
+ allocation process, which may sleep during page reclamation, because the
+ BIND policy nodemask is used, by reference, to filter ineligible nodes.
+
+We can avoid taking an extra reference during the usages listed above as
+follows:
+
+1) we never need to get/free the system default policy as this is never
+ changed nor freed, once the system is up and running.
+
+2) for querying the policy, we do not need to take an extra reference on the
+ target task's task policy nor vma policies because we always acquire the
+ task's mm's mmap_sem for read during the query. The set_mempolicy() and
+ mbind() APIs [see below] always acquire the mmap_sem for write when
+ installing or replacing task or vma policies. Thus, there is no possibility
+ of a task or thread freeing a policy while another task or thread is
+ querying it.
+
+3) Page allocation usage of task or vma policy occurs in the fault path where
+ we hold the mmap_sem for read. Again, because replacing the task or vma
+ policy requires that the mmap_sem be held for write, the policy can't be
+ freed out from under us while we're using it for page allocation.
+
+4) Shared policies require special consideration. One task can replace a
+ shared memory policy while another task, with a distinct mmap_sem, is
+ querying or allocating a page based on the policy. To resolve this
+ potential race, the shared policy infrastructure adds an extra reference
+ to the shared policy during lookup while holding a spin lock on the shared
+ policy management structure. This requires that we drop this extra
+ reference when we're finished "using" the policy. We must drop the
+ extra reference on shared policies in the same query/allocation paths
+ used for non-shared policies. For this reason, shared policies are marked
+ as such, and the extra reference is dropped "conditionally"--i.e., only
+ for shared policies.
+
+ Because of this extra reference counting, and because we must lookup
+ shared policies in a tree structure under spinlock, shared policies are
+ more expensive to use in the page allocation path. This is especially
+ true for shared policies on shared memory regions shared by tasks running
+ on different NUMA nodes. This extra overhead can be avoided by always
+ falling back to task or system default policy for shared memory regions,
+ or by prefaulting the entire shared memory region into memory and locking
+ it down. However, this might not be appropriate for all applications.
+
+.. _memory_policy_apis:
+
+Memory Policy APIs
+==================
+
+Linux supports 3 system calls for controlling memory policy. These APIs
+always affect only the calling task, the calling task's address space, or
+some shared object mapped into the calling task's address space.
+
+.. note::
+ the headers that define these APIs and the parameter data types for
+ user space applications reside in a package that is not part of the
+ Linux kernel. The kernel system call interfaces, with the 'sys\_'
+ prefix, are defined in <linux/syscalls.h>; the mode and flag
+ definitions are defined in <linux/mempolicy.h>.
+
+Set [Task] Memory Policy::
+
+ long set_mempolicy(int mode, const unsigned long *nmask,
+ unsigned long maxnode);
+
+Sets the calling task's "task/process memory policy" to the mode
+specified by the 'mode' argument and the set of nodes defined by
+'nmask'. 'nmask' points to a bit mask of node ids containing at least
+'maxnode' ids. Optional mode flags may be passed by combining the
+'mode' argument with the flag (for example: MPOL_INTERLEAVE |
+MPOL_F_STATIC_NODES).
+
+See the set_mempolicy(2) man page for more details.
+
+
+Get [Task] Memory Policy or Related Information::
+
+ long get_mempolicy(int *mode,
+ const unsigned long *nmask, unsigned long maxnode,
+ void *addr, int flags);
+
+Queries the "task/process memory policy" of the calling task, or the
+policy or location of a specified virtual address, depending on the
+'flags' argument.
+
+See the get_mempolicy(2) man page for more details.
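+
+As an illustrative sketch (assuming nodes 0 and 1 exist; ``<numaif.h>``
+and the wrappers come from the external package mentioned in the note
+above, so link with ``-lnuma``), a task might install an interleave
+policy and then read it back::
+
+	#include <numaif.h>
+	#include <stdio.h>
+
+	int main(void)
+	{
+		unsigned long nmask = 0x3;	/* nodes 0 and 1, assumed present */
+		int mode;
+
+		/* interleave this task's future page allocations */
+		if (set_mempolicy(MPOL_INTERLEAVE, &nmask, 8 * sizeof(nmask)))
+			perror("set_mempolicy");
+
+		/* query the task policy back; no address, no flags */
+		if (get_mempolicy(&mode, NULL, 0, NULL, 0))
+			perror("get_mempolicy");
+		else
+			printf("interleave: %d\n", mode == MPOL_INTERLEAVE);
+		return 0;
+	}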
+
+
+Install VMA/Shared Policy for a Range of Task's Address Space::
+
+ long mbind(void *start, unsigned long len, int mode,
+ const unsigned long *nmask, unsigned long maxnode,
+ unsigned flags);
+
+mbind() installs the policy specified by (mode, nmask, maxnode) as a
+VMA policy for the range of the calling task's address space specified
+by the 'start' and 'len' arguments. Additional actions may be
+requested via the 'flags' argument.
+
+See the mbind(2) man page for more details.
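+
+Similarly, a sketch of installing a Bind VMA policy on a fresh mapping
+(node 0 is an assumed example; error handling is trimmed)::
+
+	#include <numaif.h>
+	#include <stdio.h>
+	#include <sys/mman.h>
+
+	int main(void)
+	{
+		size_t len = 2 << 20;		/* example length */
+		unsigned long nmask = 0x1;	/* node 0 only, assumed present */
+		void *start = mmap(NULL, len, PROT_READ | PROT_WRITE,
+				   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+
+		if (start == MAP_FAILED)
+			return 1;
+		/* install the VMA policy before the pages are faulted in */
+		if (mbind(start, len, MPOL_BIND, &nmask, 8 * sizeof(nmask), 0))
+			perror("mbind");
+		return 0;
+	}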
+
+Memory Policy Command Line Interface
+====================================
+
+Although not strictly part of the Linux implementation of memory policy,
+a command line tool, numactl(8), exists that allows one to:
+
++ set the task policy for a specified program via set_mempolicy(2), fork(2) and
+ exec(2)
+
++ set the shared policy for a shared memory segment via mbind(2)
+
+The numactl(8) tool is packaged with the run-time version of the library
+containing the memory policy system call wrappers. Some distributions
+package the headers and compile-time libraries in a separate development
+package.
+
+.. _mem_pol_and_cpusets:
+
+Memory Policies and cpusets
+===========================
+
+Memory policies work within cpusets as described above. For memory policies
+that require a node or set of nodes, the nodes are restricted to the set of
+nodes whose memories are allowed by the cpuset constraints. If the nodemask
+specified for the policy contains nodes that are not allowed by the cpuset and
+MPOL_F_RELATIVE_NODES is not used, the intersection of the set of nodes
+specified for the policy and the set of nodes with memory is used. If the
+result is the empty set, the policy is considered invalid and cannot be
+installed. If MPOL_F_RELATIVE_NODES is used, the policy's nodes are mapped
+onto and folded into the task's set of allowed nodes as previously described.
+
+The interaction of memory policies and cpusets can be problematic when tasks
+in two cpusets share access to a memory region, such as shared memory segments
+created by shmget() or mmap() with the MAP_ANONYMOUS and MAP_SHARED flags. If
+any of the tasks installs shared policy on the region, only nodes whose
+memories are allowed in both cpusets may be used in the policies. Obtaining
+this information requires "stepping outside" the memory policy APIs to use the
+cpuset information and requires that one know in what cpusets other tasks
+might be attaching to the shared region. Furthermore, if the cpusets' allowed
+memory sets are disjoint, "local" allocation is the only valid policy.
diff --git a/Documentation/vm/pagemap.txt b/Documentation/admin-guide/mm/pagemap.rst
index eafcefa15261..577af85beb41 100644
--- a/Documentation/vm/pagemap.txt
+++ b/Documentation/admin-guide/mm/pagemap.rst
@@ -1,21 +1,25 @@
-pagemap, from the userspace perspective
----------------------------------------
+.. _pagemap:
+
+=============================
+Examining Process Page Tables
+=============================
pagemap is a new (as of 2.6.25) set of interfaces in the kernel that allow
userspace programs to examine the page tables and related information by
-reading files in /proc.
+reading files in ``/proc``.
There are four components to pagemap:
- * /proc/pid/pagemap. This file lets a userspace process find out which
+ * ``/proc/pid/pagemap``. This file lets a userspace process find out which
physical frame each virtual page is mapped to. It contains one 64-bit
value for each virtual page, containing the following data (from
- fs/proc/task_mmu.c, above pagemap_read):
+ ``fs/proc/task_mmu.c``, above pagemap_read):
* Bits 0-54 page frame number (PFN) if present
* Bits 0-4 swap type if swapped
* Bits 5-54 swap offset if swapped
- * Bit 55 pte is soft-dirty (see Documentation/vm/soft-dirty.txt)
+ * Bit 55 pte is soft-dirty (see
+ :ref:`Documentation/admin-guide/mm/soft-dirty.rst <soft_dirty>`)
* Bit 56 page exclusively mapped (since 4.2)
* Bits 57-60 zero
* Bit 61 page is file-page or shared-anon (since 3.5)
@@ -33,28 +37,28 @@ There are four components to pagemap:
precisely which pages are mapped (or in swap) and comparing mapped
pages between processes.
- Efficient users of this interface will use /proc/pid/maps to
+ Efficient users of this interface will use ``/proc/pid/maps`` to
determine which areas of memory are actually mapped and llseek to
skip over unmapped regions.
- * /proc/kpagecount. This file contains a 64-bit count of the number of
+ * ``/proc/kpagecount``. This file contains a 64-bit count of the number of
times each page is mapped, indexed by PFN.
- * /proc/kpageflags. This file contains a 64-bit set of flags for each
+ * ``/proc/kpageflags``. This file contains a 64-bit set of flags for each
page, indexed by PFN.
- The flags are (from fs/proc/page.c, above kpageflags_read):
-
- 0. LOCKED
- 1. ERROR
- 2. REFERENCED
- 3. UPTODATE
- 4. DIRTY
- 5. LRU
- 6. ACTIVE
- 7. SLAB
- 8. WRITEBACK
- 9. RECLAIM
+ The flags are (from ``fs/proc/page.c``, above kpageflags_read):
+
+ 0. LOCKED
+ 1. ERROR
+ 2. REFERENCED
+ 3. UPTODATE
+ 4. DIRTY
+ 5. LRU
+ 6. ACTIVE
+ 7. SLAB
+ 8. WRITEBACK
+ 9. RECLAIM
10. BUDDY
11. MMAP
12. ANON
@@ -72,98 +76,111 @@ There are four components to pagemap:
24. ZERO_PAGE
25. IDLE
- * /proc/kpagecgroup. This file contains a 64-bit inode number of the
+ * ``/proc/kpagecgroup``. This file contains a 64-bit inode number of the
memory cgroup each page is charged to, indexed by PFN. Only available when
CONFIG_MEMCG is set.
-Short descriptions to the page flags:
-
- 0. LOCKED
- page is being locked for exclusive access, eg. by undergoing read/write IO
-
- 7. SLAB
- page is managed by the SLAB/SLOB/SLUB/SLQB kernel memory allocator
- When compound page is used, SLUB/SLQB will only set this flag on the head
- page; SLOB will not flag it at all.
+Short descriptions of the page flags
+====================================
-10. BUDDY
+0 - LOCKED
+ page is being locked for exclusive access, e.g. by undergoing read/write IO
+7 - SLAB
+ page is managed by the SLAB/SLOB/SLUB/SLQB kernel memory allocator.
+ When a compound page is used, SLUB/SLQB will only set this flag on the
+ head page; SLOB will not flag it at all.
+10 - BUDDY
a free memory block managed by the buddy system allocator.
The buddy system organizes free memory in blocks of various orders.
An order N block has 2^N physically contiguous pages, with the BUDDY flag
set for and _only_ for the first page.
-
-15. COMPOUND_HEAD
-16. COMPOUND_TAIL
+15 - COMPOUND_HEAD
A compound page with order N consists of 2^N physically contiguous pages.
+ A compound page with order 2 takes the form of "HTTT", where H denotes its
+ head page and T denotes its tail page(s). The major consumers of compound
- pages are hugeTLB pages (Documentation/vm/hugetlbpage.txt), the SLUB etc.
- memory allocators and various device drivers. However in this interface,
- only huge/giga pages are made visible to end users.
-17. HUGE
+ pages are hugeTLB pages
+ (:ref:`Documentation/admin-guide/mm/hugetlbpage.rst <hugetlbpage>`),
+ the SLUB etc. memory allocators and various device drivers.
+ However in this interface, only huge/giga pages are made visible
+ to end users.
+16 - COMPOUND_TAIL
+ A compound page tail (see description above).
+17 - HUGE
this is an integral part of a HugeTLB page
-
-19. HWPOISON
+19 - HWPOISON
hardware detected memory corruption on this page: don't touch the data!
-
-20. NOPAGE
+20 - NOPAGE
no page frame exists at the requested address
-
-21. KSM
+21 - KSM
identical memory pages dynamically shared between one or more processes
-
-22. THP
+22 - THP
contiguous pages which construct transparent hugepages
-
-23. BALLOON
+23 - BALLOON
balloon compaction page
-
-24. ZERO_PAGE
+24 - ZERO_PAGE
zero page for pfn_zero or huge_zero page
-
-25. IDLE
+25 - IDLE
page has not been accessed since it was marked idle (see
- Documentation/vm/idle_page_tracking.txt). Note that this flag may be
- stale in case the page was accessed via a PTE. To make sure the flag
- is up-to-date one has to read /sys/kernel/mm/page_idle/bitmap first.
-
- [IO related page flags]
- 1. ERROR IO error occurred
- 3. UPTODATE page has up-to-date data
- ie. for file backed page: (in-memory data revision >= on-disk one)
- 4. DIRTY page has been written to, hence contains new data
- ie. for file backed page: (in-memory data revision > on-disk one)
- 8. WRITEBACK page is being synced to disk
-
- [LRU related page flags]
- 5. LRU page is in one of the LRU lists
- 6. ACTIVE page is in the active LRU list
-18. UNEVICTABLE page is in the unevictable (non-)LRU list
- It is somehow pinned and not a candidate for LRU page reclaims,
- eg. ramfs pages, shmctl(SHM_LOCK) and mlock() memory segments
- 2. REFERENCED page has been referenced since last LRU list enqueue/requeue
- 9. RECLAIM page will be reclaimed soon after its pageout IO completed
-11. MMAP a memory mapped page
-12. ANON a memory mapped page that is not part of a file
-13. SWAPCACHE page is mapped to swap space, ie. has an associated swap entry
-14. SWAPBACKED page is backed by swap/RAM
+ :ref:`Documentation/admin-guide/mm/idle_page_tracking.rst <idle_page_tracking>`).
+ Note that this flag may be stale in case the page was accessed via
+ a PTE. To make sure the flag is up-to-date one has to read
+ ``/sys/kernel/mm/page_idle/bitmap`` first.
+
+IO related page flags
+---------------------
+
+1 - ERROR
+ IO error occurred
+3 - UPTODATE
+ page has up-to-date data
+ i.e. for file backed page: (in-memory data revision >= on-disk one)
+4 - DIRTY
+ page has been written to, hence contains new data
+ i.e. for file backed page: (in-memory data revision > on-disk one)
+8 - WRITEBACK
+ page is being synced to disk
+
+LRU related page flags
+----------------------
+
+5 - LRU
+ page is in one of the LRU lists
+6 - ACTIVE
+ page is in the active LRU list
+18 - UNEVICTABLE
+ page is in the unevictable (non-)LRU list. It is somehow pinned and
+ not a candidate for LRU page reclaims, e.g. ramfs pages,
+ shmctl(SHM_LOCK) and mlock() memory segments
+2 - REFERENCED
+ page has been referenced since last LRU list enqueue/requeue
+9 - RECLAIM
+ page will be reclaimed soon after its pageout IO completed
+11 - MMAP
+ a memory mapped page
+12 - ANON
+ a memory mapped page that is not part of a file
+13 - SWAPCACHE
+ page is mapped to swap space, i.e. has an associated swap entry
+14 - SWAPBACKED
+ page is backed by swap/RAM
The page-types tool in the tools/vm directory can be used to query the
above flags.
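
As an illustrative sketch of querying the flags by hand (root is
required; the PFN is a made-up example)::

	#include <fcntl.h>
	#include <stdint.h>
	#include <stdio.h>
	#include <unistd.h>

	int main(void)
	{
		uint64_t pfn = 0x1000;	/* hypothetical example PFN */
		uint64_t flags;
		int fd = open("/proc/kpageflags", O_RDONLY);

		/* one 64-bit flag word per PFN */
		if (fd < 0 || pread(fd, &flags, 8, pfn * 8) != 8)
			return 1;
		printf("LOCKED=%d LRU=%d\n",
		       (int)(flags >> 0 & 1), (int)(flags >> 5 & 1));
		return 0;
	}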
-Using pagemap to do something useful:
+Using pagemap to do something useful
+====================================
The general procedure for using pagemap to find out about a process' memory
usage goes like this:
- 1. Read /proc/pid/maps to determine which parts of the memory space are
+ 1. Read ``/proc/pid/maps`` to determine which parts of the memory space are
mapped to what.
2. Select the maps you are interested in -- all of them, or a particular
library, or the stack or the heap, etc.
- 3. Open /proc/pid/pagemap and seek to the pages you would like to examine.
+ 3. Open ``/proc/pid/pagemap`` and seek to the pages you would like to examine.
4. Read a u64 for each page from pagemap.
- 5. Open /proc/kpagecount and/or /proc/kpageflags. For each PFN you just
- read, seek to that entry in the file, and read the data you want.
+ 5. Open ``/proc/kpagecount`` and/or ``/proc/kpageflags``. For each PFN you
+ just read, seek to that entry in the file, and read the data you want.
For example, to find the "unique set size" (USS), which is the amount of
memory that a process is using that is not shared with any other process,
@@ -171,7 +188,8 @@ you can go through every map in the process, find the PFNs, look those up
in kpagecount, and tally up the number of pages that are only referenced
once.
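
A condensed sketch of steps 3-5 for a single address (root privileges
are assumed so the PFN is visible, and a 4K page size is assumed)::

	#include <fcntl.h>
	#include <stdint.h>
	#include <stdio.h>
	#include <unistd.h>

	int main(void)
	{
		static char page[4096];		/* something mapped to inspect */
		uint64_t entry, count;
		int pm = open("/proc/self/pagemap", O_RDONLY);
		int kc = open("/proc/kpagecount", O_RDONLY);

		if (pm < 0 || kc < 0)
			return 1;
		page[0] = 1;			/* fault the page in */
		/* one 64-bit entry per virtual page */
		if (pread(pm, &entry, 8, (uintptr_t)page / 4096 * 8) != 8)
			return 1;
		if (entry >> 63 & 1) {		/* bit 63: page present */
			uint64_t pfn = entry & ((1ULL << 55) - 1); /* bits 0-54 */

			if (pread(kc, &count, 8, pfn * 8) == 8)
				printf("pfn=0x%llx mapcount=%llu\n",
				       (unsigned long long)pfn,
				       (unsigned long long)count);
		}
		return 0;
	}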
-Other notes:
+Other notes
+===========
Reading from any of the files will return -EINVAL if you are not starting
the read on an 8-byte boundary (e.g., if you sought an odd number of bytes
diff --git a/Documentation/vm/soft-dirty.txt b/Documentation/admin-guide/mm/soft-dirty.rst
index 55684d11a1e8..cb0cfd6672fa 100644
--- a/Documentation/vm/soft-dirty.txt
+++ b/Documentation/admin-guide/mm/soft-dirty.rst
@@ -1,34 +1,38 @@
- SOFT-DIRTY PTEs
+.. _soft_dirty:
- The soft-dirty is a bit on a PTE which helps to track which pages a task
+===============
+Soft-Dirty PTEs
+===============
+
+The soft-dirty is a bit on a PTE which helps to track which pages a task
writes to. In order to do this tracking one should
1. Clear soft-dirty bits from the task's PTEs.
- This is done by writing "4" into the /proc/PID/clear_refs file of the
+ This is done by writing "4" into the ``/proc/PID/clear_refs`` file of the
task in question.
2. Wait some time.
3. Read soft-dirty bits from the PTEs.
- This is done by reading from the /proc/PID/pagemap. The bit 55 of the
+ This is done by reading from ``/proc/PID/pagemap``. Bit 55 of the
64-bit qword is the soft-dirty one. If set, the respective PTE was
written to since step 1.
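
A minimal sketch of the three steps for one page (a 4K page size is
assumed; error checking is trimmed)::

	#include <fcntl.h>
	#include <stdint.h>
	#include <stdio.h>
	#include <unistd.h>

	int main(void)
	{
		static char page[4096];
		int cr = open("/proc/self/clear_refs", O_WRONLY);
		int pm = open("/proc/self/pagemap", O_RDONLY);
		uint64_t entry;

		write(cr, "4", 1);	/* step 1: clear soft-dirty bits */
		page[0] = 1;		/* step 2: write to the page */
		/* step 3: read the pagemap entry for the page */
		pread(pm, &entry, 8, (uintptr_t)page / 4096 * 8);
		printf("soft-dirty: %d\n", (int)(entry >> 55 & 1)); /* bit 55 */
		return 0;
	}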
- Internally, to do this tracking, the writable bit is cleared from PTEs
+Internally, to do this tracking, the writable bit is cleared from PTEs
when the soft-dirty bit is cleared. So, after this, when the task tries to
modify a page at some virtual address the #PF occurs and the kernel sets
the soft-dirty bit on the respective PTE.
- Note, that although all the task's address space is marked as r/o after the
+Note that although all the task's address space is marked as r/o after the
soft-dirty bits clear, the #PF-s that occur after that are processed fast.
This is so, since the pages are still mapped to physical memory, and thus all
the kernel does is find this fact out and put both writable and soft-dirty
bits on the PTE.
- While in most cases tracking memory changes by #PF-s is more than enough
+While in most cases tracking memory changes by #PF-s is more than enough
there is still a scenario when we can lose soft dirty bits -- a task
unmaps a previously mapped memory region and then maps a new one at exactly
the same place. When unmap is called, the kernel internally clears PTE values
@@ -36,7 +40,7 @@ including soft dirty bits. To notify user space application about such
memory region renewal the kernel always marks new memory regions (and
expanded regions) as soft dirty.
- This feature is actively used by the checkpoint-restore project. You
+This feature is actively used by the checkpoint-restore project. You
can find more details about it on http://criu.org
diff --git a/Documentation/admin-guide/mm/transhuge.rst b/Documentation/admin-guide/mm/transhuge.rst
new file mode 100644
index 000000000000..7ab93a8404b9
--- /dev/null
+++ b/Documentation/admin-guide/mm/transhuge.rst
@@ -0,0 +1,418 @@
+.. _admin_guide_transhuge:
+
+============================
+Transparent Hugepage Support
+============================
+
+Objective
+=========
+
+Performance critical computing applications dealing with large memory
+working sets are already running on top of libhugetlbfs and in turn
+hugetlbfs. Transparent HugePage Support (THP) is an alternative means of
+using huge pages for the backing of virtual memory, one that supports the
+automatic promotion and demotion of page sizes and avoids the
+shortcomings of hugetlbfs.
+
+Currently THP only works for anonymous memory mappings and tmpfs/shmem,
+but in the future it can expand to other filesystems.
+
+.. note::
+ in the examples below we presume that the basic page size is 4K and
+ the huge page size is 2M, although the actual numbers may vary
+ depending on the CPU architecture.
+
+The reason applications run faster is because of two factors. The
+first factor is almost completely irrelevant and not of significant
+interest, because it also has the downside of requiring larger
+clear-page and copy-page operations in page faults, which is a
+potentially negative effect. The first factor consists in taking a
+single page fault for each 2M virtual region touched by userland (thus
+reducing the enter/exit kernel frequency by a factor of 512). This
+only matters the first time the memory is accessed for the lifetime of
+a memory mapping. The second, long lasting and much more important
+factor will affect all subsequent accesses to the memory for the whole
+runtime of the application. The second factor consists of two
+components:
+
+1) the TLB miss will run faster (especially with virtualization using
+ nested pagetables but almost always also on bare metal without
+ virtualization)
+
+2) a single TLB entry will be mapping a much larger amount of virtual
+ memory, in turn reducing the number of TLB misses. With
+ virtualization and nested pagetables, TLB entries of a larger
+ size can be used only if both KVM and the Linux guest are using
+ hugepages, but a significant speedup already happens if only one
+ of the two is using hugepages, just because the TLB miss is
+ going to run faster.
+
+THP can be enabled system wide or restricted to certain tasks or even
+memory ranges inside a task's address space. Unless THP is completely
+disabled, there is a ``khugepaged`` daemon that scans memory and
+collapses sequences of basic pages into huge pages.
+
+The THP behaviour is controlled via the :ref:`sysfs <thp_sysfs>`
+interface and using the madvise(2) and prctl(2) system calls.
+
+Transparent Hugepage Support maximizes the usefulness of free memory
+if compared to the reservation approach of hugetlbfs by allowing all
+unused memory to be used as cache or for other movable (or even
+unmovable) entities. It doesn't require reservation to prevent hugepage
+allocation failures from being noticeable from userland. It allows paging
+and all other advanced VM features to be available on the
+hugepages. It requires no modifications for applications to take
+advantage of it.
+
+Applications however can be further optimized to take advantage of
+this feature, like for example they've been optimized before to avoid
+a flood of mmap system calls for every malloc(4k). Optimizing userland
+is by far not mandatory and khugepaged already can take care of long
+lived page allocations even for hugepage unaware applications that
+deal with large amounts of memory.
+
+In certain cases when hugepages are enabled system wide, applications
+may end up allocating more memory resources. An application may mmap a
+large region but only touch 1 byte of it; in that case a 2M page might
+be allocated instead of a 4k page for no good reason. This is why it's
+possible to disable hugepages system-wide and to only have them inside
+MADV_HUGEPAGE madvise regions.
+
+Embedded systems should enable hugepages only inside madvise regions
+to eliminate any risk of wasting any precious byte of memory and to
+only run faster.
+
+Applications that get a lot of benefit from hugepages and that don't
+risk losing memory by using hugepages should use
+madvise(MADV_HUGEPAGE) on their critical mmapped regions.
+
+.. _thp_sysfs:
+
+sysfs
+=====
+
+Global THP controls
+-------------------
+
+Transparent Hugepage Support for anonymous memory can be entirely disabled
+(mostly for debugging purposes) or only enabled inside MADV_HUGEPAGE
+regions (to avoid the risk of consuming more memory resources) or enabled
+system wide. This can be achieved with one of::
+
+ echo always >/sys/kernel/mm/transparent_hugepage/enabled
+ echo madvise >/sys/kernel/mm/transparent_hugepage/enabled
+ echo never >/sys/kernel/mm/transparent_hugepage/enabled
+
+It's also possible to limit defrag efforts in the VM to generate
+anonymous hugepages, in case they're not immediately free, to madvise
+regions, or to never try to defrag memory and simply fall back to regular
+pages unless hugepages are immediately available. Clearly if we spend CPU
+time to defrag memory, we would expect to gain even more by the fact we
+use hugepages later instead of regular pages. This isn't always
+guaranteed, but it may be more likely in case the allocation is for a
+MADV_HUGEPAGE region.
+
+::
+
+ echo always >/sys/kernel/mm/transparent_hugepage/defrag
+ echo defer >/sys/kernel/mm/transparent_hugepage/defrag
+ echo defer+madvise >/sys/kernel/mm/transparent_hugepage/defrag
+ echo madvise >/sys/kernel/mm/transparent_hugepage/defrag
+ echo never >/sys/kernel/mm/transparent_hugepage/defrag
+
+always
+ means that an application requesting THP will stall on
+ allocation failure and directly reclaim pages and compact
+ memory in an effort to allocate a THP immediately. This may be
+ desirable for virtual machines that benefit heavily from THP
+ use and are willing to delay the VM start to utilise them.
+
+defer
+ means that an application will wake kswapd in the background
+ to reclaim pages and wake kcompactd to compact memory so that
+ THP is available in the near future. It's the responsibility
+ of khugepaged to then install the THP pages later.
+
+defer+madvise
+ will enter direct reclaim and compaction like ``always``, but
+ only for regions that have used madvise(MADV_HUGEPAGE); all
+ other regions will wake kswapd in the background to reclaim
+ pages and wake kcompactd to compact memory so that THP is
+ available in the near future.
+
+madvise
+ will enter direct reclaim like ``always`` but only for regions
+ that have used madvise(MADV_HUGEPAGE). This is the default
+ behaviour.
+
+never
+ should be self-explanatory.
+
+By default the kernel tries to use the huge zero page on a read page
+fault to an anonymous mapping. It's possible to disable the huge zero
+page by writing 0 or enable it back by writing 1::
+
+ echo 0 >/sys/kernel/mm/transparent_hugepage/use_zero_page
+ echo 1 >/sys/kernel/mm/transparent_hugepage/use_zero_page
+
+Some userspace (such as a test program, or an optimized memory allocation
+library) may want to know the size (in bytes) of a transparent hugepage::
+
+ cat /sys/kernel/mm/transparent_hugepage/hpage_pmd_size
+
+khugepaged will be automatically started when
+transparent_hugepage/enabled is set to "always" or "madvise", and it'll
+be automatically shut down if it's set to "never".
+
+Khugepaged controls
+-------------------
+
+khugepaged usually runs at low frequency, so while one may not want to
+invoke defrag algorithms synchronously during the page faults, it
+should be worth invoking defrag at least in khugepaged. However it's
+also possible to disable defrag in khugepaged by writing 0 or enable
+defrag in khugepaged by writing 1::
+
+ echo 0 >/sys/kernel/mm/transparent_hugepage/khugepaged/defrag
+ echo 1 >/sys/kernel/mm/transparent_hugepage/khugepaged/defrag
+
+You can also control how many pages khugepaged should scan at each
+pass::
+
+ /sys/kernel/mm/transparent_hugepage/khugepaged/pages_to_scan
+
+and how many milliseconds to wait in khugepaged between each pass (you
+can set this to 0 to run khugepaged at 100% utilization of one core)::
+
+ /sys/kernel/mm/transparent_hugepage/khugepaged/scan_sleep_millisecs
+
+and how many milliseconds to wait in khugepaged if there's a hugepage
+allocation failure to throttle the next allocation attempt::
+
+ /sys/kernel/mm/transparent_hugepage/khugepaged/alloc_sleep_millisecs
+
+The khugepaged progress can be seen in the number of pages collapsed::
+
+ /sys/kernel/mm/transparent_hugepage/khugepaged/pages_collapsed
+
+and in the number of completed scan passes::
+
+ /sys/kernel/mm/transparent_hugepage/khugepaged/full_scans
+
+``max_ptes_none`` specifies how many extra small pages (that are
+not already mapped) can be allocated when collapsing a group
+of small pages into one large page::
+
+ /sys/kernel/mm/transparent_hugepage/khugepaged/max_ptes_none
+
+A higher value leads to programs using additional memory.
+A lower value leads to less THP performance being gained. The value of
+``max_ptes_none`` has a negligible effect on wasted CPU time, so it
+can be ignored.
+
+``max_ptes_swap`` specifies how many pages can be brought in from
+swap when collapsing a group of pages into a transparent huge page::
+
+ /sys/kernel/mm/transparent_hugepage/khugepaged/max_ptes_swap
+
+A higher value can cause excessive swap IO and waste
+memory. A lower value can prevent THPs from being
+collapsed, resulting in fewer pages being collapsed into
+THPs, and lower memory access performance.
+
+Boot parameter
+==============
+
+You can change the sysfs boot time defaults of Transparent Hugepage
+Support by passing the parameter ``transparent_hugepage=always`` or
+``transparent_hugepage=madvise`` or ``transparent_hugepage=never``
+to the kernel command line.
+
+Hugepages in tmpfs/shmem
+========================
+
+You can control hugepage allocation policy in tmpfs with mount option
+``huge=``. It can have following values:
+
+always
+ Attempt to allocate huge pages every time we need a new page;
+
+never
+ Do not allocate huge pages;
+
+within_size
+ Only allocate huge page if it will be fully within i_size.
+ Also respect fadvise()/madvise() hints;
+
+advise
+ Only allocate huge pages if requested with fadvise()/madvise();
+
+The default policy is ``never``.
+
+``mount -o remount,huge= /mountpoint`` works fine after mount: remounting
+``huge=never`` will not attempt to break up huge pages at all, just stop more
+from being allocated.
+
+There's also a sysfs knob to control hugepage allocation policy for the
+internal shmem mount: ``/sys/kernel/mm/transparent_hugepage/shmem_enabled``.
+The mount is used for SysV SHM, memfds, shared anonymous mmaps (of /dev/zero
+or MAP_ANONYMOUS), GPU drivers' DRM objects, Ashmem.
+
+In addition to policies listed above, shmem_enabled allows two further
+values:
+
+deny
+ For use in emergencies, to force the huge option off from
+ all mounts;
+force
+ Force the huge option on for all - very useful for testing;
+
+Need of application restart
+===========================
+
+The transparent_hugepage/enabled values and tmpfs mount option only affect
+future behavior. So to make them effective you need to restart any
+application that could have been using hugepages. This also applies to the
+regions registered in khugepaged.
+
+Monitoring usage
+================
+
+The number of anonymous transparent huge pages currently used by the
+system is available by reading the AnonHugePages field in ``/proc/meminfo``.
+To identify what applications are using anonymous transparent huge pages,
+it is necessary to read ``/proc/PID/smaps`` and count the AnonHugePages fields
+for each mapping.
+
+The number of file transparent huge pages mapped to userspace is available
+by reading ShmemPmdMapped and ShmemHugePages fields in ``/proc/meminfo``.
+To identify what applications are mapping file transparent huge pages, it
+is necessary to read ``/proc/PID/smaps`` and count the FileHugeMapped fields
+for each mapping.
+
+Note that reading the smaps file is expensive and reading it
+frequently will incur overhead.
+
+There are a number of counters in ``/proc/vmstat`` that may be used to
+monitor how successfully the system is providing huge pages for use.
+
+thp_fault_alloc
+ is incremented every time a huge page is successfully
+ allocated to handle a page fault. This applies to both the
+ first time a page is faulted and for COW faults.
+
+thp_collapse_alloc
+ is incremented by khugepaged when it has found
+ a range of pages to collapse into one huge page and has
+ successfully allocated a new huge page to store the data.
+
+thp_fault_fallback
+ is incremented if a page fault fails to allocate
+ a huge page and instead falls back to using small pages.
+
+thp_collapse_alloc_failed
+ is incremented if khugepaged found a range
+ of pages that should be collapsed into one huge page but failed
+ the allocation.
+
+thp_file_alloc
+ is incremented every time a file huge page is successfully
+ allocated.
+
+thp_file_mapped
+ is incremented every time a file huge page is mapped into
+ user address space.
+
+thp_split_page
+ is incremented every time a huge page is split into base
+ pages. This can happen for a variety of reasons but a common
+ reason is that a huge page is old and is being reclaimed.
+ This action implies splitting all PMDs the page is mapped with.
+
+thp_split_page_failed
+ is incremented if the kernel fails to split a huge
+ page. This can happen if the page was pinned by somebody.
+
+thp_deferred_split_page
+ is incremented when a huge page is put onto split
+ queue. This happens when a huge page is partially unmapped and
+ splitting it would free up some memory. Pages on split queue are
+ going to be split under memory pressure.
+
+thp_split_pmd
+ is incremented every time a PMD is split into a table of PTEs.
+ This can happen, for instance, when an application calls mprotect() or
+ munmap() on part of a huge page. It doesn't split the huge page, only
+ the page table entry.
+
+thp_zero_page_alloc
+ is incremented every time a huge zero page is
+ successfully allocated. It includes allocations which were
+ dropped due to a race with another allocation. Note, it doesn't count
+ every map of the huge zero page, only its allocation.
+
+thp_zero_page_alloc_failed
+ is incremented if the kernel fails to allocate a
+ huge zero page and falls back to using small pages.
+
+thp_swpout
+ is incremented every time a huge page is swapped out in one
+ piece without splitting.
+
+thp_swpout_fallback
+ is incremented if a huge page has to be split before swapout.
+ Usually this is because the kernel failed to allocate some contiguous
+ swap space for the huge page.
+
+As the system ages, allocating huge pages may be expensive as the
+system uses memory compaction to copy data around memory to free a
+huge page for use. There are some counters in ``/proc/vmstat`` to help
+monitor this overhead.
+
+compact_stall
+ is incremented every time a process stalls to run
+ memory compaction so that a huge page is free for use.
+
+compact_success
+ is incremented if the system compacted memory and
+ freed a huge page for use.
+
+compact_fail
+ is incremented if the system tries to compact memory
+ but failed.
+
+compact_pages_moved
+ is incremented each time a page is moved. If
+ this value is increasing rapidly, it implies that the system
+ is copying a lot of data to satisfy the huge page allocation.
+ It is possible that the cost of copying exceeds any savings
+ from reduced TLB misses.
+
+compact_pagemigrate_failed
+ is incremented when the underlying mechanism
+ for moving a page failed.
+
+compact_blocks_moved
+ is incremented each time memory compaction examines
+ a huge page aligned range of pages.
+
+It is possible to establish how long the stalls were by using the
+function tracer to record how long was spent in __alloc_pages_nodemask,
+and by using the mm_page_alloc tracepoint to identify which allocations
+were for huge pages.
+
+Optimizing the applications
+===========================
+
+To guarantee that the kernel will map a 2M page immediately in any
+memory region, the mmap region has to be naturally hugepage
+aligned. posix_memalign() can provide that guarantee.
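+
+An illustrative sketch (the 2M huge page size is presumed, as in the
+examples above)::
+
+	#include <stdlib.h>
+	#include <sys/mman.h>
+
+	int main(void)
+	{
+		size_t huge = 2 << 20;	/* presumed huge page size */
+		void *p;
+
+		/* hugepage-aligned region, eligible for immediate THP use */
+		if (posix_memalign(&p, huge, 8 * huge))
+			return 1;
+		/* also mark it for THP in case "madvise" mode is in use */
+		madvise(p, 8 * huge, MADV_HUGEPAGE);
+		return 0;
+	}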
+
+Hugetlbfs
+=========
+
+You can use hugetlbfs on a kernel that has transparent hugepage
+support enabled just fine as always. No difference can be noted in
+hugetlbfs other than there will be less overall fragmentation. All
+usual features belonging to hugetlbfs are preserved and
+unaffected. libhugetlbfs will also work fine as usual.
diff --git a/Documentation/vm/userfaultfd.txt b/Documentation/admin-guide/mm/userfaultfd.rst
index bb2f945f87ab..5048cf661a8a 100644
--- a/Documentation/vm/userfaultfd.txt
+++ b/Documentation/admin-guide/mm/userfaultfd.rst
@@ -1,6 +1,11 @@
-= Userfaultfd =
+.. _userfaultfd:
-== Objective ==
+===========
+Userfaultfd
+===========
+
+Objective
+=========
Userfaults allow the implementation of on-demand paging from userland
and more generally they allow userland to take control of various
@@ -9,7 +14,8 @@ memory page faults, something otherwise only the kernel code could do.
For example userfaults allow a proper and more optimal implementation
of the PROT_NONE+SIGSEGV trick.
-== Design ==
+Design
+======
Userfaults are delivered and resolved through the userfaultfd syscall.
@@ -41,7 +47,8 @@ different processes without them being aware about what is going on
themselves on the same region the manager is already tracking, which
is a corner case that would currently return -EBUSY).
-== API ==
+API
+===
When first opened the userfaultfd must be enabled by invoking the
UFFDIO_API ioctl specifying a uffdio_api.api value set to UFFD_API (or
@@ -101,7 +108,8 @@ UFFDIO_COPY. They're atomic as in guaranteeing that nothing can see an
half copied page since it'll keep userfaulting until the copy has
finished.
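
To make the sequence concrete, a heavily trimmed sketch (one 4K page,
missing-page mode only, no error handling; a real monitor would read
events from the descriptor in a separate thread)::

	#include <fcntl.h>
	#include <linux/userfaultfd.h>
	#include <sys/ioctl.h>
	#include <sys/mman.h>
	#include <sys/syscall.h>
	#include <unistd.h>

	int main(void)
	{
		long uffd = syscall(__NR_userfaultfd, O_CLOEXEC);
		struct uffdio_api api = { .api = UFFD_API };
		size_t len = 4096;	/* assumes 4K pages */
		void *area = mmap(NULL, len, PROT_READ | PROT_WRITE,
				  MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
		struct uffdio_register reg = {
			.range = { .start = (unsigned long)area, .len = len },
			.mode = UFFDIO_REGISTER_MODE_MISSING,
		};
		static char source[4096];	/* data to resolve faults with */
		struct uffdio_copy copy = {
			.dst = (unsigned long)area,
			.src = (unsigned long)source,
			.len = len,
		};

		ioctl(uffd, UFFDIO_API, &api);		/* enable the API */
		ioctl(uffd, UFFDIO_REGISTER, &reg);	/* track missing faults */
		/* on a UFFD_EVENT_PAGEFAULT read from uffd, resolve with: */
		ioctl(uffd, UFFDIO_COPY, &copy);	/* atomic copy, see above */
		return 0;
	}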
-== QEMU/KVM ==
+QEMU/KVM
+========
QEMU/KVM is using the userfaultfd syscall to implement postcopy live
migration. Postcopy live migration is one form of memory
@@ -163,7 +171,8 @@ sending the same page twice (in case the userfault is read by the
postcopy thread just before UFFDIO_COPY|ZEROPAGE runs in the migration
thread).
-== Non-cooperative userfaultfd ==
+Non-cooperative userfaultfd
+===========================
When the userfaultfd is monitored by an external manager, the manager
must be able to track changes in the process virtual memory
@@ -172,27 +181,30 @@ the same read(2) protocol as for the page fault notifications. The
manager has to explicitly enable these events by setting appropriate
bits in uffdio_api.features passed to UFFDIO_API ioctl:
-UFFD_FEATURE_EVENT_FORK - enable userfaultfd hooks for fork(). When
-this feature is enabled, the userfaultfd context of the parent process
-is duplicated into the newly created process. The manager receives
-UFFD_EVENT_FORK with file descriptor of the new userfaultfd context in
-the uffd_msg.fork.
-
-UFFD_FEATURE_EVENT_REMAP - enable notifications about mremap()
-calls. When the non-cooperative process moves a virtual memory area to
-a different location, the manager will receive UFFD_EVENT_REMAP. The
-uffd_msg.remap will contain the old and new addresses of the area and
-its original length.
-
-UFFD_FEATURE_EVENT_REMOVE - enable notifications about
-madvise(MADV_REMOVE) and madvise(MADV_DONTNEED) calls. The event
-UFFD_EVENT_REMOVE will be generated upon these calls to madvise. The
-uffd_msg.remove will contain start and end addresses of the removed
-area.
-
-UFFD_FEATURE_EVENT_UNMAP - enable notifications about memory
-unmapping. The manager will get UFFD_EVENT_UNMAP with uffd_msg.remove
-containing start and end addresses of the unmapped area.
+UFFD_FEATURE_EVENT_FORK
+ enable userfaultfd hooks for fork(). When this feature is
+ enabled, the userfaultfd context of the parent process is
+ duplicated into the newly created process. The manager
+ receives UFFD_EVENT_FORK with file descriptor of the new
+ userfaultfd context in the uffd_msg.fork.
+
+UFFD_FEATURE_EVENT_REMAP
+ enable notifications about mremap() calls. When the
+ non-cooperative process moves a virtual memory area to a
+ different location, the manager will receive
+ UFFD_EVENT_REMAP. The uffd_msg.remap will contain the old and
+ new addresses of the area and its original length.
+
+UFFD_FEATURE_EVENT_REMOVE
+ enable notifications about madvise(MADV_REMOVE) and
+ madvise(MADV_DONTNEED) calls. The event UFFD_EVENT_REMOVE will
+ be generated upon these calls to madvise. The uffd_msg.remove
+ will contain start and end addresses of the removed area.
+
+UFFD_FEATURE_EVENT_UNMAP
+ enable notifications about memory unmapping. The manager will
+ get UFFD_EVENT_UNMAP with uffd_msg.remove containing start and
+ end addresses of the unmapped area.
Although the UFFD_FEATURE_EVENT_REMOVE and UFFD_FEATURE_EVENT_UNMAP
are pretty similar, they quite differ in the action expected from the
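
As a rough illustration of the feature negotiation described above, a
minimal userspace sketch (not part of this patch; error handling trimmed)
could request the non-cooperative events during the UFFDIO_API handshake::

    #include <fcntl.h>
    #include <linux/userfaultfd.h>
    #include <sys/ioctl.h>
    #include <sys/syscall.h>
    #include <unistd.h>
    #include <err.h>

    int main(void)
    {
            /* Create the userfaultfd file descriptor. */
            int uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
            if (uffd < 0)
                    err(1, "userfaultfd");

            /* Handshake: request the API version and the event features. */
            struct uffdio_api api = {
                    .api = UFFD_API,
                    .features = UFFD_FEATURE_EVENT_FORK |
                                UFFD_FEATURE_EVENT_REMAP |
                                UFFD_FEATURE_EVENT_REMOVE |
                                UFFD_FEATURE_EVENT_UNMAP,
            };
            if (ioctl(uffd, UFFDIO_API, &api) == -1)
                    err(1, "UFFDIO_API");

            /* api.features now holds the feature set the kernel granted. */
            return 0;
    }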
diff --git a/Documentation/admin-guide/pm/intel_pstate.rst b/Documentation/admin-guide/pm/intel_pstate.rst
index d2b6fda3d67b..ab2fe0eda1d7 100644
--- a/Documentation/admin-guide/pm/intel_pstate.rst
+++ b/Documentation/admin-guide/pm/intel_pstate.rst
@@ -145,7 +145,7 @@ feature enabled.]
In this mode ``intel_pstate`` registers utilization update callbacks with the
CPU scheduler in order to run a P-state selection algorithm, either
-``powersave`` or ``performance``, depending on the ``scaling_cur_freq`` policy
+``powersave`` or ``performance``, depending on the ``scaling_governor`` policy
setting in ``sysfs``. The current CPU frequency information to be made
available from the ``scaling_cur_freq`` policy attribute in ``sysfs`` is
periodically updated by those utilization update callbacks too.
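
Concretely, in this mode the P-state selection algorithm follows the
per-policy governor choice in ``sysfs``; for example (using the standard
cpufreq sysfs paths)::

    # echo powersave > /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor
    # cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_cur_freq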
diff --git a/Documentation/admin-guide/pm/sleep-states.rst b/Documentation/admin-guide/pm/sleep-states.rst
index 1e5c0f00cb2f..dbf5acd49f35 100644
--- a/Documentation/admin-guide/pm/sleep-states.rst
+++ b/Documentation/admin-guide/pm/sleep-states.rst
@@ -15,7 +15,7 @@ Sleep States That Can Be Supported
==================================
Depending on its configuration and the capabilities of the platform it runs on,
-the Linux kernel can support up to four system sleep states, includig
+the Linux kernel can support up to four system sleep states, including
hibernation and up to three variants of system suspend. The sleep states that
can be supported by the kernel are listed below.
diff --git a/Documentation/admin-guide/ramoops.rst b/Documentation/admin-guide/ramoops.rst
index 4efd7ce77565..6dbcc5481000 100644
--- a/Documentation/admin-guide/ramoops.rst
+++ b/Documentation/admin-guide/ramoops.rst
@@ -61,7 +61,7 @@ Setting the ramoops parameters can be done in several different manners:
mem=128M ramoops.mem_address=0x8000000 ramoops.ecc=1
B. Use Device Tree bindings, as described in
- ``Documentation/device-tree/bindings/reserved-memory/admin-guide/ramoops.rst``.
+ ``Documentation/devicetree/bindings/reserved-memory/ramoops.txt``.
For example::
reserved-memory {
diff --git a/Documentation/arm/Marvell/README b/Documentation/arm/Marvell/README
index b5bb7f518840..56ada27c53be 100644
--- a/Documentation/arm/Marvell/README
+++ b/Documentation/arm/Marvell/README
@@ -302,19 +302,15 @@ Berlin family (Multimedia Solutions)
88DE3010, Armada 1000 (no Linux support)
Core: Marvell PJ1 (ARMv5TE), Dual-core
Product Brief: http://www.marvell.com.cn/digital-entertainment/assets/armada_1000_pb.pdf
- 88DE3005, Armada 1500-mini
88DE3005, Armada 1500 Mini
Design name: BG2CD
Core: ARM Cortex-A9, PL310 L2CC
- Homepage: http://www.marvell.com/multimedia-solutions/armada-1500-mini/
- 88DE3006, Armada 1500 Mini Plus
- Design name: BG2CDP
- Core: Dual Core ARM Cortex-A7
- Homepage: http://www.marvell.com/multimedia-solutions/armada-1500-mini-plus/
+ 88DE3006, Armada 1500 Mini Plus
+ Design name: BG2CDP
+ Core: Dual Core ARM Cortex-A7
88DE3100, Armada 1500
Design name: BG2
Core: Marvell PJ4B-MP (ARMv7), Tauros3 L2CC
- Product Brief: http://www.marvell.com/digital-entertainment/armada-1500/assets/Marvell-ARMADA-1500-Product-Brief.pdf
88DE3114, Armada 1500 Pro
Design name: BG2Q
Core: Quad Core ARM Cortex-A9, PL310 L2CC
@@ -324,13 +320,16 @@ Berlin family (Multimedia Solutions)
88DE3218, ARMADA 1500 Ultra
Core: ARM Cortex-A53
- Homepage: http://www.marvell.com/multimedia-solutions/
+ Homepage: https://www.synaptics.com/products/multimedia-solutions
Directory: arch/arm/mach-berlin
Comments:
+
* This line of SoCs is based on Marvell Sheeva or ARM Cortex CPUs
with Synopsys DesignWare (IRQ, GPIO, Timers, ...) and PXA IP (SDHCI, USB, ETH, ...).
+ * The Berlin family was acquired by Synaptics from Marvell in 2017.
+
CPU Cores
---------
diff --git a/Documentation/block/cmdline-partition.txt b/Documentation/block/cmdline-partition.txt
index 525b9f6d7fb4..760a3f7c3ed4 100644
--- a/Documentation/block/cmdline-partition.txt
+++ b/Documentation/block/cmdline-partition.txt
@@ -1,7 +1,9 @@
Embedded device command line partition parsing
=====================================================================
-Support for reading the block device partition table from the command line.
+The "blkdevparts" command line option adds support for reading the
+block device partition table from the kernel command line.
+
It is typically used for fixed block (eMMC) embedded devices.
It has no MBR, so saves storage space. Bootloader can be easily accessed
by absolute address of data on the block device.
@@ -14,22 +16,27 @@ blkdevparts=<blkdev-def>[;<blkdev-def>]
<partdef> := <size>[@<offset>](part-name)
<blkdev-id>
- block device disk name, embedded device used fixed block device,
- it's disk name also fixed. such as: mmcblk0, mmcblk1, mmcblk0boot0.
+	block device disk name. Embedded devices use a fixed block device;
+	its disk name is also fixed, such as: mmcblk0, mmcblk1, mmcblk0boot0.
<size>
partition size, in bytes, such as: 512, 1m, 1G.
+	size may contain an optional suffix (upper or lower case):
+ K, M, G, T, P, E.
+ "-" is used to denote all remaining space.
<offset>
partition start address, in bytes.
+	offset may contain an optional suffix (upper or lower case):
+ K, M, G, T, P, E.
(part-name)
- partition name, kernel send uevent with "PARTNAME". application can create
- a link to block device partition with the name "PARTNAME".
- user space application can access partition by partition name.
+	partition name. The kernel sends a uevent with "PARTNAME". An
+	application can create a link to the block device partition with
+	the name "PARTNAME", and user space can then access the partition
+	by that name.
Example:
- eMMC disk name is "mmcblk0" and "mmcblk0boot0"
+ eMMC disk names are "mmcblk0" and "mmcblk0boot0".
bootargs:
'blkdevparts=mmcblk0:1G(data0),1G(data1),-;mmcblk0boot0:1m(boot),-(kernel)'
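
Reading that example: ``mmcblk0`` gets a 1 GiB partition named "data0",
a second 1 GiB partition named "data1", and an unnamed partition covering
the remaining space; ``mmcblk0boot0`` gets a 1 MiB partition named "boot"
and a "kernel" partition taking all remaining space.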
diff --git a/Documentation/block/null_blk.txt b/Documentation/block/null_blk.txt
index 733927a7b501..07f147381f32 100644
--- a/Documentation/block/null_blk.txt
+++ b/Documentation/block/null_blk.txt
@@ -71,13 +71,16 @@ use_per_node_hctx=[0/1]: Default: 0
1: The multi-queue block layer is instantiated with a hardware dispatch
queue for each CPU node in the system.
-use_lightnvm=[0/1]: Default: 0
- Register device with LightNVM. Requires blk-mq and CONFIG_NVM to be enabled.
-
no_sched=[0/1]: Default: 0
0: nullb* use default blk-mq io scheduler.
1: nullb* doesn't use io scheduler.
+blocking=[0/1]: Default: 0
+ 0: Register as a non-blocking blk-mq driver device.
+ 1: Register as a blocking blk-mq driver device, null_blk will set
+ the BLK_MQ_F_BLOCKING flag, indicating that it sometimes/always
+ needs to block in its ->queue_rq() function.
+
shared_tags=[0/1]: Default: 0
0: Tag set is not shared.
1: Tag set shared between devices for blk-mq. Only makes sense with
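
As a usage sketch (parameter names as documented above), the driver can
be loaded with, e.g.::

    # modprobe null_blk blocking=1 no_sched=1 shared_tags=1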
diff --git a/Documentation/bpf/bpf_devel_QA.txt b/Documentation/bpf/bpf_devel_QA.txt
index 1a0b704e1a38..da57601153a0 100644
--- a/Documentation/bpf/bpf_devel_QA.txt
+++ b/Documentation/bpf/bpf_devel_QA.txt
@@ -557,6 +557,14 @@ A: Although LLVM IR generation and optimization try to stay architecture
pulls in some header files containing file scope host assembly codes.
- You can add "-fno-jump-tables" to work around the switch table issue.
- Otherwise, you can use bpf target.
+ Otherwise, you can use bpf target. Additionally, you _must_ use bpf target
+ when:
+
+ - Your program uses data structures with pointer or long / unsigned long
+ types that interface with BPF helpers or context data structures. Access
+ into these structures is verified by the BPF verifier and may result
+ in verification failures if the native architecture is not aligned with
+  the BPF architecture, e.g. 64-bit. An example of this is
+  BPF_PROG_TYPE_SK_MSG, which requires '-target bpf'.
Happy BPF hacking!
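
For reference, a typical invocation selecting the bpf target looks like
this (file names hypothetical)::

    $ clang -O2 -target bpf -c bpf_prog.c -o bpf_prog.o

whereas a native-target build would add ``-fno-jump-tables`` to work
around the switch-table issue mentioned above.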
diff --git a/Documentation/core-api/atomic_ops.rst b/Documentation/core-api/atomic_ops.rst
index fce929144ccd..2e7165f86f55 100644
--- a/Documentation/core-api/atomic_ops.rst
+++ b/Documentation/core-api/atomic_ops.rst
@@ -111,7 +111,6 @@ If the compiler can prove that do_something() does not store to the
variable a, then the compiler is within its rights transforming this to
the following::
- tmp = a;
if (a > 0)
for (;;)
do_something();
@@ -119,7 +118,7 @@ the following::
If you don't want the compiler to do this (and you probably don't), then
you should use something like the following::
- while (READ_ONCE(a) < 0)
+ while (READ_ONCE(a) > 0)
do_something();
Alternatively, you could place a barrier() call in the loop.
@@ -467,10 +466,12 @@ Like the above, except that these routines return a boolean which
indicates whether the changed bit was set _BEFORE_ the atomic bit
operation.
-WARNING! It is incredibly important that the value be a boolean,
-ie. "0" or "1". Do not try to be fancy and save a few instructions by
-declaring the above to return "long" and just returning something like
-"old_val & mask" because that will not work.
+
+.. warning::
+ It is incredibly important that the value be a boolean, ie. "0" or "1".
+ Do not try to be fancy and save a few instructions by declaring the
+ above to return "long" and just returning something like "old_val &
+ mask" because that will not work.
For one thing, this return value gets truncated to int in many code
paths using these interfaces, so on 64-bit if the bit is set in the
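
To make the truncation hazard concrete, here is a small standalone
illustration (not from the patch; it assumes a 64-bit ``long``)::

    #include <stdio.h>

    int main(void)
    {
            unsigned long old_val = 1UL << 32;   /* bit 32 was set */
            unsigned long mask    = 1UL << 32;

            int truncated = (int)(old_val & mask);  /* 0: the set bit is lost */
            int boolean   = (old_val & mask) != 0;  /* 1: the correct answer */

            printf("truncated=%d boolean=%d\n", truncated, boolean);
            return 0;
    }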
diff --git a/Documentation/cachetlb.txt b/Documentation/core-api/cachetlb.rst
index 6eb9d3f090cd..6eb9d3f090cd 100644
--- a/Documentation/cachetlb.txt
+++ b/Documentation/core-api/cachetlb.rst
diff --git a/Documentation/circular-buffers.txt b/Documentation/core-api/circular-buffers.rst
index 53e51caa3347..53e51caa3347 100644
--- a/Documentation/circular-buffers.txt
+++ b/Documentation/core-api/circular-buffers.rst
diff --git a/Documentation/core-api/gfp_mask-from-fs-io.rst b/Documentation/core-api/gfp_mask-from-fs-io.rst
new file mode 100644
index 000000000000..e0df8f416582
--- /dev/null
+++ b/Documentation/core-api/gfp_mask-from-fs-io.rst
@@ -0,0 +1,66 @@
+=================================
+GFP masks used from FS/IO context
+=================================
+
+:Date: May, 2018
+:Author: Michal Hocko <mhocko@kernel.org>
+
+Introduction
+============
+
+Code paths in the filesystem and IO stacks must be careful when
+allocating memory to prevent recursion deadlocks caused by direct
+memory reclaim calling back into the FS or IO paths and blocking on
+already held resources (e.g. locks - most commonly those used for the
+transaction context).
+
+The traditional way to avoid this deadlock problem is to clear __GFP_FS
+or __GFP_IO (note that clearing the latter implies clearing the former
+as well) in the gfp mask when calling an allocator; GFP_NOFS and
+GFP_NOIO can be used as shortcuts. It turned out, though, that this
+approach has led to abuses: the restricted gfp mask is often used "just
+in case", without deeper consideration, and excessive use of
+GFP_NOFS/GFP_NOIO can lead to memory over-reclaim or other memory
+reclaim issues.
+
+New API
+========
+
+Since 4.12 we have a generic scope API for both the NOFS and NOIO contexts:
+``memalloc_nofs_save``/``memalloc_nofs_restore`` and ``memalloc_noio_save``/
+``memalloc_noio_restore``, which allow marking a scope as a critical
+section from a filesystem or I/O point of view. Any allocation from that
+scope will inherently drop __GFP_FS or __GFP_IO, respectively, from the
+given mask, so no memory allocation can recurse back into the FS/IO paths.
+
+.. kernel-doc:: include/linux/sched/mm.h
+ :functions: memalloc_nofs_save memalloc_nofs_restore
+.. kernel-doc:: include/linux/sched/mm.h
+ :functions: memalloc_noio_save memalloc_noio_restore
+
+FS/IO code then simply calls the appropriate save function before any
+critical section with respect to reclaim is started - e.g. before taking
+a lock shared with the reclaim context, or where a transaction context
+nesting would be possible via reclaim. The restore function should be
+called when the critical section ends. Ideally, all of that comes with
+an explanation of what the reclaim context is, for easier maintenance.
+
+Please note that the proper pairing of save/restore functions
+allows nesting, so it is safe to call ``memalloc_noio_save`` and
+``memalloc_noio_restore`` from within an existing NOIO or NOFS
+scope.
+
+What about __vmalloc(GFP_NOFS)
+==============================
+
+vmalloc doesn't support GFP_NOFS semantics because there are hardcoded
+GFP_KERNEL allocations deep inside the allocator which are quite non-trivial
+to fix up. That means that calling ``vmalloc`` with GFP_NOFS/GFP_NOIO is
+almost always a bug. The good news is that the NOFS/NOIO semantics can be
+achieved by the scope API.
+
+In the ideal world, upper layers should already mark dangerous contexts,
+so no special care is required and ``vmalloc`` can be called without any
+problems. If the context is not really clear, or there are layering
+violations, the recommended workaround is to wrap ``vmalloc`` in the
+scope API with a comment explaining the problem.
diff --git a/Documentation/core-api/index.rst b/Documentation/core-api/index.rst
index c670a8031786..f5a66b72f984 100644
--- a/Documentation/core-api/index.rst
+++ b/Documentation/core-api/index.rst
@@ -14,6 +14,7 @@ Core utilities
kernel-api
assoc_array
atomic_ops
+ cachetlb
refcount-vs-atomic
cpu_hotplug
idr
@@ -25,6 +26,8 @@ Core utilities
genalloc
errseq
printk-formats
+ circular-buffers
+ gfp_mask-from-fs-io
Interfaces for kernel debugging
===============================
diff --git a/Documentation/core-api/kernel-api.rst b/Documentation/core-api/kernel-api.rst
index 92f30006adae..8e44aea366c2 100644
--- a/Documentation/core-api/kernel-api.rst
+++ b/Documentation/core-api/kernel-api.rst
@@ -39,17 +39,17 @@ String Manipulation
.. kernel-doc:: lib/string.c
:export:
+Basic Kernel Library Functions
+==============================
+
+The Linux kernel provides additional basic utility functions.
+
Bit Operations
--------------
.. kernel-doc:: arch/x86/include/asm/bitops.h
:internal:
-Basic Kernel Library Functions
-==============================
-
-The Linux kernel provides more basic utility functions.
-
Bitmap Operations
-----------------
@@ -80,6 +80,31 @@ Command-line Parsing
.. kernel-doc:: lib/cmdline.c
:export:
+Sorting
+-------
+
+.. kernel-doc:: lib/sort.c
+ :export:
+
+.. kernel-doc:: lib/list_sort.c
+ :export:
+
+Text Searching
+--------------
+
+.. kernel-doc:: lib/textsearch.c
+ :doc: ts_intro
+
+.. kernel-doc:: lib/textsearch.c
+ :export:
+
+.. kernel-doc:: include/linux/textsearch.h
+ :functions: textsearch_find textsearch_next \
+ textsearch_get_pattern textsearch_get_pattern_len
+
+CRC and Math Functions in Linux
+===============================
+
CRC Functions
-------------
@@ -103,9 +128,6 @@ CRC Functions
.. kernel-doc:: lib/crc-itu-t.c
:export:
-Math Functions in Linux
-=======================
-
Base 2 log and power Functions
------------------------------
@@ -127,28 +149,6 @@ Division Functions
.. kernel-doc:: lib/gcd.c
:export:
-Sorting
--------
-
-.. kernel-doc:: lib/sort.c
- :export:
-
-.. kernel-doc:: lib/list_sort.c
- :export:
-
-Text Searching
---------------
-
-.. kernel-doc:: lib/textsearch.c
- :doc: ts_intro
-
-.. kernel-doc:: lib/textsearch.c
- :export:
-
-.. kernel-doc:: include/linux/textsearch.h
- :functions: textsearch_find textsearch_next \
- textsearch_get_pattern textsearch_get_pattern_len
-
UUID/GUID
---------
diff --git a/Documentation/core-api/refcount-vs-atomic.rst b/Documentation/core-api/refcount-vs-atomic.rst
index 83351c258cdb..322851bada16 100644
--- a/Documentation/core-api/refcount-vs-atomic.rst
+++ b/Documentation/core-api/refcount-vs-atomic.rst
@@ -17,7 +17,7 @@ in order to help maintainers validate their code against the change in
these memory ordering guarantees.
The terms used through this document try to follow the formal LKMM defined in
-github.com/aparri/memory-model/blob/master/Documentation/explanation.txt
+tools/memory-model/Documentation/explanation.txt.
memory-barriers.txt and atomic_t.txt provide more background to the
memory ordering in general and for atomic operations specifically.
diff --git a/Documentation/crypto/index.rst b/Documentation/crypto/index.rst
index 94c4786f2573..c4ff5d791233 100644
--- a/Documentation/crypto/index.rst
+++ b/Documentation/crypto/index.rst
@@ -20,5 +20,6 @@ for cryptographic use cases, as well as programming examples.
architecture
devel-algos
userspace-if
+ crypto_engine
api
api-samples
diff --git a/Documentation/dev-tools/kasan.rst b/Documentation/dev-tools/kasan.rst
index f7a18f274357..aabc8738b3d8 100644
--- a/Documentation/dev-tools/kasan.rst
+++ b/Documentation/dev-tools/kasan.rst
@@ -120,7 +120,7 @@ A typical out of bounds access report looks like this::
The header of the report describes what kind of bug happened and what kind of
access caused it. It's followed by the description of the accessed slub object
-(see 'SLUB Debug output' section in Documentation/vm/slub.txt for details) and
+(see 'SLUB Debug output' section in Documentation/vm/slub.rst for details) and
the description of the accessed memory page.
In the last section the report shows memory state around the accessed address.
diff --git a/Documentation/dev-tools/kselftest.rst b/Documentation/dev-tools/kselftest.rst
index e80850eefe13..3bf371a938d0 100644
--- a/Documentation/dev-tools/kselftest.rst
+++ b/Documentation/dev-tools/kselftest.rst
@@ -151,6 +151,11 @@ Contributing new tests (details)
TEST_FILES, TEST_GEN_FILES mean it is the file which is used by
test.
+ * First use the headers inside the kernel source and/or git repo, and then the
+   system headers. Headers for the kernel release, as opposed to headers
+   installed by the distro on the system, should be the primary focus, to be
+   able to find regressions.
+
Test Harness
============
diff --git a/Documentation/device-mapper/thin-provisioning.txt b/Documentation/device-mapper/thin-provisioning.txt
index 4bcd4b7f79f9..3d01948ea061 100644
--- a/Documentation/device-mapper/thin-provisioning.txt
+++ b/Documentation/device-mapper/thin-provisioning.txt
@@ -264,7 +264,10 @@ i) Constructor
data device, but just remove the mapping.
read_only: Don't allow any changes to be made to the pool
- metadata.
+ metadata. This mode is only available after the
+ thin-pool has been created and first used in full
+ read/write mode. It cannot be specified on initial
+ thin-pool creation.
error_if_no_space: Error IOs, instead of queueing, if no space.
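
As an illustration of switching an existing pool to read-only mode
(device names and sizes hypothetical; the table layout follows the
constructor described in this document)::

    # dmsetup suspend pool
    # dmsetup reload pool --table \
        "0 20971520 thin-pool /dev/mapper/meta /dev/mapper/data 128 32768 1 read_only"
    # dmsetup resume pool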
diff --git a/Documentation/devicetree/bindings/ata/ahci-platform.txt b/Documentation/devicetree/bindings/ata/ahci-platform.txt
index f4006d3c9fdf..c760ecb81381 100644
--- a/Documentation/devicetree/bindings/ata/ahci-platform.txt
+++ b/Documentation/devicetree/bindings/ata/ahci-platform.txt
@@ -30,7 +30,6 @@ compatible:
Optional properties:
- dma-coherent : Present if dma operations are coherent
- clocks : a list of phandle + clock specifier pairs
-- resets : a list of phandle + reset specifier pairs
- target-supply : regulator for SATA target power
- phys : reference to the SATA PHY node
- phy-names : must be "sata-phy"
diff --git a/Documentation/devicetree/bindings/display/panel/panel-common.txt b/Documentation/devicetree/bindings/display/panel/panel-common.txt
index 557fa765adcb..5d2519af4bb5 100644
--- a/Documentation/devicetree/bindings/display/panel/panel-common.txt
+++ b/Documentation/devicetree/bindings/display/panel/panel-common.txt
@@ -38,7 +38,7 @@ Display Timings
require specific display timings. The panel-timing subnode expresses those
timings as specified in the timing subnode section of the display timing
bindings defined in
- Documentation/devicetree/bindings/display/display-timing.txt.
+ Documentation/devicetree/bindings/display/panel/display-timing.txt.
Connectivity
diff --git a/Documentation/devicetree/bindings/dma/renesas,rcar-dmac.txt b/Documentation/devicetree/bindings/dma/renesas,rcar-dmac.txt
index aadfb236d53a..61315eaa7660 100644
--- a/Documentation/devicetree/bindings/dma/renesas,rcar-dmac.txt
+++ b/Documentation/devicetree/bindings/dma/renesas,rcar-dmac.txt
@@ -26,6 +26,7 @@ Required Properties:
- "renesas,dmac-r8a7794" (R-Car E2)
- "renesas,dmac-r8a7795" (R-Car H3)
- "renesas,dmac-r8a7796" (R-Car M3-W)
+ - "renesas,dmac-r8a77965" (R-Car M3-N)
- "renesas,dmac-r8a77970" (R-Car V3M)
- "renesas,dmac-r8a77980" (R-Car V3H)
diff --git a/Documentation/devicetree/bindings/hwmon/gpio-fan.txt b/Documentation/devicetree/bindings/hwmon/gpio-fan.txt
index 439a7430fc68..2becdcfdc840 100644
--- a/Documentation/devicetree/bindings/hwmon/gpio-fan.txt
+++ b/Documentation/devicetree/bindings/hwmon/gpio-fan.txt
@@ -11,7 +11,7 @@ Optional properties:
must have the RPM values in ascending order.
- alarm-gpios: This pin going active indicates something is wrong with
the fan, and a udev event will be fired.
-- cooling-cells: If used as a cooling device, must be <2>
+- #cooling-cells: If used as a cooling device, must be <2>
Also see: Documentation/devicetree/bindings/thermal/thermal.txt
min and max states are derived from the speed-map of the fan.
diff --git a/Documentation/devicetree/bindings/hwmon/ltc2990.txt b/Documentation/devicetree/bindings/hwmon/ltc2990.txt
new file mode 100644
index 000000000000..f92f54029e84
--- /dev/null
+++ b/Documentation/devicetree/bindings/hwmon/ltc2990.txt
@@ -0,0 +1,36 @@
+ltc2990: Linear Technology LTC2990 power monitor
+
+Required properties:
+- compatible: Must be "lltc,ltc2990"
+- reg: I2C slave address
+- lltc,meas-mode:
+ An array of two integers for configuring the chip measurement mode.
+
+ The first integer defines the bits 2..0 in the control register. In all
+ cases the internal temperature and supply voltage are measured. In
+ addition the following input measurements are enabled per mode:
+
+ 0: V1, V2, TR2
+ 1: V1-V2, TR2
+ 2: V1-V2, V3, V4
+ 3: TR1, V3, V4
+ 4: TR1, V3-V4
+ 5: TR1, TR2
+ 6: V1-V2, V3-V4
+ 7: V1, V2, V3, V4
+
+ The second integer defines the bits 4..3 in the control register. This
+ allows a subset of the measurements to be enabled:
+
+ 0: Internal temperature and supply voltage only
+ 1: TR1, V1 or V1-V2 only per mode
+ 2: TR2, V3 or V3-V4 only per mode
+ 3: All measurements per mode
+
+Example:
+
+ltc2990@4c {
+ compatible = "lltc,ltc2990";
+ reg = <0x4c>;
+ lltc,meas-mode = <7 3>; /* V1, V2, V3, V4 */
+};
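
Similarly, a purely differential setup could use ``lltc,meas-mode = <6 3>``,
which (per the tables above) measures V1-V2 and V3-V4 in addition to the
always-enabled internal temperature and supply voltage.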
diff --git a/Documentation/devicetree/bindings/input/atmel,maxtouch.txt b/Documentation/devicetree/bindings/input/atmel,maxtouch.txt
index 23e3abc3fdef..c88919480d37 100644
--- a/Documentation/devicetree/bindings/input/atmel,maxtouch.txt
+++ b/Documentation/devicetree/bindings/input/atmel,maxtouch.txt
@@ -4,6 +4,13 @@ Required properties:
- compatible:
atmel,maxtouch
+ The following compatibles have been used in various products but are
+ deprecated:
+ atmel,qt602240_ts
+ atmel,atmel_mxt_ts
+ atmel,atmel_mxt_tp
+ atmel,mXT224
+
- reg: The I2C address of the device
- interrupts: The sink for the touchpad's IRQ output
diff --git a/Documentation/devicetree/bindings/interrupt-controller/amlogic,meson-gpio-intc.txt b/Documentation/devicetree/bindings/interrupt-controller/amlogic,meson-gpio-intc.txt
index a83f9a5734ca..89674ad8a097 100644
--- a/Documentation/devicetree/bindings/interrupt-controller/amlogic,meson-gpio-intc.txt
+++ b/Documentation/devicetree/bindings/interrupt-controller/amlogic,meson-gpio-intc.txt
@@ -9,11 +9,12 @@ number of interrupt exposed depends on the SoC.
Required properties:
-- compatible : must have "amlogic,meson8-gpio-intc†and either
- “amlogic,meson8-gpio-intc†for meson8 SoCs (S802) or
- “amlogic,meson8b-gpio-intc†for meson8b SoCs (S805) or
- “amlogic,meson-gxbb-gpio-intc†for GXBB SoCs (S905) or
- “amlogic,meson-gxl-gpio-intc†for GXL SoCs (S905X, S912)
+- compatible : must have "amlogic,meson8-gpio-intc" and either
+ "amlogic,meson8-gpio-intc" for meson8 SoCs (S802) or
+ "amlogic,meson8b-gpio-intc" for meson8b SoCs (S805) or
+ "amlogic,meson-gxbb-gpio-intc" for GXBB SoCs (S905) or
+ "amlogic,meson-gxl-gpio-intc" for GXL SoCs (S905X, S912)
+ "amlogic,meson-axg-gpio-intc" for AXG SoCs (A113D, A113X)
- interrupt-parent : a phandle to the GIC the interrupts are routed to.
Usually this is provided at the root level of the device tree as it is
common to most of the SoC.
diff --git a/Documentation/devicetree/bindings/interrupt-controller/arm,gic-v3.txt b/Documentation/devicetree/bindings/interrupt-controller/arm,gic-v3.txt
index 0a57f2f4167d..3ea78c4ef887 100644
--- a/Documentation/devicetree/bindings/interrupt-controller/arm,gic-v3.txt
+++ b/Documentation/devicetree/bindings/interrupt-controller/arm,gic-v3.txt
@@ -57,6 +57,20 @@ Optional
occupied by the redistributors. Required if more than one such
region is present.
+- msi-controller: Boolean property. Identifies the node as an MSI
+ controller. Only present if the Message Based Interrupt
+  functionality is being exposed by the HW, and the mbi-ranges
+  property is present.
+
+- mbi-ranges: A list of pairs <intid span>, where "intid" is the first
+  SPI of a range that can be used as an MBI, and "span" is the size of that
+ range. Multiple ranges can be provided. Requires "msi-controller" to
+ be set.
+
+- mbi-alias: Address property. Base address of an alias of the GICD
+ region containing only the {SET,CLR}SPI registers to be used if
+ isolation is required, and if supported by the HW.
+
Sub-nodes:
PPI affinity can be expressed as a single "ppi-partitions" node,
@@ -99,6 +113,9 @@ Examples:
<0x0 0x2c020000 0 0x2000>; // GICV
interrupts = <1 9 4>;
+ msi-controller;
+ mbi-ranges = <256 128>;
+
gic-its@2c200000 {
compatible = "arm,gic-v3-its";
msi-controller;
diff --git a/Documentation/devicetree/bindings/interrupt-controller/st,stm32-exti.txt b/Documentation/devicetree/bindings/interrupt-controller/st,stm32-exti.txt
index edf03f09244b..136bd612bd83 100644
--- a/Documentation/devicetree/bindings/interrupt-controller/st,stm32-exti.txt
+++ b/Documentation/devicetree/bindings/interrupt-controller/st,stm32-exti.txt
@@ -5,11 +5,14 @@ Required properties:
- compatible: Should be:
"st,stm32-exti"
"st,stm32h7-exti"
+ "st,stm32mp1-exti"
- reg: Specifies base physical address and size of the registers
- interrupt-controller: Identifies the node as an interrupt controller
- #interrupt-cells: Specifies the number of cells to encode an interrupt
specifier, shall be 2
- interrupts: interrupts references to primary interrupt controller
+  (only needed for exti controller with multiple exti under
+  same parent interrupt: st,stm32-exti and st,stm32h7-exti)
Example:
diff --git a/Documentation/devicetree/bindings/net/can/rcar_canfd.txt b/Documentation/devicetree/bindings/net/can/rcar_canfd.txt
index 93c3a6ae32f9..ac71daa46195 100644
--- a/Documentation/devicetree/bindings/net/can/rcar_canfd.txt
+++ b/Documentation/devicetree/bindings/net/can/rcar_canfd.txt
@@ -5,7 +5,9 @@ Required properties:
- compatible: Must contain one or more of the following:
- "renesas,rcar-gen3-canfd" for R-Car Gen3 compatible controller.
- "renesas,r8a7795-canfd" for R8A7795 (R-Car H3) compatible controller.
- - "renesas,r8a7796-canfd" for R8A7796 (R-Car M3) compatible controller.
+ - "renesas,r8a7796-canfd" for R8A7796 (R-Car M3-W) compatible controller.
+ - "renesas,r8a77970-canfd" for R8A77970 (R-Car V3M) compatible controller.
+ - "renesas,r8a77980-canfd" for R8A77980 (R-Car V3H) compatible controller.
When compatible with the generic version, nodes must list the
SoC-specific version corresponding to the platform first, followed by the
diff --git a/Documentation/devicetree/bindings/net/dsa/b53.txt b/Documentation/devicetree/bindings/net/dsa/b53.txt
index 8acf51a4dfa8..47a6a7fe0b86 100644
--- a/Documentation/devicetree/bindings/net/dsa/b53.txt
+++ b/Documentation/devicetree/bindings/net/dsa/b53.txt
@@ -10,6 +10,7 @@ Required properties:
"brcm,bcm53128"
"brcm,bcm5365"
"brcm,bcm5395"
+ "brcm,bcm5389"
"brcm,bcm5397"
"brcm,bcm5398"
diff --git a/Documentation/devicetree/bindings/net/marvell-pp2.txt b/Documentation/devicetree/bindings/net/marvell-pp2.txt
index 1814fa13f6ab..fc019df0d863 100644
--- a/Documentation/devicetree/bindings/net/marvell-pp2.txt
+++ b/Documentation/devicetree/bindings/net/marvell-pp2.txt
@@ -21,9 +21,10 @@ Required properties:
- main controller clock (for both armada-375-pp2 and armada-7k-pp2)
- GOP clock (for both armada-375-pp2 and armada-7k-pp2)
- MG clock (only for armada-7k-pp2)
+ - MG Core clock (only for armada-7k-pp2)
- AXI clock (only for armada-7k-pp2)
-- clock-names: names of used clocks, must be "pp_clk", "gop_clk", "mg_clk"
- and "axi_clk" (the 2 latter only for armada-7k-pp2).
+- clock-names: names of used clocks, must be "pp_clk", "gop_clk", "mg_clk",
+ "mg_core_clk" and "axi_clk" (the 3 latter only for armada-7k-pp2).
The ethernet ports are represented by subnodes. At least one port is
required.
@@ -80,8 +81,8 @@ cpm_ethernet: ethernet@0 {
compatible = "marvell,armada-7k-pp22";
reg = <0x0 0x100000>, <0x129000 0xb000>;
clocks = <&cpm_syscon0 1 3>, <&cpm_syscon0 1 9>,
- <&cpm_syscon0 1 5>, <&cpm_syscon0 1 18>;
- clock-names = "pp_clk", "gop_clk", "gp_clk", "axi_clk";
+ <&cpm_syscon0 1 5>, <&cpm_syscon0 1 6>, <&cpm_syscon0 1 18>;
+ clock-names = "pp_clk", "gop_clk", "mg_clk", "mg_core_clk", "axi_clk";
eth0: eth0 {
interrupts = <ICU_GRP_NSR 39 IRQ_TYPE_LEVEL_HIGH>,
diff --git a/Documentation/devicetree/bindings/net/micrel-ksz90x1.txt b/Documentation/devicetree/bindings/net/micrel-ksz90x1.txt
index 42a248301615..e22d8cfea687 100644
--- a/Documentation/devicetree/bindings/net/micrel-ksz90x1.txt
+++ b/Documentation/devicetree/bindings/net/micrel-ksz90x1.txt
@@ -57,6 +57,13 @@ KSZ9031:
- txd2-skew-ps : Skew control of TX data 2 pad
- txd3-skew-ps : Skew control of TX data 3 pad
+ - micrel,force-master:
+ Boolean, force phy to master mode. Only set this option if the phy
+ reference clock provided at CLK125_NDO pin is used as MAC reference
+    clock because the clock jitter in slave mode is too high (errata #2).
+    Attention: The link partner must be configurable as slave, otherwise
+    no link will be established.
+
Examples:
mdio {
diff --git a/Documentation/devicetree/bindings/net/renesas,ravb.txt b/Documentation/devicetree/bindings/net/renesas,ravb.txt
index c306f55d335b..890526dbfc26 100644
--- a/Documentation/devicetree/bindings/net/renesas,ravb.txt
+++ b/Documentation/devicetree/bindings/net/renesas,ravb.txt
@@ -18,6 +18,7 @@ Required properties:
- "renesas,etheravb-r8a7795" for the R8A7795 SoC.
- "renesas,etheravb-r8a7796" for the R8A7796 SoC.
+ - "renesas,etheravb-r8a77965" for the R8A77965 SoC.
- "renesas,etheravb-r8a77970" for the R8A77970 SoC.
- "renesas,etheravb-r8a77980" for the R8A77980 SoC.
- "renesas,etheravb-r8a77995" for the R8A77995 SoC.
diff --git a/Documentation/devicetree/bindings/pinctrl/allwinner,sunxi-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/allwinner,sunxi-pinctrl.txt
index ed5eb547afc8..64bc5c2a76da 100644
--- a/Documentation/devicetree/bindings/pinctrl/allwinner,sunxi-pinctrl.txt
+++ b/Documentation/devicetree/bindings/pinctrl/allwinner,sunxi-pinctrl.txt
@@ -56,9 +56,9 @@ pins it needs, and how they should be configured, with regard to muxer
configuration, drive strength and pullups. If one of these options is
not set, its actual value will be unspecified.
-This driver supports the generic pin multiplexing and configuration
-bindings. For details on each properties, you can refer to
-./pinctrl-bindings.txt.
+The Allwinner A1X Pin Controller supports the generic pin multiplexing and
+configuration bindings. For details on each property, you can refer to
+./pinctrl-bindings.txt.
Required sub-node properties:
- pins
diff --git a/Documentation/devicetree/bindings/serial/renesas,sci-serial.txt b/Documentation/devicetree/bindings/serial/renesas,sci-serial.txt
index a006ea4d065f..106808b55b6d 100644
--- a/Documentation/devicetree/bindings/serial/renesas,sci-serial.txt
+++ b/Documentation/devicetree/bindings/serial/renesas,sci-serial.txt
@@ -43,6 +43,8 @@ Required properties:
- "renesas,hscif-r8a7795" for R8A7795 (R-Car H3) HSCIF compatible UART.
- "renesas,scif-r8a7796" for R8A7796 (R-Car M3-W) SCIF compatible UART.
- "renesas,hscif-r8a7796" for R8A7796 (R-Car M3-W) HSCIF compatible UART.
+ - "renesas,scif-r8a77965" for R8A77965 (R-Car M3-N) SCIF compatible UART.
+ - "renesas,hscif-r8a77965" for R8A77965 (R-Car M3-N) HSCIF compatible UART.
- "renesas,scif-r8a77970" for R8A77970 (R-Car V3M) SCIF compatible UART.
- "renesas,hscif-r8a77970" for R8A77970 (R-Car V3M) HSCIF compatible UART.
- "renesas,scif-r8a77980" for R8A77980 (R-Car V3H) SCIF compatible UART.
diff --git a/Documentation/devicetree/bindings/vendor-prefixes.txt b/Documentation/devicetree/bindings/vendor-prefixes.txt
index b5f978a4cac6..a38d8bfae19c 100644
--- a/Documentation/devicetree/bindings/vendor-prefixes.txt
+++ b/Documentation/devicetree/bindings/vendor-prefixes.txt
@@ -182,6 +182,7 @@ karo Ka-Ro electronics GmbH
keithkoep Keith & Koep GmbH
keymile Keymile GmbH
khadas Khadas
+kiebackpeter Kieback & Peter GmbH
kinetic Kinetic Technologies
kingnovel Kingnovel Technology Co., Ltd.
kosagi Sutajio Ko-Usagi PTE Ltd.
diff --git a/Documentation/devicetree/overlay-notes.txt b/Documentation/devicetree/overlay-notes.txt
index a4feb6dde8cd..725fb8d255c1 100644
--- a/Documentation/devicetree/overlay-notes.txt
+++ b/Documentation/devicetree/overlay-notes.txt
@@ -98,6 +98,14 @@ Finally, if you need to remove all overlays in one-go, just call
of_overlay_remove_all() which will remove every single one in the correct
order.
+In addition, there is the option to register notifiers that get called on
+overlay operations. See of_overlay_notifier_register/unregister and
+enum of_overlay_notify_action for details.
+
+Note that a notifier callback is not supposed to store pointers to a device
+tree node or its contents beyond the OF_OVERLAY_POST_REMOVE notification
+corresponding to the respective node it received.
+
Overlay DTS Format
------------------
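
Returning to the notifier registration mentioned above, a kernel-side
sketch (callback and variable names hypothetical) could look like::

    #include <linux/of.h>
    #include <linux/notifier.h>
    #include <linux/printk.h>

    static int my_overlay_cb(struct notifier_block *nb,
                             unsigned long action, void *arg)
    {
            /*
             * Inspect the overlay here, but do not cache pointers to
             * nodes past the OF_OVERLAY_POST_REMOVE notification.
             */
            if (action == OF_OVERLAY_POST_APPLY)
                    pr_info("overlay applied\n");
            return NOTIFY_OK;
    }

    static struct notifier_block my_overlay_nb = {
            .notifier_call = my_overlay_cb,
    };

    /* during init: of_overlay_notifier_register(&my_overlay_nb); */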
diff --git a/Documentation/doc-guide/parse-headers.rst b/Documentation/doc-guide/parse-headers.rst
index 96a0423d5dba..82a3e43b6864 100644
--- a/Documentation/doc-guide/parse-headers.rst
+++ b/Documentation/doc-guide/parse-headers.rst
@@ -177,14 +177,14 @@ BUGS
****
-Report bugs to Mauro Carvalho Chehab <mchehab@s-opensource.com>
+Report bugs to Mauro Carvalho Chehab <mchehab@kernel.org>
COPYRIGHT
*********
-Copyright (c) 2016 by Mauro Carvalho Chehab <mchehab@s-opensource.com>.
+Copyright (c) 2016 by Mauro Carvalho Chehab <mchehab+samsung@kernel.org>.
License GPLv2: GNU GPL version 2 <http://gnu.org/licenses/gpl.html>.
diff --git a/Documentation/clk.txt b/Documentation/driver-api/clk.rst
index 511628bb3d3a..511628bb3d3a 100644
--- a/Documentation/clk.txt
+++ b/Documentation/driver-api/clk.rst
diff --git a/Documentation/driver-api/device_connection.rst b/Documentation/driver-api/device_connection.rst
index affbc5566ab0..ba364224c349 100644
--- a/Documentation/driver-api/device_connection.rst
+++ b/Documentation/driver-api/device_connection.rst
@@ -40,4 +40,4 @@ API
---
.. kernel-doc:: drivers/base/devcon.c
- : functions: device_connection_find_match device_connection_find device_connection_add device_connection_remove
+ :functions: device_connection_find_match device_connection_find device_connection_add device_connection_remove
diff --git a/Documentation/driver-api/gpio/driver.rst b/Documentation/driver-api/gpio/driver.rst
index 505ee906d7d9..cbe0242842d1 100644
--- a/Documentation/driver-api/gpio/driver.rst
+++ b/Documentation/driver-api/gpio/driver.rst
@@ -44,7 +44,7 @@ common to each controller of that type:
- methods to establish GPIO line direction
- methods used to access GPIO line values
- - method to set electrical configuration to a a given GPIO line
+ - method to set electrical configuration for a given GPIO line
- method to return the IRQ number associated to a given GPIO line
- flag saying whether calls to its methods may sleep
- optional line names array to identify lines
@@ -143,7 +143,7 @@ resistor will make the line tend to high level unless one of the transistors on
the rail actively pulls it down.
The level on the line will go as high as the VDD on the pull-up resistor, which
-may be higher than the level supported by the transistor, achieveing a
+may be higher than the level supported by the transistor, achieving a
level-shift to the higher VDD.
Integrated electronics often have an output driver stage in the form of a CMOS
@@ -382,7 +382,7 @@ Real-Time compliance for GPIO IRQ chips
Any provider of irqchips needs to be carefully tailored to support Real Time
preemption. It is desirable that all irqchips in the GPIO subsystem keep this
-in mind and does the proper testing to assure they are real time-enabled.
+in mind and do the proper testing to assure they are real time-enabled.
So, pay attention on above " RT_FULL:" notes, please.
The following is a checklist to follow when preparing a driver for real
time-compliance:
diff --git a/Documentation/driver-api/index.rst b/Documentation/driver-api/index.rst
index 6d8352c0f354..5d04296f5ce0 100644
--- a/Documentation/driver-api/index.rst
+++ b/Documentation/driver-api/index.rst
@@ -17,7 +17,9 @@ available subsections can be seen below.
basics
infrastructure
pm/index
+ clk
device-io
+ device_connection
dma-buf
device_link
message-based
diff --git a/Documentation/driver-api/uio-howto.rst b/Documentation/driver-api/uio-howto.rst
index 92056c20e070..fb2eb73be4a3 100644
--- a/Documentation/driver-api/uio-howto.rst
+++ b/Documentation/driver-api/uio-howto.rst
@@ -711,7 +711,8 @@ The vmbus device regions are mapped into uio device resources:
If a subchannel is created by a request to host, then the uio_hv_generic
device driver will create a sysfs binary file for the per-channel ring buffer.
-For example:
+For example::
+
/sys/bus/vmbus/devices/3811fe4d-0fa0-4b62-981a-74fc1084c757/channels/21/ring
Further information
diff --git a/Documentation/features/lib/strncasecmp/arch-support.txt b/Documentation/features/core/cBPF-JIT/arch-support.txt
index 4f3a6a0e4e68..90459cdde314 100644
--- a/Documentation/features/lib/strncasecmp/arch-support.txt
+++ b/Documentation/features/core/cBPF-JIT/arch-support.txt
@@ -1,7 +1,7 @@
#
-# Feature name: strncasecmp
-# Kconfig: __HAVE_ARCH_STRNCASECMP
-# description: arch provides an optimized strncasecmp() function
+# Feature name: cBPF-JIT
+# Kconfig: HAVE_CBPF_JIT
+# description: arch supports cBPF JIT optimizations
#
-----------------------
| arch |status|
@@ -16,14 +16,16 @@
| ia64: | TODO |
| m68k: | TODO |
| microblaze: | TODO |
- | mips: | TODO |
+ | mips: | ok |
+ | nds32: | TODO |
| nios2: | TODO |
| openrisc: | TODO |
| parisc: | TODO |
- | powerpc: | TODO |
+ | powerpc: | ok |
+ | riscv: | TODO |
| s390: | TODO |
| sh: | TODO |
- | sparc: | TODO |
+ | sparc: | ok |
| um: | TODO |
| unicore32: | TODO |
| x86: | TODO |
diff --git a/Documentation/features/core/BPF-JIT/arch-support.txt b/Documentation/features/core/eBPF-JIT/arch-support.txt
index 0b96b4e1e7d4..c90a0382fe66 100644
--- a/Documentation/features/core/BPF-JIT/arch-support.txt
+++ b/Documentation/features/core/eBPF-JIT/arch-support.txt
@@ -1,7 +1,7 @@
#
-# Feature name: BPF-JIT
-# Kconfig: HAVE_BPF_JIT
-# description: arch supports BPF JIT optimizations
+# Feature name: eBPF-JIT
+# Kconfig: HAVE_EBPF_JIT
+# description: arch supports eBPF JIT optimizations
#
-----------------------
| arch |status|
@@ -17,10 +17,12 @@
| m68k: | TODO |
| microblaze: | TODO |
| mips: | ok |
+ | nds32: | TODO |
| nios2: | TODO |
| openrisc: | TODO |
| parisc: | TODO |
| powerpc: | ok |
+ | riscv: | TODO |
| s390: | ok |
| sh: | TODO |
| sparc: | ok |
diff --git a/Documentation/features/core/generic-idle-thread/arch-support.txt b/Documentation/features/core/generic-idle-thread/arch-support.txt
index 372a2b18a617..0ef6acdb991c 100644
--- a/Documentation/features/core/generic-idle-thread/arch-support.txt
+++ b/Documentation/features/core/generic-idle-thread/arch-support.txt
@@ -17,10 +17,12 @@
| m68k: | TODO |
| microblaze: | TODO |
| mips: | ok |
+ | nds32: | TODO |
| nios2: | TODO |
- | openrisc: | TODO |
+ | openrisc: | ok |
| parisc: | ok |
| powerpc: | ok |
+ | riscv: | ok |
| s390: | ok |
| sh: | ok |
| sparc: | ok |
diff --git a/Documentation/features/core/jump-labels/arch-support.txt b/Documentation/features/core/jump-labels/arch-support.txt
index ad97217b003b..27cbd63abfd2 100644
--- a/Documentation/features/core/jump-labels/arch-support.txt
+++ b/Documentation/features/core/jump-labels/arch-support.txt
@@ -17,10 +17,12 @@
| m68k: | TODO |
| microblaze: | TODO |
| mips: | ok |
+ | nds32: | TODO |
| nios2: | TODO |
| openrisc: | TODO |
| parisc: | TODO |
| powerpc: | ok |
+ | riscv: | TODO |
| s390: | ok |
| sh: | TODO |
| sparc: | ok |
diff --git a/Documentation/features/core/tracehook/arch-support.txt b/Documentation/features/core/tracehook/arch-support.txt
index 36ee7bef5d18..f44c274e40ed 100644
--- a/Documentation/features/core/tracehook/arch-support.txt
+++ b/Documentation/features/core/tracehook/arch-support.txt
@@ -17,10 +17,12 @@
| m68k: | TODO |
| microblaze: | TODO |
| mips: | ok |
+ | nds32: | ok |
| nios2: | ok |
| openrisc: | ok |
| parisc: | ok |
| powerpc: | ok |
+ | riscv: | ok |
| s390: | ok |
| sh: | ok |
| sparc: | ok |
diff --git a/Documentation/features/debug/KASAN/arch-support.txt b/Documentation/features/debug/KASAN/arch-support.txt
index f5c99fa576d3..282ecc8ea1da 100644
--- a/Documentation/features/debug/KASAN/arch-support.txt
+++ b/Documentation/features/debug/KASAN/arch-support.txt
@@ -17,15 +17,17 @@
| m68k: | TODO |
| microblaze: | TODO |
| mips: | TODO |
+ | nds32: | TODO |
| nios2: | TODO |
| openrisc: | TODO |
| parisc: | TODO |
| powerpc: | TODO |
+ | riscv: | TODO |
| s390: | TODO |
| sh: | TODO |
| sparc: | TODO |
| um: | TODO |
| unicore32: | TODO |
- | x86: | ok | 64-bit only
+ | x86: | ok |
| xtensa: | ok |
-----------------------
diff --git a/Documentation/features/debug/gcov-profile-all/arch-support.txt b/Documentation/features/debug/gcov-profile-all/arch-support.txt
index 5170a9934843..01b2b3004e0a 100644
--- a/Documentation/features/debug/gcov-profile-all/arch-support.txt
+++ b/Documentation/features/debug/gcov-profile-all/arch-support.txt
@@ -17,10 +17,12 @@
| m68k: | TODO |
| microblaze: | ok |
| mips: | TODO |
+ | nds32: | TODO |
| nios2: | TODO |
| openrisc: | TODO |
| parisc: | TODO |
| powerpc: | ok |
+ | riscv: | TODO |
| s390: | ok |
| sh: | ok |
| sparc: | TODO |
diff --git a/Documentation/features/debug/kgdb/arch-support.txt b/Documentation/features/debug/kgdb/arch-support.txt
index 13b6e994ae1f..3b4dff22329f 100644
--- a/Documentation/features/debug/kgdb/arch-support.txt
+++ b/Documentation/features/debug/kgdb/arch-support.txt
@@ -11,16 +11,18 @@
| arm: | ok |
| arm64: | ok |
| c6x: | TODO |
- | h8300: | TODO |
+ | h8300: | ok |
| hexagon: | ok |
| ia64: | TODO |
| m68k: | TODO |
| microblaze: | ok |
| mips: | ok |
+ | nds32: | TODO |
| nios2: | ok |
| openrisc: | TODO |
| parisc: | TODO |
| powerpc: | ok |
+ | riscv: | TODO |
| s390: | TODO |
| sh: | ok |
| sparc: | ok |
diff --git a/Documentation/features/debug/kprobes-on-ftrace/arch-support.txt b/Documentation/features/debug/kprobes-on-ftrace/arch-support.txt
index 419bb38820e7..7e963d0ae646 100644
--- a/Documentation/features/debug/kprobes-on-ftrace/arch-support.txt
+++ b/Documentation/features/debug/kprobes-on-ftrace/arch-support.txt
@@ -17,10 +17,12 @@
| m68k: | TODO |
| microblaze: | TODO |
| mips: | TODO |
+ | nds32: | TODO |
| nios2: | TODO |
| openrisc: | TODO |
| parisc: | TODO |
| powerpc: | ok |
+ | riscv: | TODO |
| s390: | TODO |
| sh: | TODO |
| sparc: | TODO |
diff --git a/Documentation/features/debug/kprobes/arch-support.txt b/Documentation/features/debug/kprobes/arch-support.txt
index 52b3ace0a030..4ada027faf16 100644
--- a/Documentation/features/debug/kprobes/arch-support.txt
+++ b/Documentation/features/debug/kprobes/arch-support.txt
@@ -9,7 +9,7 @@
| alpha: | TODO |
| arc: | ok |
| arm: | ok |
- | arm64: | TODO |
+ | arm64: | ok |
| c6x: | TODO |
| h8300: | TODO |
| hexagon: | TODO |
@@ -17,10 +17,12 @@
| m68k: | TODO |
| microblaze: | TODO |
| mips: | ok |
+ | nds32: | TODO |
| nios2: | TODO |
| openrisc: | TODO |
| parisc: | TODO |
| powerpc: | ok |
+ | riscv: | ok |
| s390: | ok |
| sh: | ok |
| sparc: | ok |
diff --git a/Documentation/features/debug/kretprobes/arch-support.txt b/Documentation/features/debug/kretprobes/arch-support.txt
index 180d24419518..044e13fcca5d 100644
--- a/Documentation/features/debug/kretprobes/arch-support.txt
+++ b/Documentation/features/debug/kretprobes/arch-support.txt
@@ -9,7 +9,7 @@
| alpha: | TODO |
| arc: | ok |
| arm: | ok |
- | arm64: | TODO |
+ | arm64: | ok |
| c6x: | TODO |
| h8300: | TODO |
| hexagon: | TODO |
@@ -17,10 +17,12 @@
| m68k: | TODO |
| microblaze: | TODO |
| mips: | ok |
+ | nds32: | TODO |
| nios2: | TODO |
| openrisc: | TODO |
| parisc: | TODO |
| powerpc: | ok |
+ | riscv: | TODO |
| s390: | ok |
| sh: | ok |
| sparc: | ok |
diff --git a/Documentation/features/debug/optprobes/arch-support.txt b/Documentation/features/debug/optprobes/arch-support.txt
index 0a1241f45e41..dce7669c918f 100644
--- a/Documentation/features/debug/optprobes/arch-support.txt
+++ b/Documentation/features/debug/optprobes/arch-support.txt
@@ -17,10 +17,12 @@
| m68k: | TODO |
| microblaze: | TODO |
| mips: | TODO |
+ | nds32: | TODO |
| nios2: | TODO |
| openrisc: | TODO |
| parisc: | TODO |
- | powerpc: | TODO |
+ | powerpc: | ok |
+ | riscv: | TODO |
| s390: | TODO |
| sh: | TODO |
| sparc: | TODO |
diff --git a/Documentation/features/debug/stackprotector/arch-support.txt b/Documentation/features/debug/stackprotector/arch-support.txt
index 570019572383..74b89a9c8b3a 100644
--- a/Documentation/features/debug/stackprotector/arch-support.txt
+++ b/Documentation/features/debug/stackprotector/arch-support.txt
@@ -17,10 +17,12 @@
| m68k: | TODO |
| microblaze: | TODO |
| mips: | ok |
+ | nds32: | TODO |
| nios2: | TODO |
| openrisc: | TODO |
| parisc: | TODO |
| powerpc: | TODO |
+ | riscv: | TODO |
| s390: | TODO |
| sh: | ok |
| sparc: | TODO |
diff --git a/Documentation/features/debug/uprobes/arch-support.txt b/Documentation/features/debug/uprobes/arch-support.txt
index 0b8d922eb799..1a3f9d3229bf 100644
--- a/Documentation/features/debug/uprobes/arch-support.txt
+++ b/Documentation/features/debug/uprobes/arch-support.txt
@@ -9,7 +9,7 @@
| alpha: | TODO |
| arc: | TODO |
| arm: | ok |
- | arm64: | TODO |
+ | arm64: | ok |
| c6x: | TODO |
| h8300: | TODO |
| hexagon: | TODO |
@@ -17,13 +17,15 @@
| m68k: | TODO |
| microblaze: | TODO |
| mips: | ok |
+ | nds32: | TODO |
| nios2: | TODO |
| openrisc: | TODO |
| parisc: | TODO |
| powerpc: | ok |
+ | riscv: | TODO |
| s390: | ok |
| sh: | TODO |
- | sparc: | TODO |
+ | sparc: | ok |
| um: | TODO |
| unicore32: | TODO |
| x86: | ok |
diff --git a/Documentation/features/debug/user-ret-profiler/arch-support.txt b/Documentation/features/debug/user-ret-profiler/arch-support.txt
index 13852ae62e9e..1d78d1069a5f 100644
--- a/Documentation/features/debug/user-ret-profiler/arch-support.txt
+++ b/Documentation/features/debug/user-ret-profiler/arch-support.txt
@@ -17,10 +17,12 @@
| m68k: | TODO |
| microblaze: | TODO |
| mips: | TODO |
+ | nds32: | TODO |
| nios2: | TODO |
| openrisc: | TODO |
| parisc: | TODO |
| powerpc: | TODO |
+ | riscv: | TODO |
| s390: | TODO |
| sh: | TODO |
| sparc: | TODO |
diff --git a/Documentation/features/io/dma-api-debug/arch-support.txt b/Documentation/features/io/dma-api-debug/arch-support.txt
deleted file mode 100644
index e438ed675623..000000000000
--- a/Documentation/features/io/dma-api-debug/arch-support.txt
+++ /dev/null
@@ -1,31 +0,0 @@
-#
-# Feature name: dma-api-debug
-# Kconfig: HAVE_DMA_API_DEBUG
-# description: arch supports DMA debug facilities
-#
- -----------------------
- | arch |status|
- -----------------------
- | alpha: | TODO |
- | arc: | TODO |
- | arm: | ok |
- | arm64: | ok |
- | c6x: | ok |
- | h8300: | TODO |
- | hexagon: | TODO |
- | ia64: | ok |
- | m68k: | TODO |
- | microblaze: | ok |
- | mips: | ok |
- | nios2: | TODO |
- | openrisc: | TODO |
- | parisc: | TODO |
- | powerpc: | ok |
- | s390: | ok |
- | sh: | ok |
- | sparc: | ok |
- | um: | TODO |
- | unicore32: | TODO |
- | x86: | ok |
- | xtensa: | ok |
- -----------------------
diff --git a/Documentation/features/io/dma-contiguous/arch-support.txt b/Documentation/features/io/dma-contiguous/arch-support.txt
index 47f64a433df0..30c072d2b67c 100644
--- a/Documentation/features/io/dma-contiguous/arch-support.txt
+++ b/Documentation/features/io/dma-contiguous/arch-support.txt
@@ -17,11 +17,13 @@
| m68k: | TODO |
| microblaze: | TODO |
| mips: | ok |
+ | nds32: | TODO |
| nios2: | TODO |
| openrisc: | TODO |
| parisc: | TODO |
| powerpc: | TODO |
- | s390: | TODO |
+ | riscv: | ok |
+ | s390: | ok |
| sh: | TODO |
| sparc: | TODO |
| um: | TODO |
diff --git a/Documentation/features/io/sg-chain/arch-support.txt b/Documentation/features/io/sg-chain/arch-support.txt
index 07f357fadbff..6554f0372c3f 100644
--- a/Documentation/features/io/sg-chain/arch-support.txt
+++ b/Documentation/features/io/sg-chain/arch-support.txt
@@ -17,10 +17,12 @@
| m68k: | TODO |
| microblaze: | TODO |
| mips: | TODO |
+ | nds32: | TODO |
| nios2: | TODO |
| openrisc: | TODO |
| parisc: | TODO |
| powerpc: | ok |
+ | riscv: | TODO |
| s390: | ok |
| sh: | TODO |
| sparc: | ok |
diff --git a/Documentation/features/locking/cmpxchg-local/arch-support.txt b/Documentation/features/locking/cmpxchg-local/arch-support.txt
index 482a0b09d1f8..51704a2dc8d1 100644
--- a/Documentation/features/locking/cmpxchg-local/arch-support.txt
+++ b/Documentation/features/locking/cmpxchg-local/arch-support.txt
@@ -9,7 +9,7 @@
| alpha: | TODO |
| arc: | TODO |
| arm: | TODO |
- | arm64: | TODO |
+ | arm64: | ok |
| c6x: | TODO |
| h8300: | TODO |
| hexagon: | TODO |
@@ -17,10 +17,12 @@
| m68k: | TODO |
| microblaze: | TODO |
| mips: | TODO |
+ | nds32: | TODO |
| nios2: | TODO |
| openrisc: | TODO |
| parisc: | TODO |
| powerpc: | TODO |
+ | riscv: | TODO |
| s390: | ok |
| sh: | TODO |
| sparc: | TODO |
diff --git a/Documentation/features/locking/lockdep/arch-support.txt b/Documentation/features/locking/lockdep/arch-support.txt
index bb35c5ba6286..bd39c5edd460 100644
--- a/Documentation/features/locking/lockdep/arch-support.txt
+++ b/Documentation/features/locking/lockdep/arch-support.txt
@@ -17,10 +17,12 @@
| m68k: | TODO |
| microblaze: | ok |
| mips: | ok |
+ | nds32: | ok |
| nios2: | TODO |
- | openrisc: | TODO |
+ | openrisc: | ok |
| parisc: | TODO |
| powerpc: | ok |
+ | riscv: | TODO |
| s390: | ok |
| sh: | ok |
| sparc: | ok |
diff --git a/Documentation/features/locking/queued-rwlocks/arch-support.txt b/Documentation/features/locking/queued-rwlocks/arch-support.txt
index 627e9a6b2db9..da7aff3bee0b 100644
--- a/Documentation/features/locking/queued-rwlocks/arch-support.txt
+++ b/Documentation/features/locking/queued-rwlocks/arch-support.txt
@@ -9,21 +9,23 @@
| alpha: | TODO |
| arc: | TODO |
| arm: | TODO |
- | arm64: | TODO |
+ | arm64: | ok |
| c6x: | TODO |
| h8300: | TODO |
| hexagon: | TODO |
| ia64: | TODO |
| m68k: | TODO |
| microblaze: | TODO |
- | mips: | TODO |
+ | mips: | ok |
+ | nds32: | TODO |
| nios2: | TODO |
- | openrisc: | TODO |
+ | openrisc: | ok |
| parisc: | TODO |
| powerpc: | TODO |
+ | riscv: | TODO |
| s390: | TODO |
| sh: | TODO |
- | sparc: | TODO |
+ | sparc: | ok |
| um: | TODO |
| unicore32: | TODO |
| x86: | ok |
diff --git a/Documentation/features/locking/queued-spinlocks/arch-support.txt b/Documentation/features/locking/queued-spinlocks/arch-support.txt
index 9edda216cdfb..478e9101322c 100644
--- a/Documentation/features/locking/queued-spinlocks/arch-support.txt
+++ b/Documentation/features/locking/queued-spinlocks/arch-support.txt
@@ -16,14 +16,16 @@
| ia64: | TODO |
| m68k: | TODO |
| microblaze: | TODO |
- | mips: | TODO |
+ | mips: | ok |
+ | nds32: | TODO |
| nios2: | TODO |
- | openrisc: | TODO |
+ | openrisc: | ok |
| parisc: | TODO |
| powerpc: | TODO |
+ | riscv: | TODO |
| s390: | TODO |
| sh: | TODO |
- | sparc: | TODO |
+ | sparc: | ok |
| um: | TODO |
| unicore32: | TODO |
| x86: | ok |
diff --git a/Documentation/features/locking/rwsem-optimized/arch-support.txt b/Documentation/features/locking/rwsem-optimized/arch-support.txt
index 8d9afb10b16e..e54b1f1a8091 100644
--- a/Documentation/features/locking/rwsem-optimized/arch-support.txt
+++ b/Documentation/features/locking/rwsem-optimized/arch-support.txt
@@ -1,6 +1,6 @@
#
# Feature name: rwsem-optimized
-# Kconfig: Optimized asm/rwsem.h
+# Kconfig: !RWSEM_GENERIC_SPINLOCK
# description: arch provides optimized rwsem APIs
#
-----------------------
@@ -8,8 +8,8 @@
-----------------------
| alpha: | ok |
| arc: | TODO |
- | arm: | TODO |
- | arm64: | TODO |
+ | arm: | ok |
+ | arm64: | ok |
| c6x: | TODO |
| h8300: | TODO |
| hexagon: | TODO |
@@ -17,14 +17,16 @@
| m68k: | TODO |
| microblaze: | TODO |
| mips: | TODO |
+ | nds32: | TODO |
| nios2: | TODO |
| openrisc: | TODO |
| parisc: | TODO |
| powerpc: | TODO |
+ | riscv: | TODO |
| s390: | ok |
| sh: | ok |
| sparc: | ok |
- | um: | TODO |
+ | um: | ok |
| unicore32: | TODO |
| x86: | ok |
| xtensa: | ok |
diff --git a/Documentation/features/perf/kprobes-event/arch-support.txt b/Documentation/features/perf/kprobes-event/arch-support.txt
index d01239ee34b3..7331402d1887 100644
--- a/Documentation/features/perf/kprobes-event/arch-support.txt
+++ b/Documentation/features/perf/kprobes-event/arch-support.txt
@@ -9,7 +9,7 @@
| alpha: | TODO |
| arc: | TODO |
| arm: | ok |
- | arm64: | TODO |
+ | arm64: | ok |
| c6x: | TODO |
| h8300: | TODO |
| hexagon: | ok |
@@ -17,13 +17,15 @@
| m68k: | TODO |
| microblaze: | TODO |
| mips: | ok |
+ | nds32: | ok |
| nios2: | TODO |
| openrisc: | TODO |
| parisc: | TODO |
| powerpc: | ok |
+ | riscv: | TODO |
| s390: | ok |
| sh: | ok |
- | sparc: | TODO |
+ | sparc: | ok |
| um: | TODO |
| unicore32: | TODO |
| x86: | ok |
diff --git a/Documentation/features/perf/perf-regs/arch-support.txt b/Documentation/features/perf/perf-regs/arch-support.txt
index 458faba5311a..53feeee6cdad 100644
--- a/Documentation/features/perf/perf-regs/arch-support.txt
+++ b/Documentation/features/perf/perf-regs/arch-support.txt
@@ -17,11 +17,13 @@
| m68k: | TODO |
| microblaze: | TODO |
| mips: | TODO |
+ | nds32: | TODO |
| nios2: | TODO |
| openrisc: | TODO |
| parisc: | TODO |
| powerpc: | ok |
- | s390: | TODO |
+ | riscv: | TODO |
+ | s390: | ok |
| sh: | TODO |
| sparc: | TODO |
| um: | TODO |
diff --git a/Documentation/features/perf/perf-stackdump/arch-support.txt b/Documentation/features/perf/perf-stackdump/arch-support.txt
index 545d01c69c88..16164348e0ea 100644
--- a/Documentation/features/perf/perf-stackdump/arch-support.txt
+++ b/Documentation/features/perf/perf-stackdump/arch-support.txt
@@ -17,11 +17,13 @@
| m68k: | TODO |
| microblaze: | TODO |
| mips: | TODO |
+ | nds32: | TODO |
| nios2: | TODO |
| openrisc: | TODO |
| parisc: | TODO |
| powerpc: | ok |
- | s390: | TODO |
+ | riscv: | TODO |
+ | s390: | ok |
| sh: | TODO |
| sparc: | TODO |
| um: | TODO |
diff --git a/Documentation/features/sched/membarrier-sync-core/arch-support.txt b/Documentation/features/sched/membarrier-sync-core/arch-support.txt
index 85a6c9d4571c..dbdf62907703 100644
--- a/Documentation/features/sched/membarrier-sync-core/arch-support.txt
+++ b/Documentation/features/sched/membarrier-sync-core/arch-support.txt
@@ -40,10 +40,12 @@
| m68k: | TODO |
| microblaze: | TODO |
| mips: | TODO |
+ | nds32: | TODO |
| nios2: | TODO |
| openrisc: | TODO |
| parisc: | TODO |
| powerpc: | TODO |
+ | riscv: | TODO |
| s390: | TODO |
| sh: | TODO |
| sparc: | TODO |
diff --git a/Documentation/features/sched/numa-balancing/arch-support.txt b/Documentation/features/sched/numa-balancing/arch-support.txt
index 347508863872..c68bb2c2cb62 100644
--- a/Documentation/features/sched/numa-balancing/arch-support.txt
+++ b/Documentation/features/sched/numa-balancing/arch-support.txt
@@ -9,7 +9,7 @@
| alpha: | TODO |
| arc: | .. |
| arm: | .. |
- | arm64: | .. |
+ | arm64: | ok |
| c6x: | .. |
| h8300: | .. |
| hexagon: | .. |
@@ -17,11 +17,13 @@
| m68k: | .. |
| microblaze: | .. |
| mips: | TODO |
+ | nds32: | TODO |
| nios2: | .. |
| openrisc: | .. |
| parisc: | .. |
| powerpc: | ok |
- | s390: | .. |
+ | riscv: | TODO |
+ | s390: | ok |
| sh: | .. |
| sparc: | TODO |
| um: | .. |
diff --git a/Documentation/features/scripts/features-refresh.sh b/Documentation/features/scripts/features-refresh.sh
new file mode 100755
index 000000000000..9e72d38a0720
--- /dev/null
+++ b/Documentation/features/scripts/features-refresh.sh
@@ -0,0 +1,99 @@
+#!/bin/bash
+#
+# Small script that refreshes the kernel feature support status in place.
+#
+
+for F_FILE in Documentation/features/*/*/arch-support.txt; do
+ F=$(grep "^# Kconfig:" "$F_FILE" | cut -c26-)
+
+ #
+ # Each feature F is identified by a pair (O, K), where 'O' can
+ # be either the empty string (for 'nop') or "not" (the logical
+ # negation operator '!'); other operators are not supported.
+ #
+ O=""
+ K=$F
+ if [[ "$F" == !* ]]; then
+ O="not"
+ K=$(echo $F | sed -e 's/^!//g')
+ fi
+
+ #
+ # F := (O, K) is 'valid' iff there is a Kconfig file (for some
+ # arch) which contains K.
+ #
+ # Notice that this definition entails an 'asymmetry' between
+ # the case 'O = ""' and the case 'O = "not"'. E.g., F may be
+ # _invalid_ if:
+ #
+ # [case 'O = ""']
+ # 1) no arch provides support for F,
+ # 2) K does not exist (e.g., it was renamed/mis-typed);
+ #
+ # [case 'O = "not"']
+ # 3) all archs provide support for F,
+ # 4) as in (2).
+ #
+ # The rationale for adopting this definition (and, thus, for
+ # keeping the asymmetry) is:
+ #
+ # We want to be able to 'detect' (2) (or (4)).
+ #
+ # (1) and (3) may further warn the developers about the fact
+ # that K can be removed.
+ #
+ F_VALID="false"
+ for ARCH_DIR in arch/*/; do
+ K_FILES=$(find $ARCH_DIR -name "Kconfig*")
+ K_GREP=$(grep "$K" $K_FILES)
+ if [ ! -z "$K_GREP" ]; then
+ F_VALID="true"
+ break
+ fi
+ done
+ if [ "$F_VALID" = "false" ]; then
+ printf "WARNING: '%s' is not a valid Kconfig\n" "$F"
+ fi
+
+ T_FILE="$F_FILE.tmp"
+ grep "^#" $F_FILE > $T_FILE
+ echo " -----------------------" >> $T_FILE
+ echo " | arch |status|" >> $T_FILE
+ echo " -----------------------" >> $T_FILE
+ for ARCH_DIR in arch/*/; do
+ ARCH=$(echo $ARCH_DIR | sed -e 's/arch//g' | sed -e 's/\///g')
+ K_FILES=$(find $ARCH_DIR -name "Kconfig*")
+ K_GREP=$(grep "$K" $K_FILES)
+ #
+ # Arch support status values for (O, K) are updated according
+ # to the following rules.
+ #
+ # - ("", K) is 'supported by a given arch', if there is a
+ # Kconfig file for that arch which contains K;
+ #
+ # - ("not", K) is 'supported by a given arch', if there is
+ # no Kconfig file for that arch which contains K;
+ #
+ # - otherwise: preserve the previous status value (if any),
+ # default to 'not yet supported'.
+ #
+	# Notice that, according to these rules, invalid features may be
+ # updated/modified.
+ #
+ if [ "$O" = "" ] && [ ! -z "$K_GREP" ]; then
+ printf " |%12s: | ok |\n" "$ARCH" >> $T_FILE
+ elif [ "$O" = "not" ] && [ -z "$K_GREP" ]; then
+ printf " |%12s: | ok |\n" "$ARCH" >> $T_FILE
+ else
+ S=$(grep -v "^#" "$F_FILE" | grep " $ARCH:")
+ if [ ! -z "$S" ]; then
+ echo "$S" >> $T_FILE
+ else
+ printf " |%12s: | TODO |\n" "$ARCH" \
+ >> $T_FILE
+ fi
+ fi
+ done
+ echo " -----------------------" >> $T_FILE
+ mv $T_FILE $F_FILE
+done
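
[Editor's note: a sketch of how the script above is meant to be invoked, run
from the top of a kernel source tree. The tree path and the WARNING line are
illustrative; the warning fires when a feature file names a Kconfig symbol
that no arch Kconfig contains.]

	$ cd ~/src/linux		# top of the kernel source tree
	$ Documentation/features/scripts/features-refresh.sh
	WARNING: 'FOO_BAR_SYMBOL' is not a valid Kconfig

The script rewrites each Documentation/features/*/*/arch-support.txt in
place, so "git diff Documentation/features/" afterwards shows exactly which
support entries changed.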
diff --git a/Documentation/features/seccomp/seccomp-filter/arch-support.txt b/Documentation/features/seccomp/seccomp-filter/arch-support.txt
index e4fad58a05e5..d4271b493b41 100644
--- a/Documentation/features/seccomp/seccomp-filter/arch-support.txt
+++ b/Documentation/features/seccomp/seccomp-filter/arch-support.txt
@@ -17,10 +17,12 @@
| m68k: | TODO |
| microblaze: | TODO |
| mips: | ok |
+ | nds32: | TODO |
| nios2: | TODO |
| openrisc: | TODO |
- | parisc: | TODO |
- | powerpc: | TODO |
+ | parisc: | ok |
+ | powerpc: | ok |
+ | riscv: | TODO |
| s390: | ok |
| sh: | TODO |
| sparc: | TODO |
diff --git a/Documentation/features/time/arch-tick-broadcast/arch-support.txt b/Documentation/features/time/arch-tick-broadcast/arch-support.txt
index 8052904b25fc..83d9e68462bb 100644
--- a/Documentation/features/time/arch-tick-broadcast/arch-support.txt
+++ b/Documentation/features/time/arch-tick-broadcast/arch-support.txt
@@ -17,12 +17,14 @@
| m68k: | TODO |
| microblaze: | TODO |
| mips: | ok |
+ | nds32: | TODO |
| nios2: | TODO |
| openrisc: | TODO |
| parisc: | TODO |
| powerpc: | ok |
+ | riscv: | TODO |
| s390: | TODO |
- | sh: | TODO |
+ | sh: | ok |
| sparc: | TODO |
| um: | TODO |
| unicore32: | TODO |
diff --git a/Documentation/features/time/clockevents/arch-support.txt b/Documentation/features/time/clockevents/arch-support.txt
index 7c76b946297e..3d4908fce6da 100644
--- a/Documentation/features/time/clockevents/arch-support.txt
+++ b/Documentation/features/time/clockevents/arch-support.txt
@@ -17,10 +17,12 @@
| m68k: | ok |
| microblaze: | ok |
| mips: | ok |
+ | nds32: | ok |
| nios2: | ok |
| openrisc: | ok |
- | parisc: | TODO |
+ | parisc: | ok |
| powerpc: | ok |
+ | riscv: | ok |
| s390: | ok |
| sh: | ok |
| sparc: | ok |
diff --git a/Documentation/features/time/context-tracking/arch-support.txt b/Documentation/features/time/context-tracking/arch-support.txt
index 9433b3e523b3..c29974afffaa 100644
--- a/Documentation/features/time/context-tracking/arch-support.txt
+++ b/Documentation/features/time/context-tracking/arch-support.txt
@@ -17,10 +17,12 @@
| m68k: | TODO |
| microblaze: | TODO |
| mips: | ok |
+ | nds32: | TODO |
| nios2: | TODO |
| openrisc: | TODO |
| parisc: | TODO |
| powerpc: | ok |
+ | riscv: | TODO |
| s390: | TODO |
| sh: | TODO |
| sparc: | ok |
diff --git a/Documentation/features/time/irq-time-acct/arch-support.txt b/Documentation/features/time/irq-time-acct/arch-support.txt
index 212dde0b578c..8d73c463ec27 100644
--- a/Documentation/features/time/irq-time-acct/arch-support.txt
+++ b/Documentation/features/time/irq-time-acct/arch-support.txt
@@ -17,10 +17,12 @@
| m68k: | TODO |
| microblaze: | TODO |
| mips: | ok |
+ | nds32: | TODO |
| nios2: | TODO |
| openrisc: | TODO |
| parisc: | .. |
- | powerpc: | .. |
+ | powerpc: | ok |
+ | riscv: | TODO |
| s390: | .. |
| sh: | TODO |
| sparc: | .. |
diff --git a/Documentation/features/time/modern-timekeeping/arch-support.txt b/Documentation/features/time/modern-timekeeping/arch-support.txt
index 4074028f72f7..e7c6ea6b8fb3 100644
--- a/Documentation/features/time/modern-timekeeping/arch-support.txt
+++ b/Documentation/features/time/modern-timekeeping/arch-support.txt
@@ -17,10 +17,12 @@
| m68k: | TODO |
| microblaze: | ok |
| mips: | ok |
+ | nds32: | ok |
| nios2: | ok |
| openrisc: | ok |
| parisc: | ok |
| powerpc: | ok |
+ | riscv: | ok |
| s390: | ok |
| sh: | ok |
| sparc: | ok |
diff --git a/Documentation/features/time/virt-cpuacct/arch-support.txt b/Documentation/features/time/virt-cpuacct/arch-support.txt
index a394d8820517..4646457461cf 100644
--- a/Documentation/features/time/virt-cpuacct/arch-support.txt
+++ b/Documentation/features/time/virt-cpuacct/arch-support.txt
@@ -17,10 +17,12 @@
| m68k: | TODO |
| microblaze: | TODO |
| mips: | ok |
+ | nds32: | TODO |
| nios2: | TODO |
| openrisc: | TODO |
| parisc: | ok |
| powerpc: | ok |
+ | riscv: | TODO |
| s390: | ok |
| sh: | TODO |
| sparc: | ok |
diff --git a/Documentation/features/vm/ELF-ASLR/arch-support.txt b/Documentation/features/vm/ELF-ASLR/arch-support.txt
index 082f93d5b40e..1f71d090ff2c 100644
--- a/Documentation/features/vm/ELF-ASLR/arch-support.txt
+++ b/Documentation/features/vm/ELF-ASLR/arch-support.txt
@@ -17,10 +17,12 @@
| m68k: | TODO |
| microblaze: | TODO |
| mips: | ok |
+ | nds32: | TODO |
| nios2: | TODO |
| openrisc: | TODO |
- | parisc: | TODO |
+ | parisc: | ok |
| powerpc: | ok |
+ | riscv: | TODO |
| s390: | ok |
| sh: | TODO |
| sparc: | TODO |
diff --git a/Documentation/features/vm/PG_uncached/arch-support.txt b/Documentation/features/vm/PG_uncached/arch-support.txt
index 605e0abb756d..fbd5aa463b0a 100644
--- a/Documentation/features/vm/PG_uncached/arch-support.txt
+++ b/Documentation/features/vm/PG_uncached/arch-support.txt
@@ -17,10 +17,12 @@
| m68k: | TODO |
| microblaze: | TODO |
| mips: | TODO |
+ | nds32: | TODO |
| nios2: | TODO |
| openrisc: | TODO |
| parisc: | TODO |
| powerpc: | TODO |
+ | riscv: | TODO |
| s390: | TODO |
| sh: | TODO |
| sparc: | TODO |
diff --git a/Documentation/features/vm/THP/arch-support.txt b/Documentation/features/vm/THP/arch-support.txt
index 7a8eb0bd5ca8..5d7ecc378f29 100644
--- a/Documentation/features/vm/THP/arch-support.txt
+++ b/Documentation/features/vm/THP/arch-support.txt
@@ -17,10 +17,12 @@
| m68k: | .. |
| microblaze: | .. |
| mips: | ok |
+ | nds32: | TODO |
| nios2: | .. |
| openrisc: | .. |
| parisc: | TODO |
| powerpc: | ok |
+ | riscv: | TODO |
| s390: | ok |
| sh: | .. |
| sparc: | ok |
diff --git a/Documentation/features/vm/TLB/arch-support.txt b/Documentation/features/vm/TLB/arch-support.txt
index 35fb99b2b3ea..f7af9678eb66 100644
--- a/Documentation/features/vm/TLB/arch-support.txt
+++ b/Documentation/features/vm/TLB/arch-support.txt
@@ -17,10 +17,12 @@
| m68k: | .. |
| microblaze: | .. |
| mips: | TODO |
+ | nds32: | TODO |
| nios2: | .. |
| openrisc: | .. |
| parisc: | TODO |
| powerpc: | TODO |
+ | riscv: | TODO |
| s390: | TODO |
| sh: | TODO |
| sparc: | TODO |
diff --git a/Documentation/features/vm/huge-vmap/arch-support.txt b/Documentation/features/vm/huge-vmap/arch-support.txt
index ed8b943ad8fc..d0713ccc7117 100644
--- a/Documentation/features/vm/huge-vmap/arch-support.txt
+++ b/Documentation/features/vm/huge-vmap/arch-support.txt
@@ -17,10 +17,12 @@
| m68k: | TODO |
| microblaze: | TODO |
| mips: | TODO |
+ | nds32: | TODO |
| nios2: | TODO |
| openrisc: | TODO |
| parisc: | TODO |
| powerpc: | TODO |
+ | riscv: | TODO |
| s390: | TODO |
| sh: | TODO |
| sparc: | TODO |
diff --git a/Documentation/features/vm/ioremap_prot/arch-support.txt b/Documentation/features/vm/ioremap_prot/arch-support.txt
index 589947bdf0a8..8527601a3739 100644
--- a/Documentation/features/vm/ioremap_prot/arch-support.txt
+++ b/Documentation/features/vm/ioremap_prot/arch-support.txt
@@ -17,10 +17,12 @@
| m68k: | TODO |
| microblaze: | TODO |
| mips: | TODO |
+ | nds32: | TODO |
| nios2: | TODO |
| openrisc: | TODO |
| parisc: | TODO |
| powerpc: | ok |
+ | riscv: | TODO |
| s390: | TODO |
| sh: | ok |
| sparc: | TODO |
diff --git a/Documentation/features/vm/numa-memblock/arch-support.txt b/Documentation/features/vm/numa-memblock/arch-support.txt
index 8b8bea0318a0..1a988052cd24 100644
--- a/Documentation/features/vm/numa-memblock/arch-support.txt
+++ b/Documentation/features/vm/numa-memblock/arch-support.txt
@@ -9,7 +9,7 @@
| alpha: | TODO |
| arc: | .. |
| arm: | .. |
- | arm64: | .. |
+ | arm64: | ok |
| c6x: | .. |
| h8300: | .. |
| hexagon: | .. |
@@ -17,10 +17,12 @@
| m68k: | .. |
| microblaze: | ok |
| mips: | ok |
+ | nds32: | TODO |
| nios2: | .. |
| openrisc: | .. |
| parisc: | .. |
| powerpc: | ok |
+ | riscv: | ok |
| s390: | ok |
| sh: | ok |
| sparc: | ok |
diff --git a/Documentation/features/vm/pte_special/arch-support.txt b/Documentation/features/vm/pte_special/arch-support.txt
index 055004f467d2..6a608a6dcf71 100644
--- a/Documentation/features/vm/pte_special/arch-support.txt
+++ b/Documentation/features/vm/pte_special/arch-support.txt
@@ -17,10 +17,12 @@
| m68k: | TODO |
| microblaze: | TODO |
| mips: | TODO |
+ | nds32: | TODO |
| nios2: | TODO |
| openrisc: | TODO |
| parisc: | TODO |
| powerpc: | ok |
+ | riscv: | TODO |
| s390: | ok |
| sh: | ok |
| sparc: | ok |
diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index 75d2d57e2c44..2c391338c675 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -69,31 +69,31 @@ prototypes:
locking rules:
all may block
- i_mutex(inode)
-lookup: yes
-create: yes
-link: yes (both)
-mknod: yes
-symlink: yes
-mkdir: yes
-unlink: yes (both)
-rmdir: yes (both) (see below)
-rename: yes (all) (see below)
+ i_rwsem(inode)
+lookup: shared
+create: exclusive
+link: exclusive (both)
+mknod: exclusive
+symlink: exclusive
+mkdir: exclusive
+unlink: exclusive (both)
+rmdir: exclusive (both)(see below)
+rename: exclusive (all) (see below)
readlink: no
get_link: no
-setattr: yes
+setattr: exclusive
permission: no (may not block if called in rcu-walk mode)
get_acl: no
getattr: no
listxattr: no
fiemap: no
update_time: no
-atomic_open: yes
+atomic_open: exclusive
tmpfile: no
- Additionally, ->rmdir(), ->unlink() and ->rename() have ->i_mutex on
-victim.
+ Additionally, ->rmdir(), ->unlink() and ->rename() have ->i_rwsem
+ exclusive on victim.
cross-directory ->rename() has (per-superblock) ->s_vfs_rename_sem.
See Documentation/filesystems/directory-locking for more detailed discussion
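
[Editor's note: the i_rwsem rules above correspond to VFS behaviour along the
lines of this simplified C sketch. It is not the actual fs/readdir.c;
permission checks, file locking and fsnotify are left out.]

	#include <linux/fs.h>

	/* Sketch: how the VFS honours the ->iterate{,_shared}() locking rules. */
	static int sketch_iterate_dir(struct file *file, struct dir_context *ctx)
	{
		struct inode *inode = file_inode(file);
		bool shared = file->f_op->iterate_shared != NULL;
		int res;

		if (shared)
			inode_lock_shared(inode);	/* i_rwsem at least shared */
		else
			inode_lock(inode);		/* i_rwsem exclusive */

		if (shared)
			res = file->f_op->iterate_shared(file, ctx);
		else
			res = file->f_op->iterate(file, ctx);

		if (shared)
			inode_unlock_shared(inode);
		else
			inode_unlock(inode);
		return res;
	}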
@@ -111,10 +111,10 @@ prototypes:
locking rules:
all may block
- i_mutex(inode)
+ i_rwsem(inode)
list: no
get: no
-set: yes
+set: exclusive
--------------------------- super_operations ---------------------------
prototypes:
@@ -217,14 +217,14 @@ prototypes:
locking rules:
All except set_page_dirty and freepage may block
- PageLocked(page) i_mutex
+ PageLocked(page) i_rwsem
writepage: yes, unlocks (see below)
readpage: yes, unlocks
writepages:
set_page_dirty no
readpages:
-write_begin: locks the page yes
-write_end: yes, unlocks yes
+write_begin: locks the page exclusive
+write_end: yes, unlocks exclusive
bmap:
invalidatepage: yes
releasepage: yes
@@ -439,7 +439,10 @@ prototypes:
ssize_t (*read_iter) (struct kiocb *, struct iov_iter *);
ssize_t (*write_iter) (struct kiocb *, struct iov_iter *);
int (*iterate) (struct file *, struct dir_context *);
- unsigned int (*poll) (struct file *, struct poll_table_struct *);
+ int (*iterate_shared) (struct file *, struct dir_context *);
+ __poll_t (*poll) (struct file *, struct poll_table_struct *);
+ struct wait_queue_head * (*get_poll_head)(struct file *, __poll_t);
+ __poll_t (*poll_mask) (struct file *, __poll_t);
long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
long (*compat_ioctl) (struct file *, unsigned int, unsigned long);
int (*mmap) (struct file *, struct vm_area_struct *);
@@ -470,7 +473,7 @@ prototypes:
};
locking rules:
- All may block.
+ All except for ->poll_mask may block.
->llseek() locking has moved from llseek to the individual llseek
implementations. If your fs is not using generic_file_llseek, you
@@ -480,6 +483,10 @@ mutex or just to use i_size_read() instead.
Note: this does not protect the file->f_pos against concurrent modifications
since this is something the userspace has to take care about.
+->iterate() is called with i_rwsem exclusive.
+
+->iterate_shared() is called with i_rwsem at least shared.
+
->fasync() is responsible for maintaining the FASYNC bit in filp->f_flags.
Most instances call fasync_helper(), which does that maintenance, so it's
not normally something one needs to worry about. Return values > 0 will be
@@ -498,6 +505,9 @@ in sys_read() and friends.
the lease within the individual filesystem to record the result of the
operation
+->poll_mask can be called with or without the waitqueue lock for the waitqueue
+returned from ->get_poll_head.
+
--------------------------- dquot_operations -------------------------------
prototypes:
int (*write_dquot) (struct dquot *);
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index 2a84bb334894..520f6a84cf50 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -515,7 +515,8 @@ guarantees:
The /proc/PID/clear_refs is used to reset the PG_Referenced and ACCESSED/YOUNG
bits on both physical and virtual pages associated with a process, and the
-soft-dirty bit on pte (see Documentation/vm/soft-dirty.txt for details).
+soft-dirty bit on pte (see Documentation/admin-guide/mm/soft-dirty.rst
+for details).
To clear the bits for all the pages associated with the process
> echo 1 > /proc/PID/clear_refs
@@ -536,7 +537,8 @@ Any other value written to /proc/PID/clear_refs will have no effect.
The /proc/pid/pagemap gives the PFN, which can be used to find the pageflags
using /proc/kpageflags and number of times a page is mapped using
-/proc/kpagecount. For detailed explanation, see Documentation/vm/pagemap.txt.
+/proc/kpagecount. For detailed explanation, see
+Documentation/admin-guide/mm/pagemap.rst.
The /proc/pid/numa_maps is an extension based on maps, showing the memory
locality and binding policy, as well as the memory usage (in pages) of
@@ -564,7 +566,7 @@ address policy mapping details
Where:
"address" is the starting address for the mapping;
-"policy" reports the NUMA memory policy set for the mapping (see vm/numa_memory_policy.txt);
+"policy" reports the NUMA memory policy set for the mapping (see Documentation/admin-guide/mm/numa_memory_policy.rst);
"mapping details" summarizes mapping data such as mapping type, page usage counters,
node locality page counters (N0 == node0, N1 == node1, ...) and the kernel page
size, in KB, that is backing the mapping up.
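
[Editor's note: to make the numa_maps format concrete, a line from a
hypothetical process is shown below; the address, library name and counters
are invented for illustration.]

	$ grep libc /proc/self/numa_maps
	7f4e2a600000 default file=/usr/lib/libc-2.27.so mapped=120 mapmax=44 N0=120 kernelpagesize_kB=4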
diff --git a/Documentation/filesystems/tmpfs.txt b/Documentation/filesystems/tmpfs.txt
index a85355cf85f4..d06e9a59a9f4 100644
--- a/Documentation/filesystems/tmpfs.txt
+++ b/Documentation/filesystems/tmpfs.txt
@@ -105,8 +105,9 @@ policy for the file will revert to "default" policy.
NUMA memory allocation policies have optional flags that can be used in
conjunction with their modes. These optional flags can be specified
when tmpfs is mounted by appending them to the mode before the NodeList.
-See Documentation/vm/numa_memory_policy.txt for a list of all available
-memory allocation policy mode flags and their effect on memory policy.
+See Documentation/admin-guide/mm/numa_memory_policy.rst for a list of
+all available memory allocation policy mode flags and their effect on
+memory policy.
=static is equivalent to MPOL_F_STATIC_NODES
=relative is equivalent to MPOL_F_RELATIVE_NODES
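
[Editor's note: as a worked (hypothetical) example of the flag syntax above,
mounting a tmpfs bound to nodes 0-1 with the static flag combines the mode,
flag and NodeList like this:]

	$ mount -t tmpfs -o size=1G,mpol=bind=static:0-1 tmpfs /mnt

which corresponds to an allocation policy of MPOL_BIND | MPOL_F_STATIC_NODES
over nodes 0 and 1.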
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index 5fd325df59e2..829a7b7857a4 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -856,7 +856,9 @@ struct file_operations {
ssize_t (*read_iter) (struct kiocb *, struct iov_iter *);
ssize_t (*write_iter) (struct kiocb *, struct iov_iter *);
int (*iterate) (struct file *, struct dir_context *);
- unsigned int (*poll) (struct file *, struct poll_table_struct *);
+ __poll_t (*poll) (struct file *, struct poll_table_struct *);
+ struct wait_queue_head * (*get_poll_head)(struct file *, __poll_t);
+ __poll_t (*poll_mask) (struct file *, __poll_t);
long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
long (*compat_ioctl) (struct file *, unsigned int, unsigned long);
int (*mmap) (struct file *, struct vm_area_struct *);
@@ -901,6 +903,17 @@ otherwise noted.
activity on this file and (optionally) go to sleep until there
is activity. Called by the select(2) and poll(2) system calls
+ get_poll_head: Returns the struct wait_queue_head that callers can
+ wait on. Callers need to check the returned events using ->poll_mask
+ once woken. Can return NULL to indicate polling is not supported,
+ or any error code using the ERR_PTR convention to indicate that a
+   grave error occurred and ->poll_mask shall not be called.
+
+ poll_mask: return the mask of EPOLL* values describing the file descriptor
+ state. Called either before going to sleep on the waitqueue returned by
+ get_poll_head, or after it has been woken. If ->get_poll_head and
+   ->poll_mask are implemented, ->poll does not need to be implemented.
+
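+
+[Editor's note: for illustration, the contract just described would let a
+driver wire the two hooks up roughly as below. This is a hedged sketch
+derived only from the text above; "mydev" and its fields are invented.]
+
+	#include <linux/fs.h>
+	#include <linux/module.h>
+	#include <linux/poll.h>
+	#include <linux/wait.h>
+
+	struct mydev {
+		struct wait_queue_head waitq;	/* woken by the completion path */
+		bool data_ready;
+	};
+
+	static struct wait_queue_head *mydev_get_poll_head(struct file *file,
+							   __poll_t events)
+	{
+		struct mydev *dev = file->private_data;
+
+		return &dev->waitq;	/* queue poll/select callers sleep on */
+	}
+
+	static __poll_t mydev_poll_mask(struct file *file, __poll_t events)
+	{
+		struct mydev *dev = file->private_data;
+
+		/* Must not block: may be called under the waitqueue lock. */
+		return dev->data_ready ? EPOLLIN | EPOLLRDNORM : 0;
+	}
+
+	static const struct file_operations mydev_fops = {
+		.owner		= THIS_MODULE,
+		.get_poll_head	= mydev_get_poll_head,
+		.poll_mask	= mydev_poll_mask,
+		/* ->poll not needed when both hooks above are implemented */
+	};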
unlocked_ioctl: called by the ioctl(2) system call.
compat_ioctl: called by the ioctl(2) system call when 32 bit system calls
diff --git a/Documentation/hwmon/hwmon-kernel-api.txt b/Documentation/hwmon/hwmon-kernel-api.txt
index 53a806696c64..eb7a78aebb38 100644
--- a/Documentation/hwmon/hwmon-kernel-api.txt
+++ b/Documentation/hwmon/hwmon-kernel-api.txt
@@ -71,7 +71,8 @@ hwmon_device_register_with_info is the most comprehensive and preferred means
to register a hardware monitoring device. It creates the standard sysfs
attributes in the hardware monitoring core, letting the driver focus on reading
from and writing to the chip instead of having to bother with sysfs attributes.
-Its parameters are described in more detail below.
+The parent device parameter cannot be NULL with non-NULL chip info. Its
+parameters are described in more detail below.
devm_hwmon_device_register_with_info is similar to
hwmon_device_register_with_info. However, it is device managed, meaning the
diff --git a/Documentation/hwmon/ltc2990 b/Documentation/hwmon/ltc2990
index c25211e90bdc..3ed68f676c0f 100644
--- a/Documentation/hwmon/ltc2990
+++ b/Documentation/hwmon/ltc2990
@@ -8,6 +8,7 @@ Supported chips:
Datasheet: http://www.linear.com/product/ltc2990
Author: Mike Looijmans <mike.looijmans@topic.nl>
+ Tom Levens <tom.levens@cern.ch>
Description
@@ -16,10 +17,8 @@ Description
LTC2990 is a Quad I2C Voltage, Current and Temperature Monitor.
The chip's inputs can measure 4 voltages, or two inputs together (1+2 and 3+4)
can be combined to measure a differential voltage, which is typically used to
-measure current through a series resistor, or a temperature.
-
-This driver currently uses the 2x differential mode only. In order to support
-other modes, the driver will need to be expanded.
+measure current through a series resistor, or a temperature with an external
+diode.
Usage Notes
@@ -32,12 +31,19 @@ devices explicitly.
Sysfs attributes
----------------
+in0_input Voltage at Vcc pin in millivolt (range 2.5V to 5V)
+temp1_input	Internal chip temperature in millidegrees Celsius
+
+A subset of the following attributes is visible, depending on the measurement
+mode of the chip.
+
+in[1-4]_input Voltage at V[1-4] pin in millivolt
+temp2_input	External temperature sensor TR1 in millidegrees Celsius
+temp3_input	External temperature sensor TR2 in millidegrees Celsius
+curr1_input Current in mA across V1-V2 assuming a 1mOhm sense resistor
+curr2_input Current in mA across V3-V4 assuming a 1mOhm sense resistor
+
The "curr*_input" measurements actually report the voltage drop across the
input pins in microvolts. This is equivalent to the current through a 1mOhm
sense resistor. Divide the reported value by the actual sense resistor value
in mOhm to get the actual value.
-
-in0_input Voltage at Vcc pin in millivolt (range 2.5V to 5V)
-temp1_input Internal chip temperature in millidegrees Celcius
-curr1_input Current in mA across v1-v2 assuming a 1mOhm sense resistor.
-curr2_input Current in mA across v3-v4 assuming a 1mOhm sense resistor.
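
[Editor's note: to illustrate the sense-resistor conversion rule above, here
is a hedged reading; the hwmon device number and the value are invented, and
the board is assumed to really use a 10 mOhm shunt.]

	$ cat /sys/class/hwmon/hwmon2/curr1_input
	2500

The 2500 reported here is really 2500 uV across V1-V2, i.e. 2500 / 10 =
250 mA through the 10 mOhm sense resistor.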
diff --git a/Documentation/i2c/busses/i2c-ocores b/Documentation/i2c/busses/i2c-ocores
index c269aaa2f26a..9e1dfe7553ad 100644
--- a/Documentation/i2c/busses/i2c-ocores
+++ b/Documentation/i2c/busses/i2c-ocores
@@ -2,7 +2,7 @@ Kernel driver i2c-ocores
Supported adapters:
* OpenCores.org I2C controller by Richard Herveille (see datasheet link)
- Datasheet: http://www.opencores.org/projects.cgi/web/i2c/overview
+ https://opencores.org/project/i2c/overview
Author: Peter Korsgaard <jacmet@sunsite.dk>
diff --git a/Documentation/index.rst b/Documentation/index.rst
index 3b99ab931d41..fdc585703498 100644
--- a/Documentation/index.rst
+++ b/Documentation/index.rst
@@ -45,7 +45,7 @@ the kernel interface as seen by application developers.
.. toctree::
:maxdepth: 2
- userspace-api/index
+ userspace-api/index
Introduction to kernel development
@@ -89,6 +89,7 @@ needed).
sound/index
crypto/index
filesystems/index
+ vm/index
Architecture-specific documentation
-----------------------------------
diff --git a/Documentation/ioctl/botching-up-ioctls.txt b/Documentation/ioctl/botching-up-ioctls.txt
index d02cfb48901c..883fb034bd04 100644
--- a/Documentation/ioctl/botching-up-ioctls.txt
+++ b/Documentation/ioctl/botching-up-ioctls.txt
@@ -73,7 +73,9 @@ will have a second iteration or at least an extension for any given interface.
future extensions is going right down the gutters since someone will submit
an ioctl struct with random stack garbage in the yet unused parts. Which
then bakes in the ABI that those fields can never be used for anything else
- but garbage.
+ but garbage. This is also the reason why you must explicitly pad all
+ structures, even if you never use them in an array - the padding the compiler
+ might insert could contain garbage.
* Have simple testcases for all of the above.
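
[Editor's note: a hedged illustration of the explicit-padding advice; the
struct and field names are invented.]

	#include <linux/types.h>

	/*
	 * Without the explicit 'pad' member, 64-bit ABIs would round this
	 * struct up to 16 bytes with an invisible 4-byte hole after 'flags',
	 * and whatever stack garbage userspace leaves there becomes ABI.
	 */
	struct foo_dev_query {
		__u64 handle;
		__u32 flags;
		__u32 pad;	/* must be zero; reject pad != 0 in the kernel
				 * so the space can be reused for extensions */
	};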
diff --git a/Documentation/media/uapi/rc/keytable.c.rst b/Documentation/media/uapi/rc/keytable.c.rst
index e6ce1e3f5a78..217237f93b37 100644
--- a/Documentation/media/uapi/rc/keytable.c.rst
+++ b/Documentation/media/uapi/rc/keytable.c.rst
@@ -7,7 +7,7 @@ file: uapi/v4l/keytable.c
/* keytable.c - This program allows checking/replacing keys at IR
- Copyright (C) 2006-2009 Mauro Carvalho Chehab <mchehab@infradead.org>
+ Copyright (C) 2006-2009 Mauro Carvalho Chehab <mchehab@kernel.org>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
diff --git a/Documentation/media/uapi/v4l/v4l2grab.c.rst b/Documentation/media/uapi/v4l/v4l2grab.c.rst
index 5aabd0b7b089..f0d0ab6abd41 100644
--- a/Documentation/media/uapi/v4l/v4l2grab.c.rst
+++ b/Documentation/media/uapi/v4l/v4l2grab.c.rst
@@ -6,7 +6,7 @@ file: media/v4l/v4l2grab.c
.. code-block:: c
/* V4L2 video picture grabber
- Copyright (C) 2009 Mauro Carvalho Chehab <mchehab@infradead.org>
+ Copyright (C) 2009 Mauro Carvalho Chehab <mchehab@kernel.org>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt
index 6dafc8085acc..a02d6bbfc9d0 100644
--- a/Documentation/memory-barriers.txt
+++ b/Documentation/memory-barriers.txt
@@ -1920,9 +1920,6 @@ There are some more advanced barrier functions:
/* assign ownership */
desc->status = DEVICE_OWN;
- /* force memory to sync before notifying device via MMIO */
- wmb();
-
/* notify device of new descriptors */
writel(DESC_NOTIFY, doorbell);
}
@@ -1930,11 +1927,15 @@ There are some more advanced barrier functions:
The dma_rmb() allows us guarantee the device has released ownership
before we read the data from the descriptor, and the dma_wmb() allows
us to guarantee the data is written to the descriptor before the device
- can see it now has ownership. The wmb() is needed to guarantee that the
- cache coherent memory writes have completed before attempting a write to
- the cache incoherent MMIO region.
+ can see it now has ownership. Note that, when using writel(), a prior
+ wmb() is not needed to guarantee that the cache coherent memory writes
+ have completed before writing to the MMIO region. The cheaper
+ writel_relaxed() does not provide this guarantee and must not be used
+ here.
- See Documentation/DMA-API.txt for more information on consistent memory.
+ See the subsection "Kernel I/O barrier effects" for more information on
+ relaxed I/O accessors and the Documentation/DMA-API.txt file for more
+ information on consistent memory.
MMIO WRITE BARRIER
@@ -2903,7 +2904,7 @@ is discarded from the CPU's cache and reloaded. To deal with this, the
appropriate part of the kernel must invalidate the overlapping bits of the
cache on each CPU.
-See Documentation/cachetlb.txt for more information on cache management.
+See Documentation/core-api/cachetlb.rst for more information on cache management.
CACHE COHERENCY VS MMIO
@@ -3083,7 +3084,7 @@ CIRCULAR BUFFERS
Memory barriers can be used to implement circular buffering without the need
of a lock to serialise the producer with the consumer. See:
- Documentation/circular-buffers.txt
+ Documentation/core-api/circular-buffers.rst
for details.
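
[Editor's note: putting the amended barrier rule together, the producer side
of the descriptor example reads as in the condensed sketch below; the
structure layout and flag values are made up, only the barrier pattern is
from the text.]

	#include <linux/io.h>
	#include <asm/barrier.h>

	struct my_desc {
		u64 addr;
		u32 len;
		u32 status;
	};
	#define DEVICE_OWN	0x1
	#define DESC_NOTIFY	0x1

	static void my_post_descriptor(struct my_desc *desc, u64 dma_addr,
				       u32 len, void __iomem *doorbell)
	{
		desc->addr = dma_addr;
		desc->len  = len;

		/* Make the payload visible to the device before passing ownership. */
		dma_wmb();
		desc->status = DEVICE_OWN;

		/*
		 * writel() already orders the coherent-memory stores above
		 * against the MMIO store, so no extra wmb() is needed here.
		 * Replacing this with writel_relaxed() would reintroduce the bug.
		 */
		writel(DESC_NOTIFY, doorbell);
	}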
diff --git a/Documentation/networking/ppp_generic.txt b/Documentation/networking/ppp_generic.txt
index 091d20273dcb..61daf4b39600 100644
--- a/Documentation/networking/ppp_generic.txt
+++ b/Documentation/networking/ppp_generic.txt
@@ -300,12 +300,6 @@ unattached instance are:
The ioctl calls available on an instance of /dev/ppp attached to a
channel are:
-* PPPIOCDETACH detaches the instance from the channel. This ioctl is
- deprecated since the same effect can be achieved by closing the
- instance. In order to prevent possible races this ioctl will fail
- with an EINVAL error if more than one file descriptor refers to this
- instance (i.e. as a result of dup(), dup2() or fork()).
-
* PPPIOCCONNECT connects this channel to a PPP interface. The
argument should point to an int containing the interface unit
number. It will return an EINVAL error if the channel is already
diff --git a/Documentation/process/2.Process.rst b/Documentation/process/2.Process.rst
index ce5561bb3f8e..a9c46dd0706b 100644
--- a/Documentation/process/2.Process.rst
+++ b/Documentation/process/2.Process.rst
@@ -18,17 +18,17 @@ major kernel release happening every two or three months. The recent
release history looks like this:
====== =================
- 2.6.38 March 14, 2011
- 2.6.37 January 4, 2011
- 2.6.36 October 20, 2010
- 2.6.35 August 1, 2010
- 2.6.34 May 15, 2010
- 2.6.33 February 24, 2010
+ 4.11 April 30, 2017
+ 4.12 July 2, 2017
+ 4.13 September 3, 2017
+ 4.14 November 12, 2017
+ 4.15 January 28, 2018
+ 4.16 April 1, 2018
====== =================
-Every 2.6.x release is a major kernel release with new features, internal
-API changes, and more. A typical 2.6 release can contain nearly 10,000
-changesets with changes to several hundred thousand lines of code. 2.6 is
+Every 4.x release is a major kernel release with new features, internal
+API changes, and more. A typical 4.x release contains about 13,000
+changesets with changes to several hundred thousand lines of code. 4.x is
thus the leading edge of Linux kernel development; the kernel uses a
rolling development model which is continually integrating major changes.
@@ -70,20 +70,19 @@ will get up to somewhere between -rc6 and -rc9 before the kernel is
considered to be sufficiently stable and the final 4.x release is made.
At that point the whole process starts over again.
-As an example, here is how the 2.6.38 development cycle went (all dates in
-2011):
+As an example, here is how the 4.16 development cycle went (all dates in
+2018):
============== ===============================
- January 4 2.6.37 stable release
- January 18 2.6.38-rc1, merge window closes
- January 21 2.6.38-rc2
- February 1 2.6.38-rc3
- February 7 2.6.38-rc4
- February 15 2.6.38-rc5
- February 21 2.6.38-rc6
- March 1 2.6.38-rc7
- March 7 2.6.38-rc8
- March 14 2.6.38 stable release
+ January 28 4.15 stable release
+ February 11 4.16-rc1, merge window closes
+ February 18 4.16-rc2
+ February 25 4.16-rc3
+ March 4 4.16-rc4
+ March 11 4.16-rc5
+ March 18 4.16-rc6
+ March 25 4.16-rc7
+	April 1		4.16 stable release
============== ===============================
How do the developers decide when to close the development cycle and create
@@ -99,37 +98,42 @@ release is made. In the real world, this kind of perfection is hard to
achieve; there are just too many variables in a project of this size.
There comes a point where delaying the final release just makes the problem
worse; the pile of changes waiting for the next merge window will grow
-larger, creating even more regressions the next time around. So most 2.6.x
+larger, creating even more regressions the next time around. So most 4.x
kernels go out with a handful of known regressions though, hopefully, none
of them are serious.
Once a stable release is made, its ongoing maintenance is passed off to the
"stable team," currently consisting of Greg Kroah-Hartman. The stable team
-will release occasional updates to the stable release using the 2.6.x.y
+will release occasional updates to the stable release using the 4.x.y
numbering scheme. To be considered for an update release, a patch must (1)
fix a significant bug, and (2) already be merged into the mainline for the
next development kernel. Kernels will typically receive stable updates for
a little more than one development cycle past their initial release. So,
-for example, the 2.6.36 kernel's history looked like:
+for example, the 4.13 kernel's history looked like:
============== ===============================
- October 10 2.6.36 stable release
- November 22 2.6.36.1
- December 9 2.6.36.2
- January 7 2.6.36.3
- February 17 2.6.36.4
+ September 3 4.13 stable release
+ September 13 4.13.1
+ September 20 4.13.2
+ September 27 4.13.3
+ October 5 4.13.4
+ October 12 4.13.5
+ ... ...
+ November 24 4.13.16
============== ===============================
-2.6.36.4 was the final stable update for the 2.6.36 release.
+4.13.16 was the final stable update of the 4.13 release.
Some kernels are designated "long term" kernels; they will receive support
for a longer period. As of this writing, the current long term kernels
and their maintainers are:
- ====== ====================== ===========================
- 2.6.27 Willy Tarreau (Deep-frozen stable kernel)
- 2.6.32 Greg Kroah-Hartman
- 2.6.35 Andi Kleen (Embedded flag kernel)
+ ====== ====================== ==============================
+ 3.16 Ben Hutchings (very long-term stable kernel)
+ 4.1 Sasha Levin
+ 4.4 Greg Kroah-Hartman (very long-term stable kernel)
+ 4.9 Greg Kroah-Hartman
+ 4.14 Greg Kroah-Hartman
====== ====================== ==============================
The selection of a kernel for long-term support is purely a matter of a
diff --git a/Documentation/process/5.Posting.rst b/Documentation/process/5.Posting.rst
index c209d70da66f..c418c5d6cae4 100644
--- a/Documentation/process/5.Posting.rst
+++ b/Documentation/process/5.Posting.rst
@@ -10,8 +10,8 @@ of conventions and procedures which are used in the posting of patches;
following them will make life much easier for everybody involved. This
document will attempt to cover these expectations in reasonable detail;
more information can also be found in the files process/submitting-patches.rst,
-process/submitting-drivers.rst, and process/submit-checklist.rst in the kernel documentation
-directory.
+process/submitting-drivers.rst, and process/submit-checklist.rst in the kernel
+documentation directory.
When to post
@@ -198,8 +198,8 @@ pass it to diff with the "-X" option.
The tags mentioned above are used to describe how various developers have
been associated with the development of this patch. They are described in
-detail in the process/submitting-patches.rst document; what follows here is a brief
-summary. Each of these lines has the format:
+detail in the process/submitting-patches.rst document; what follows here is a
+brief summary. Each of these lines has the format:
::
@@ -210,8 +210,8 @@ The tags in common use are:
- Signed-off-by: this is a developer's certification that he or she has
the right to submit the patch for inclusion into the kernel. It is an
agreement to the Developer's Certificate of Origin, the full text of
- which can be found in Documentation/process/submitting-patches.rst. Code without a
- proper signoff cannot be merged into the mainline.
+ which can be found in Documentation/process/submitting-patches.rst. Code
+ without a proper signoff cannot be merged into the mainline.
- Co-developed-by: states that the patch was also created by another developer
along with the original author. This is useful at times when multiple
@@ -226,8 +226,8 @@ The tags in common use are:
it to work.
- Reviewed-by: the named developer has reviewed the patch for correctness;
- see the reviewer's statement in Documentation/process/submitting-patches.rst for more
- detail.
+ see the reviewer's statement in Documentation/process/submitting-patches.rst
+ for more detail.
- Reported-by: names a user who reported a problem which is fixed by this
patch; this tag is used to give credit to the (often underappreciated)
diff --git a/Documentation/process/index.rst b/Documentation/process/index.rst
index 1c9fe657ed01..37bd0628b6ee 100644
--- a/Documentation/process/index.rst
+++ b/Documentation/process/index.rst
@@ -52,6 +52,7 @@ lack of a better place.
adding-syscalls
magic-number
volatile-considered-harmful
+ clang-format
.. only:: subproject and html
diff --git a/Documentation/process/maintainer-pgp-guide.rst b/Documentation/process/maintainer-pgp-guide.rst
index b453561a7148..aff9b1a4d77b 100644
--- a/Documentation/process/maintainer-pgp-guide.rst
+++ b/Documentation/process/maintainer-pgp-guide.rst
@@ -219,7 +219,7 @@ Our goal is to protect your master key by moving it to offline media, so
if you only have a combined **[SC]** key, then you should create a separate
signing subkey::
- $ gpg --quick-add-key [fpr] ed25519 sign
+ $ gpg --quick-addkey [fpr] ed25519 sign
Remember to tell the keyservers about this change, so others can pull down
your new subkey::
@@ -450,11 +450,18 @@ functionality. There are several options available:
others. If you want to use ECC keys, your best bet among commercially
available devices is the Nitrokey Start.
+.. note::
+
+ If you are listed in MAINTAINERS or have an account at kernel.org,
+ you `qualify for a free Nitrokey Start`_ courtesy of The Linux
+ Foundation.
+
.. _`Nitrokey Start`: https://shop.nitrokey.com/shop/product/nitrokey-start-6
.. _`Nitrokey Pro`: https://shop.nitrokey.com/shop/product/nitrokey-pro-3
.. _`Yubikey 4`: https://www.yubico.com/product/yubikey-4-series/
.. _Gnuk: http://www.fsij.org/doc-gnuk/
.. _`LWN has a good review`: https://lwn.net/Articles/736231/
+.. _`qualify for a free Nitrokey Start`: https://www.kernel.org/nitrokey-digital-tokens-for-kernel-developers.html
Configure your smartcard device
-------------------------------
@@ -482,7 +489,7 @@ there are no convenient command-line switches::
You should set the user PIN (1), Admin PIN (3), and the Reset Code (4).
Please make sure to record and store these in a safe place -- especially
the Admin PIN and the Reset Code (which allows you to completely wipe
-the smartcard). You so rarely need to use the Admin PIN, that you will
+the smartcard). You so rarely need to use the Admin PIN, that you will
inevitably forget what it is if you do not record it.
Getting back to the main card menu, you can also set other values (such
@@ -494,6 +501,12 @@ additionally leak information about your smartcard should you lose it.
Despite having the name "PIN", neither the user PIN nor the admin
PIN on the card need to be numbers.
+.. warning::
+
+ Some devices may require that you move the subkeys onto the device
+ before you can change the passphrase. Please check the documentation
+ provided by the device manufacturer.
+
Move the subkeys to your smartcard
----------------------------------
@@ -655,6 +668,20 @@ want to import these changes back into your regular working directory::
$ gpg --export | gpg --homedir ~/.gnupg --import
$ unset GNUPGHOME
+Using gpg-agent over ssh
+~~~~~~~~~~~~~~~~~~~~~~~~
+
+You can forward your gpg-agent over ssh if you need to sign tags or
+commits on a remote system. Please refer to the instructions provided
+on the GnuPG wiki:
+
+- `Agent Forwarding over SSH`_
+
+It works more smoothly if you can modify the sshd server settings on the
+remote end.
+
+.. _`Agent Forwarding over SSH`: https://wiki.gnupg.org/AgentForwarding
+
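+
+[Editor's note: the GnuPG wiki recipe boils down to an entry like the
+following in the local ~/.ssh/config. This is a hedged sketch: the socket
+paths depend on your uid and distribution, so check them with
+"gpgconf --list-dirs agent-socket" on the remote host and
+"gpgconf --list-dirs agent-extra-socket" locally.]
+
+    Host remotebox
+        # first path: where gpg on the remote host expects its agent socket;
+        # second path: the local agent's restricted "extra" socket
+        RemoteForward /run/user/1000/gnupg/S.gpg-agent /run/user/1000/gnupg/S.gpg-agent.extra
+
+The sshd setting alluded to above is "StreamLocalBindUnlink yes", which lets
+the forwarded socket be re-created on reconnect.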
Using PGP with Git
==================
@@ -692,6 +719,7 @@ should be used (``[fpr]`` is the fingerprint of your key)::
tell git to always use it instead of the legacy ``gpg`` from version 1::
$ git config --global gpg.program gpg2
+ $ git config --global gpgv.program gpgv2
How to work with signed tags
----------------------------
@@ -731,6 +759,13 @@ If you are verifying someone else's git tag, then you will need to
import their PGP key. Please refer to the
":ref:`verify_identities`" section below.
+.. note::
+
+ If you get "``gpg: Can't check signature: unknown pubkey
+ algorithm``" error, you need to tell git to use gpgv2 for
+ verification, so it properly processes signatures made by ECC keys.
+ See instructions at the start of this section.
+
Configure git to always sign annotated tags
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
diff --git a/Documentation/process/submitting-patches.rst b/Documentation/process/submitting-patches.rst
index f7152ed565e5..908bb55be407 100644
--- a/Documentation/process/submitting-patches.rst
+++ b/Documentation/process/submitting-patches.rst
@@ -761,7 +761,7 @@ requests, especially from new, unknown developers. If in doubt you can use
the pull request as the cover letter for a normal posting of the patch
series, giving the maintainer the option of using either.
-A pull request should have [GIT] or [PULL] in the subject line. The
+A pull request should have [GIT PULL] in the subject line. The
request itself should include the repository name and the branch of
interest on a single line; it should look something like::
diff --git a/Documentation/scheduler/sched-deadline.txt b/Documentation/scheduler/sched-deadline.txt
index 8ce78f82ae23..b14e03ff3528 100644
--- a/Documentation/scheduler/sched-deadline.txt
+++ b/Documentation/scheduler/sched-deadline.txt
@@ -49,7 +49,7 @@ CONTENTS
2.1 Main algorithm
------------------
- SCHED_DEADLINE uses three parameters, named "runtime", "period", and
+ SCHED_DEADLINE [18] uses three parameters, named "runtime", "period", and
"deadline", to schedule tasks. A SCHED_DEADLINE task should receive
"runtime" microseconds of execution time every "period" microseconds, and
these "runtime" microseconds are available within "deadline" microseconds
@@ -117,6 +117,10 @@ CONTENTS
scheduling deadline = scheduling deadline + period
remaining runtime = remaining runtime + runtime
+ The SCHED_FLAG_DL_OVERRUN flag in sched_attr's sched_flags field allows a task
+ to get informed about runtime overruns through the delivery of SIGXCPU
+ signals.
+
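+
+ [Editor's note: a minimal, hedged user-space sketch tying the three
+ parameters and the overrun flag together. The struct layout and constants
+ are copied from the uapi values as of v4.16; SYS_sched_setattr may need to
+ be defined by hand on older C libraries, and the periods are arbitrary.]
+
+	#include <linux/types.h>
+	#include <sys/syscall.h>
+	#include <unistd.h>
+	#include <signal.h>
+	#include <stdio.h>
+
+	/* Copied from the uapi definitions. */
+	struct sched_attr {
+		__u32 size;
+		__u32 sched_policy;
+		__u64 sched_flags;
+		__s32 sched_nice;
+		__u32 sched_priority;
+		__u64 sched_runtime;
+		__u64 sched_deadline;
+		__u64 sched_period;
+	};
+
+	#define SCHED_DEADLINE		6
+	#define SCHED_FLAG_DL_OVERRUN	0x04	/* deliver SIGXCPU on overrun */
+
+	static void overrun_handler(int sig)
+	{
+		(void)sig;	/* the scheduler detected a runtime overrun */
+	}
+
+	int main(void)
+	{
+		struct sched_attr attr = {
+			.size		= sizeof(attr),
+			.sched_policy	= SCHED_DEADLINE,
+			.sched_flags	= SCHED_FLAG_DL_OVERRUN,
+			.sched_runtime	=  10 * 1000 * 1000,	/*  10 ms */
+			.sched_deadline	=  30 * 1000 * 1000,	/*  30 ms */
+			.sched_period	= 100 * 1000 * 1000,	/* 100 ms */
+		};
+
+		signal(SIGXCPU, overrun_handler);
+
+		/* No glibc wrapper exists, so the raw syscall is used. */
+		if (syscall(SYS_sched_setattr, 0, &attr, 0))
+			perror("sched_setattr");
+
+		/* ... periodic real-time work loop would go here ... */
+		return 0;
+	}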
2.2 Bandwidth reclaiming
------------------------
@@ -279,6 +283,19 @@ CONTENTS
running_bw is incremented.
+2.3 Energy-aware scheduling
+---------------------------
+
+ When cpufreq's schedutil governor is selected, SCHED_DEADLINE implements the
+ GRUB-PA [19] algorithm, reducing the CPU operating frequency to the minimum
+  value that still allows the deadlines to be met. This behavior is currently
+ implemented only for ARM architectures.
+
+  Particular care must be taken when the time needed for changing frequency
+  is of the same order of magnitude as the reservation period. In such cases,
+  setting a fixed CPU frequency results in fewer deadline misses.
+
+
3. Scheduling Real-Time Tasks
=============================
@@ -505,6 +522,12 @@ CONTENTS
17 - L. Abeni, G. Lipari, A. Parri, Y. Sun, Multicore CPU reclaiming: parallel
or sequential?. In Proceedings of the 31st Annual ACM Symposium on Applied
Computing, 2016.
+ 18 - J. Lelli, C. Scordino, L. Abeni, D. Faggioli, Deadline scheduling in the
+ Linux kernel, Software: Practice and Experience, 46(6): 821-839, June
+ 2016.
+ 19 - C. Scordino, L. Abeni, J. Lelli, Energy-Aware Real-Time Scheduling in
+ the Linux Kernel, 33rd ACM/SIGAPP Symposium On Applied Computing (SAC
+ 2018), Pau, France, April 2018.
4. Bandwidth management
diff --git a/Documentation/scsi/scsi_eh.txt b/Documentation/scsi/scsi_eh.txt
index 11e447bdb3a5..1b7436932a2b 100644
--- a/Documentation/scsi/scsi_eh.txt
+++ b/Documentation/scsi/scsi_eh.txt
@@ -82,24 +82,13 @@ function
1. invokes optional hostt->eh_timed_out() callback. Return value can
be one of
- - BLK_EH_HANDLED
- This indicates that eh_timed_out() dealt with the timeout.
- The command is passed back to the block layer and completed
- via __blk_complete_requests().
-
- *NOTE* After returning BLK_EH_HANDLED the SCSI layer is
- assumed to be finished with the command, and no other
- functions from the SCSI layer will be called. So this
- should typically only be returned if the eh_timed_out()
- handler raced with normal completion.
-
- BLK_EH_RESET_TIMER
This indicates that more time is required to finish the
command. Timer is restarted. This action is counted as a
retry and only allowed scmd->allowed + 1(!) times. Once the
- limit is reached, action for BLK_EH_NOT_HANDLED is taken instead.
+ limit is reached, action for BLK_EH_DONE is taken instead.
- - BLK_EH_NOT_HANDLED
+ - BLK_EH_DONE
eh_timed_out() callback did not handle the command.
Step #2 is taken.
diff --git a/Documentation/security/index.rst b/Documentation/security/index.rst
index 298a94a33f05..85492bfca530 100644
--- a/Documentation/security/index.rst
+++ b/Documentation/security/index.rst
@@ -9,5 +9,7 @@ Security Documentation
IMA-templates
keys/index
LSM
+ LSM-sctp
+ SELinux-sctp
self-protection
tpm/index
diff --git a/Documentation/sound/alsa-configuration.rst b/Documentation/sound/alsa-configuration.rst
index aed6b4fb8e46..ab5761148163 100644
--- a/Documentation/sound/alsa-configuration.rst
+++ b/Documentation/sound/alsa-configuration.rst
@@ -1062,7 +1062,7 @@ output (with ``--no-upload`` option) to kernel bugzilla or alsa-devel
ML (see the section `Links and Addresses`_).
``power_save`` and ``power_save_controller`` options are for power-saving
-mode. See powersave.txt for details.
+mode. See powersave.rst for details.
Note 2: If you get click noises on output, try the module option
``position_fix=1`` or ``2``. ``position_fix=1`` will use the SD_LPIB
@@ -1133,7 +1133,7 @@ line_outs_monitor
enable_monitor
Enable Analog Out on Channel 63/64 by default.
-See hdspm.txt for details.
+See hdspm.rst for details.
Module snd-ice1712
------------------
diff --git a/Documentation/sound/soc/codec.rst b/Documentation/sound/soc/codec.rst
index f87612b94812..240770ea761e 100644
--- a/Documentation/sound/soc/codec.rst
+++ b/Documentation/sound/soc/codec.rst
@@ -139,7 +139,7 @@ DAPM description
----------------
The Dynamic Audio Power Management description describes the codec power
components and their relationships and registers to the ASoC core.
-Please read dapm.txt for details of building the description.
+Please read dapm.rst for details of building the description.
Please also see the examples in other codec drivers.
diff --git a/Documentation/sound/soc/platform.rst b/Documentation/sound/soc/platform.rst
index d5574904d981..02c93a8b9c3b 100644
--- a/Documentation/sound/soc/platform.rst
+++ b/Documentation/sound/soc/platform.rst
@@ -66,7 +66,7 @@ Each SoC DAI driver must provide the following features:-
4. SYSCLK configuration
5. Suspend and resume (optional)
-Please see codec.txt for a description of items 1 - 4.
+Please see codec.rst for a description of items 1 - 4.
SoC DSP Drivers
diff --git a/Documentation/sphinx/parse-headers.pl b/Documentation/sphinx/parse-headers.pl
index a958d8b5e99d..d410f47567e9 100755
--- a/Documentation/sphinx/parse-headers.pl
+++ b/Documentation/sphinx/parse-headers.pl
@@ -387,11 +387,11 @@ tree for more details.
=head1 BUGS
-Report bugs to Mauro Carvalho Chehab <mchehab@s-opensource.com>
+Report bugs to Mauro Carvalho Chehab <mchehab@kernel.org>
=head1 COPYRIGHT
-Copyright (c) 2016 by Mauro Carvalho Chehab <mchehab@s-opensource.com>.
+Copyright (c) 2016 by Mauro Carvalho Chehab <mchehab+samsung@kernel.org>.
License GPLv2: GNU GPL version 2 <http://gnu.org/licenses/gpl.html>.
diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt
index 17256f2ad919..697ef8c225df 100644
--- a/Documentation/sysctl/vm.txt
+++ b/Documentation/sysctl/vm.txt
@@ -515,7 +515,7 @@ nr_hugepages
Change the minimum size of the hugepage pool.
-See Documentation/vm/hugetlbpage.txt
+See Documentation/admin-guide/mm/hugetlbpage.rst
==============================================================
@@ -524,7 +524,7 @@ nr_overcommit_hugepages
Change the maximum size of the hugepage pool. The maximum is
nr_hugepages + nr_overcommit_hugepages.
-See Documentation/vm/hugetlbpage.txt
+See Documentation/admin-guide/mm/hugetlbpage.rst
==============================================================
@@ -667,7 +667,7 @@ and don't use much of it.
The default value is 0.
-See Documentation/vm/overcommit-accounting and
+See Documentation/vm/overcommit-accounting.rst and
mm/mmap.c::__vm_enough_memory() for more information.
==============================================================
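
[Editor's note: a brief example of driving the two knobs above; the counts
are arbitrary, root is required, and the allocation may fall short of the
request if memory is fragmented.]

	# echo 64 > /proc/sys/vm/nr_hugepages
	# echo 16 > /proc/sys/vm/nr_overcommit_hugepages
	# cat /proc/sys/vm/nr_hugepages
	64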
diff --git a/Documentation/trace/coresight.txt b/Documentation/trace/coresight.txt
index 6f0120c3a4f1..1d74ad0202b6 100644
--- a/Documentation/trace/coresight.txt
+++ b/Documentation/trace/coresight.txt
@@ -187,13 +187,19 @@ that can be performed on them (see "struct coresight_ops"). The
specific to that component only. "Implementation defined" customisations are
expected to be accessed and controlled using those entries.
-Last but not least, "struct module *owner" is expected to be set to reflect
-the information carried in "THIS_MODULE".
How to use the tracer modules
-----------------------------
-Before trace collection can start, a coresight sink needs to be identify.
+There are two ways to use the Coresight framework: 1) using the perf command line
+tools and 2) interacting directly with the Coresight devices using the sysFS
+interface. Preference is given to the former as using the sysFS interface
+requires a deep understanding of the Coresight HW. The following sections
+provide details on using both methods.
+
+1) Using the sysFS interface:
+
+Before trace collection can start, a coresight sink needs to be identified.
There is no limit on the amount of sinks (nor sources) that can be enabled at
any given moment. As a generic operation, all device pertaining to the sink
class will have an "active" entry in sysfs:
@@ -298,42 +304,48 @@ Instruction 13570831 0x8026B584 E28DD00C false ADD
Instruction 0 0x8026B588 E8BD8000 true LDM sp!,{pc}
Timestamp Timestamp: 17107041535
-How to use the STM module
--------------------------
+2) Using perf framework:
-Using the System Trace Macrocell module is the same as the tracers - the only
-difference is that clients are driving the trace capture rather
-than the program flow through the code.
+Coresight tracers are represented using the Perf framework's Performance
+Monitoring Unit (PMU) abstraction. As such the perf framework takes charge of
+controlling when tracing gets enabled based on when the process of interest is
+scheduled. When configured in a system, Coresight PMUs will be listed when
+queried by the perf command line tool:
-As with any other CoreSight component, specifics about the STM tracer can be
-found in sysfs with more information on each entry being found in [1]:
+ linaro@linaro-nano:~$ ./perf list pmu
-root@genericarmv8:~# ls /sys/bus/coresight/devices/20100000.stm
-enable_source hwevent_select port_enable subsystem uevent
-hwevent_enable mgmt port_select traceid
-root@genericarmv8:~#
+ List of pre-defined events (to be used in -e):
-Like any other source a sink needs to be identified and the STM enabled before
-being used:
+ cs_etm// [Kernel PMU event]
-root@genericarmv8:~# echo 1 > /sys/bus/coresight/devices/20010000.etf/enable_sink
-root@genericarmv8:~# echo 1 > /sys/bus/coresight/devices/20100000.stm/enable_source
+ linaro@linaro-nano:~$
-From there user space applications can request and use channels using the devfs
-interface provided for that purpose by the generic STM API:
+Regardless of the number of tracers available in a system (usually equal to the
+number of processor cores), the "cs_etm" PMU will be listed only once.
-root@genericarmv8:~# ls -l /dev/20100000.stm
-crw------- 1 root root 10, 61 Jan 3 18:11 /dev/20100000.stm
-root@genericarmv8:~#
+A Coresight PMU works the same way as any other PMU, i.e. the name of the PMU
+is listed along with configuration options within forward slashes '/'. Since a
+Coresight system will typically have more than one sink, the name of the sink
+to work with needs to be specified as an event option. Names of sinks to
+choose from are listed in sysFS under ($SYSFS)/bus/coresight/devices:
-Details on how to use the generic STM API can be found here [2].
+ root@linaro-nano:~# ls /sys/bus/coresight/devices/
+ 20010000.etf 20040000.funnel 20100000.stm 22040000.etm
+ 22140000.etm 230c0000.funnel 23240000.etm 20030000.tpiu
+ 20070000.etr 20120000.replicator 220c0000.funnel
+ 23040000.etm 23140000.etm 23340000.etm
-[1]. Documentation/ABI/testing/sysfs-bus-coresight-devices-stm
-[2]. Documentation/trace/stm.txt
+ root@linaro-nano:~# perf record -e cs_etm/@20070000.etr/u --per-thread program
+The syntax within the forward slashes '/' is important. The '@' character
+tells the parser that a sink is about to be specified and that this is the sink
+to use for the trace session.
-Using perf tools
-----------------
+More information on the above and other examples of how to use Coresight with
+the perf tools can be found in the "HOWTO.md" file of the openCSD GitHub
+repository [3].
+
+2.1) AutoFDO analysis using the perf tools:
perf can be used to record and analyze trace of programs.
@@ -381,3 +393,38 @@ sort example is from the AutoFDO tutorial (https://gcc.gnu.org/wiki/AutoFDO/Tuto
$ taskset -c 2 ./sort_autofdo
Bubble sorting array of 30000 elements
5806 ms
+
+
+How to use the STM module
+-------------------------
+
+Using the System Trace Macrocell module is the same as the tracers - the only
+difference is that clients are driving the trace capture rather
+than the program flow through the code.
+
+As with any other CoreSight component, specifics about the STM tracer can be
+found in sysfs with more information on each entry being found in [1]:
+
+root@genericarmv8:~# ls /sys/bus/coresight/devices/20100000.stm
+enable_source hwevent_select port_enable subsystem uevent
+hwevent_enable mgmt port_select traceid
+root@genericarmv8:~#
+
+Like any other source a sink needs to be identified and the STM enabled before
+being used:
+
+root@genericarmv8:~# echo 1 > /sys/bus/coresight/devices/20010000.etf/enable_sink
+root@genericarmv8:~# echo 1 > /sys/bus/coresight/devices/20100000.stm/enable_source
+
+From there user space applications can request and use channels using the devfs
+interface provided for that purpose by the generic STM API:
+
+root@genericarmv8:~# ls -l /dev/20100000.stm
+crw------- 1 root root 10, 61 Jan 3 18:11 /dev/20100000.stm
+root@genericarmv8:~#
+
+Details on how to use the generic STM API can be found here [2].
+
+[1]. Documentation/ABI/testing/sysfs-bus-coresight-devices-stm
+[2]. Documentation/trace/stm.txt
+[3]. https://github.com/Linaro/perf-opencsd
diff --git a/Documentation/trace/ftrace-uses.rst b/Documentation/trace/ftrace-uses.rst
index 998a60a93015..00283b6dd101 100644
--- a/Documentation/trace/ftrace-uses.rst
+++ b/Documentation/trace/ftrace-uses.rst
@@ -12,7 +12,7 @@ Written for: 4.14
Introduction
============
-The ftrace infrastructure was originially created to attach callbacks to the
+The ftrace infrastructure was originally created to attach callbacks to the
beginning of functions in order to record and trace the flow of the kernel.
But callbacks to the start of a function can have other use cases, either
for live kernel patching or for security monitoring. This document describes
@@ -30,7 +30,7 @@ The ftrace context
This requires extra care to what can be done inside a callback. A callback
can be called outside the protective scope of RCU.
-The ftrace infrastructure has some protections agains recursions and RCU
+The ftrace infrastructure has some protections against recursions and RCU
but one must still be very careful how they use the callbacks.
diff --git a/Documentation/trace/ftrace.rst b/Documentation/trace/ftrace.rst
index 67d9c38e95eb..6b80ac4bbaae 100644
--- a/Documentation/trace/ftrace.rst
+++ b/Documentation/trace/ftrace.rst
@@ -224,6 +224,8 @@ of ftrace. Here is a list of some of the key files:
has a side effect of enabling or disabling specific functions
to be traced. Echoing names of functions into this file
will limit the trace to only those functions.
+ This influences the tracers "function" and "function_graph"
+ and thus also function profiling (see "function_profile_enabled").
The functions listed in "available_filter_functions" are what
can be written into this file.
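
[Editor's note: a quick illustration of the filter's reach, assuming tracefs
is mounted at the usual debugfs location; the function name is just an
example.]

	# cd /sys/kernel/debug/tracing
	# echo vfs_read > set_ftrace_filter
	# echo function > current_tracer
	# head -20 trace

With the filter in place, both the "function" tracer output above and the
function profiler (when "function_profile_enabled" is set) are limited to
vfs_read.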
@@ -265,6 +267,8 @@ of ftrace. Here is a list of some of the key files:
Functions listed in this file will cause the function graph
tracer to only trace these functions and the functions that
they call. (See the section "dynamic ftrace" for more details).
+	Note, set_ftrace_filter and set_ftrace_notrace still affect
+ what functions are being traced.
set_graph_notrace:
@@ -277,7 +281,8 @@ of ftrace. Here is a list of some of the key files:
This lists the functions that ftrace has processed and can trace.
These are the function names that you can pass to
- "set_ftrace_filter" or "set_ftrace_notrace".
+ "set_ftrace_filter", "set_ftrace_notrace",
+ "set_graph_function", or "set_graph_notrace".
(See the section "dynamic ftrace" below for more details.)
dyn_ftrace_total_info:
diff --git a/Documentation/translations/ko_KR/memory-barriers.txt b/Documentation/translations/ko_KR/memory-barriers.txt
index 0a0930ab4156..921739d00f69 100644
--- a/Documentation/translations/ko_KR/memory-barriers.txt
+++ b/Documentation/translations/ko_KR/memory-barriers.txt
@@ -36,6 +36,9 @@ Documentation/memory-barriers.txt
parts, and, though not intended, parts that are incomplete because they were
written by humans.  This document is a guide to using the various memory
barriers provided by Linux, but if anything seems odd (and there will be
plenty), please ask.
+Some of those oddities may be resolved by referring to the formal memory
+consistency model and the related documentation at tools/memory-model/.
+Even that memory model, however, should be viewed as the collective opinion
+of its maintainers rather than trusted as an infallible oracle.
To repeat, this document is not a specification of what Linux expects from
hardware.
@@ -77,7 +80,7 @@ Documentation/memory-barriers.txt
- 메모리 ë°°ë¦¬ì–´ì˜ ì¢…ë¥˜.
- 메모리 ë°°ë¦¬ì–´ì— ëŒ€í•´ 가정해선 ì•ˆë  ê²ƒ.
- - ë°ì´í„° ì˜ì¡´ì„± 배리어.
+ - ë°ì´í„° ì˜ì¡´ì„± 배리어 (역사ì ).
- 컨트롤 ì˜ì¡´ì„±.
- SMP 배리어 ì§ë§žì¶”기.
- 메모리 배리어 ì‹œí€€ìŠ¤ì˜ ì˜ˆ.
@@ -255,17 +258,20 @@ There are some minimal guarantees that may be expected of a CPU
(*) On any given CPU, dependent memory accesses will be issued to the memory
system in order with respect to that CPU itself.  That is, for:
- Q = READ_ONCE(P); smp_read_barrier_depends(); D = READ_ONCE(*Q);
+ Q = READ_ONCE(P); D = READ_ONCE(*Q);
the CPU will issue the following sequence of memory operations:
Q = LOAD P, D = LOAD *Q
- and the order within that sequence is always preserved.  On most systems
- smp_read_barrier_depends() does nothing, but on DEC Alpha it must be used
- explicitly.  Note that in the normal case you should use something like
- rcu_dereference() instead of open-coding smp_read_barrier_depends().
+ and the order within that sequence is always preserved.  On DEC Alpha,
+ however, READ_ONCE() also emits a memory-barrier instruction, so that a
+ DEC Alpha CPU will instead issue the following memory operations:
+
+ Q = LOAD P, MEMORY_BARRIER, D = LOAD *Q, MEMORY_BARRIER
+
+ Whether it runs on DEC Alpha or not, READ_ONCE() also removes compiler
+ mischief.
(*) Loads and stores to overlapping areas of memory within a particular CPU
appear to be ordered within that CPU.  That is, for:
@@ -421,8 +427,8 @@ There are some minimal guarantees that may be expected of a CPU
A data dependency barrier is a weaker form of read barrier.  In the case
where two loads are performed such that the second depends on the result of
the first (e.g.: the first load retrieves the address the second load reads),
- a data dependency barrier may be required to make sure that the data read
- by the second load has been updated before the address is obtained by the
- first load.
+ a data dependency barrier may be required to make sure that the data read
+ by the second load is updated after the address is obtained by the first
+ load.
A data dependency barrier is a partial ordering on interdependent loads; it
does not apply to stores, independent loads, or overlapping
@@ -570,8 +576,14 @@ ACQUIRE applies only to the load part of the operation, and RELEASE applies
Documentation/DMA-API.txt
-Data dependency barriers
-------------------------
+Data dependency barriers (historical)
+-------------------------------------
+
+As of v4.15 of the Linux kernel, smp_read_barrier_depends() was added to
+READ_ONCE(), which means that about the only people who need to pay
+attention to this section are those writing DEC Alpha architecture-specific
+code and those working on READ_ONCE() itself.  For them, and for anyone
+interested in the history, here is the story of data dependency barriers.
The usage requirements of data dependency barriers are a little subtle, and
it's not always obvious that they're needed.  To illustrate,
@@ -1787,7 +1799,7 @@ CPU memory barriers
		GENERAL		mb()			smp_mb()
		WRITE		wmb()			smp_wmb()
		READ		rmb()			smp_rmb()
-		DATA DEPENDENCY	read_barrier_depends()	smp_read_barrier_depends()
+		DATA DEPENDENCY			READ_ONCE()
All memory barriers except the data dependency barriers imply a compiler
@@ -2796,8 +2808,9 @@ CPU 2 has C/D), consider a system with the caches connected in parallel
To intervene here, a data dependency barrier or a read barrier must be placed
-between the loads.  This will force the cache to process its coherency
-queue before it handles any further requests.
+between the loads (which, since v4.15, is done unconditionally by the
+READ_ONCE() macro).  This will force the cache to process its coherency
+queue before it handles any further requests.
CPU 1 CPU 2 COMMENT
=============== =============== =======================================
@@ -2826,7 +2839,10 @@ CPU 2 has C/D), consider a system with the caches connected in parallel
Other CPUs may also have split caches, but those CPUs must coordinate between
the split cache halves even for normal memory accesses.  Alpha, by choosing
the weakest memory ordering semantics, removed the need for such coordination
-when memory barriers are not explicitly used.
+when memory barriers are not explicitly used, which permitted Alpha to reach
+higher CPU clock speeds in its day.  Note, however, that (again, as of
+v4.15) smp_read_barrier_depends() should not be used except in Alpha
+architecture-specific code and within the READ_ONCE() macro.
CACHE COHERENCY VS DMA
@@ -2846,7 +2862,7 @@ overwritten by dirty cache lines being written from the CPU's cache to RAM
To deal with this problem, the appropriate part of the kernel must invalidate
the offending bits of the cache on each CPU.
-See Documentation/cachetlb.txt for more information on cache
+See Documentation/core-api/cachetlb.rst for more information on cache
management.
@@ -2988,7 +3004,9 @@ Some versions of the Alpha CPU have a split data cache, and so
because it synchronizes the two caches with the memory coherency system,
making the pointer change and the discovery of the new data appear to happen
in the correct order.
-The Linux kernel's memory barrier model was defined on the basis of Alpha.
+The Linux kernel's memory barrier model was defined on the basis of Alpha,
+but as of v4.15 the kernel added smp_read_barrier_depends() to READ_ONCE(),
+greatly reducing Alpha's influence on the memory model.
See the subsection on "Cache Coherency" above.
@@ -3023,7 +3041,7 @@ virt_mb() should be used rather than smp_mb()
can be used to implement synchronization without the use of locks.  For more
details, see:
- Documentation/circular-buffers.txt
+ Documentation/core-api/circular-buffers.rst
=========
diff --git a/Documentation/translations/zh_CN/video4linux/v4l2-framework.txt b/Documentation/translations/zh_CN/video4linux/v4l2-framework.txt
index 698660b7f21f..c77c0f060864 100644
--- a/Documentation/translations/zh_CN/video4linux/v4l2-framework.txt
+++ b/Documentation/translations/zh_CN/video4linux/v4l2-framework.txt
@@ -6,7 +6,7 @@ communicating in English you can also ask the Chinese maintainer for
help. Contact the Chinese maintainer if this translation is outdated
or if there is a problem with the translation.
-Maintainer: Mauro Carvalho Chehab <mchehab@infradead.org>
+Maintainer: Mauro Carvalho Chehab <mchehab@kernel.org>
Chinese maintainer: Fu Wei <tekkamanninja@gmail.com>
---------------------------------------------------------------------
Chinese translation of Documentation/video4linux/v4l2-framework.txt
@@ -14,7 +14,7 @@ Chinese translation of Documentation/video4linux/v4l2-framework.txt
If you wish to comment on or update the content of this document, please
contact the maintainer of the original document directly. If you have
difficulty communicating in English, you may also ask the Chinese maintainer
for help. If this translation is out of date or there is a problem with it,
please contact the Chinese maintainer.
-English maintainer: Mauro Carvalho Chehab <mchehab@infradead.org>
+English maintainer: Mauro Carvalho Chehab <mchehab@kernel.org>
Chinese maintainer: 傅炜 Fu Wei <tekkamanninja@gmail.com>
Chinese translator: 傅炜 Fu Wei <tekkamanninja@gmail.com>
Chinese proofreader: 傅炜 Fu Wei <tekkamanninja@gmail.com>
diff --git a/Documentation/userspace-api/index.rst b/Documentation/userspace-api/index.rst
index 7b2eb1b7d4ca..a3233da7fa88 100644
--- a/Documentation/userspace-api/index.rst
+++ b/Documentation/userspace-api/index.rst
@@ -19,6 +19,7 @@ place where this information is gathered.
no_new_privs
seccomp_filter
unshare
+ spec_ctrl
.. only:: subproject and html
diff --git a/Documentation/userspace-api/spec_ctrl.rst b/Documentation/userspace-api/spec_ctrl.rst
new file mode 100644
index 000000000000..32f3d55c54b7
--- /dev/null
+++ b/Documentation/userspace-api/spec_ctrl.rst
@@ -0,0 +1,94 @@
+===================
+Speculation Control
+===================
+
+Quite a few CPUs have speculation-related misfeatures which are in
+fact vulnerabilities, causing data leaks in various forms even across
+privilege domains.
+
+The kernel provides mitigation for such vulnerabilities in various
+forms. Some of these mitigations are compile-time configurable and some
+can be supplied on the kernel command line.
+
+There is also a class of mitigations which are very expensive, but they can
+be restricted to a certain set of processes or tasks in controlled
+environments. The mechanism to control these mitigations is via
+:manpage:`prctl(2)`.
+
+There are two prctl options which are related to this:
+
+ * PR_GET_SPECULATION_CTRL
+
+ * PR_SET_SPECULATION_CTRL
+
+PR_GET_SPECULATION_CTRL
+-----------------------
+
+PR_GET_SPECULATION_CTRL returns the state of the speculation misfeature
+which is selected with arg2 of prctl(2). The return value uses bits 0-3 with
+the following meaning:
+
+==== ===================== ===================================================
+Bit Define Description
+==== ===================== ===================================================
+0 PR_SPEC_PRCTL Mitigation can be controlled per task by
+ PR_SET_SPECULATION_CTRL.
+1 PR_SPEC_ENABLE The speculation feature is enabled, mitigation is
+ disabled.
+2 PR_SPEC_DISABLE The speculation feature is disabled, mitigation is
+ enabled.
+3 PR_SPEC_FORCE_DISABLE Same as PR_SPEC_DISABLE, but cannot be undone. A
+ subsequent prctl(..., PR_SPEC_ENABLE) will fail.
+==== ===================== ===================================================
+
+If all bits are 0 the CPU is not affected by the speculation misfeature.
+
+If PR_SPEC_PRCTL is set, then the per-task control of the mitigation is
+available. If not set, prctl(PR_SET_SPECULATION_CTRL) for the speculation
+misfeature will fail.
+
+PR_SET_SPECULATION_CTRL
+-----------------------
+
+PR_SET_SPECULATION_CTRL allows controlling the speculation misfeature,
+which is selected by arg2 of :manpage:`prctl(2)`, per task. arg3 is used
+to hand in the control value, i.e. either PR_SPEC_ENABLE, PR_SPEC_DISABLE
+or PR_SPEC_FORCE_DISABLE.
+
+Common error codes
+------------------
+======= =================================================================
+Value Meaning
+======= =================================================================
+EINVAL The prctl is not implemented by the architecture or unused
+ prctl(2) arguments are not 0.
+
+ENODEV arg2 selects an unsupported speculation misfeature.
+======= =================================================================
+
+PR_SET_SPECULATION_CTRL error codes
+-----------------------------------
+======= =================================================================
+Value Meaning
+======= =================================================================
+0 Success
+
+ERANGE arg3 is incorrect, i.e. it's neither PR_SPEC_ENABLE nor
+ PR_SPEC_DISABLE nor PR_SPEC_FORCE_DISABLE.
+
+ENXIO Control of the selected speculation misfeature is not possible.
+ See PR_GET_SPECULATION_CTRL.
+
+EPERM Speculation was disabled with PR_SPEC_FORCE_DISABLE and caller
+ tried to enable it again.
+======= =================================================================
+
+Speculation misfeature controls
+-------------------------------
+- PR_SPEC_STORE_BYPASS: Speculative Store Bypass
+
+ Invocations:
+ * prctl(PR_GET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, 0, 0, 0);
+ * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, PR_SPEC_ENABLE, 0, 0);
+ * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, PR_SPEC_DISABLE, 0, 0);
+ * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, PR_SPEC_FORCE_DISABLE, 0, 0);
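+
+The sketch below (an illustration, not part of the ABI description above)
+shows how a task could force-disable Speculative Store Bypass for itself;
+the PR_SPEC_* constants come from linux/prctl.h on sufficiently new kernels
+and may need to be defined manually with older headers::
+
+    #include <sys/prctl.h>
+    #include <linux/prctl.h>
+    #include <stdio.h>
+
+    int main(void)
+    {
+            /* Returns the PR_SPEC_* state bits on success, -1 on error. */
+            int state = prctl(PR_GET_SPECULATION_CTRL,
+                              PR_SPEC_STORE_BYPASS, 0, 0, 0);
+
+            if (state < 0 || !(state & PR_SPEC_PRCTL)) {
+                    fprintf(stderr, "no per-task control available\n");
+                    return 1;
+            }
+
+            /* Force-disable: cannot be re-enabled by this task later. */
+            if (prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS,
+                      PR_SPEC_FORCE_DISABLE, 0, 0)) {
+                    perror("PR_SET_SPECULATION_CTRL");
+                    return 1;
+            }
+            return 0;
+    }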
diff --git a/Documentation/vfio.txt b/Documentation/vfio.txt
index ef6a5111eaa1..f1a4d3c3ba0b 100644
--- a/Documentation/vfio.txt
+++ b/Documentation/vfio.txt
@@ -252,15 +252,14 @@ into VFIO core. When devices are bound and unbound to the driver,
the driver should call vfio_add_group_dev() and vfio_del_group_dev()
respectively::
- extern int vfio_add_group_dev(struct iommu_group *iommu_group,
- struct device *dev,
+ extern int vfio_add_group_dev(struct device *dev,
const struct vfio_device_ops *ops,
void *device_data);
extern void *vfio_del_group_dev(struct device *dev);
vfio_add_group_dev() indicates to the core to begin tracking the
-specified iommu_group and register the specified dev as owned by
+iommu_group of the specified dev and register the dev as owned by
a VFIO bus driver. The driver provides an ops structure for callbacks
similar to a file operations structure::
diff --git a/Documentation/virtual/kvm/cpuid.txt b/Documentation/virtual/kvm/cpuid.txt
index d4f33eb805dd..ab022dcd0911 100644
--- a/Documentation/virtual/kvm/cpuid.txt
+++ b/Documentation/virtual/kvm/cpuid.txt
@@ -72,8 +72,8 @@ KVM_FEATURE_CLOCKSOURCE_STABLE_BIT || 24 || host will warn if no guest-side
flag || value || meaning
==================================================================================
-KVM_HINTS_DEDICATED || 0 || guest checks this feature bit to
- || || determine if there is vCPU pinning
- || || and there is no vCPU over-commitment,
+KVM_HINTS_REALTIME || 0 || guest checks this feature bit to
+ || || determine that vCPUs are never
+ || || preempted for an unlimited time,
|| || allowing optimizations
----------------------------------------------------------------------------------
diff --git a/Documentation/vm/00-INDEX b/Documentation/vm/00-INDEX
index 0278f2c85efb..f4a4f3e884cf 100644
--- a/Documentation/vm/00-INDEX
+++ b/Documentation/vm/00-INDEX
@@ -1,62 +1,50 @@
00-INDEX
- this file.
-active_mm.txt
+active_mm.rst
- An explanation from Linus about tsk->active_mm vs tsk->mm.
-balance
+balance.rst
- various information on memory balancing.
-cleancache.txt
+cleancache.rst
- Intro to cleancache and page-granularity victim cache.
-frontswap.txt
+frontswap.rst
- Outline frontswap, part of the transcendent memory frontend.
-highmem.txt
+highmem.rst
- Outline of highmem and common issues.
-hmm.txt
+hmm.rst
- Documentation of heterogeneous memory management
-hugetlbpage.txt
- - a brief summary of hugetlbpage support in the Linux kernel.
-hugetlbfs_reserv.txt
+hugetlbfs_reserv.rst
- A brief overview of hugetlbfs reservation design/implementation.
-hwpoison.txt
+hwpoison.rst
- explains what hwpoison is
-idle_page_tracking.txt
- - description of the idle page tracking feature.
-ksm.txt
+ksm.rst
- how to use the Kernel Samepage Merging feature.
-mmu_notifier.txt
+mmu_notifier.rst
- a note about clearing pte/pmd and mmu notifications
-numa
+numa.rst
- information about NUMA specific code in the Linux vm.
-numa_memory_policy.txt
- - documentation of concepts and APIs of the 2.6 memory policy support.
-overcommit-accounting
+overcommit-accounting.rst
- description of the Linux kernels overcommit handling modes.
-page_frags
+page_frags.rst
- description of page fragments allocator
-page_migration
+page_migration.rst
- description of page migration in NUMA systems.
-pagemap.txt
- - pagemap, from the userspace perspective
-page_owner.txt
+page_owner.rst
- tracking about who allocated each page
-remap_file_pages.txt
+remap_file_pages.rst
- a note about remap_file_pages() system call
-slub.txt
+slub.rst
- a short users guide for SLUB.
-soft-dirty.txt
- - short explanation for soft-dirty PTEs
-split_page_table_lock
+split_page_table_lock.rst
- Separate per-table lock to improve scalability of the old page_table_lock.
-swap_numa.txt
+swap_numa.rst
- automatic binding of swap device to numa node
-transhuge.txt
+transhuge.rst
- Transparent Hugepage Support, alternative way of using hugepages.
-unevictable-lru.txt
+unevictable-lru.rst
- Unevictable LRU infrastructure
-userfaultfd.txt
- - description of userfaultfd system call
z3fold.txt
- outline of z3fold allocator for storing compressed pages
-zsmalloc.txt
+zsmalloc.rst
- outline of zsmalloc allocator for storing compressed pages
-zswap.txt
+zswap.rst
- Intro to compressed cache for swap pages
diff --git a/Documentation/vm/active_mm.rst b/Documentation/vm/active_mm.rst
new file mode 100644
index 000000000000..c84471b180f8
--- /dev/null
+++ b/Documentation/vm/active_mm.rst
@@ -0,0 +1,91 @@
+.. _active_mm:
+
+=========
+Active MM
+=========
+
+::
+
+ List: linux-kernel
+ Subject: Re: active_mm
+ From: Linus Torvalds <torvalds () transmeta ! com>
+ Date: 1999-07-30 21:36:24
+
+ Cc'd to linux-kernel, because I don't write explanations all that often,
+ and when I do I feel better about more people reading them.
+
+ On Fri, 30 Jul 1999, David Mosberger wrote:
+ >
+ > Is there a brief description someplace on how "mm" vs. "active_mm" in
+ > the task_struct are supposed to be used? (My apologies if this was
+ > discussed on the mailing lists---I just returned from vacation and
+ > wasn't able to follow linux-kernel for a while).
+
+ Basically, the new setup is:
+
+ - we have "real address spaces" and "anonymous address spaces". The
+ difference is that an anonymous address space doesn't care about the
+ user-level page tables at all, so when we do a context switch into an
+ anonymous address space we just leave the previous address space
+ active.
+
+ The obvious use for a "anonymous address space" is any thread that
+ doesn't need any user mappings - all kernel threads basically fall into
+ this category, but even "real" threads can temporarily say that for
+ some amount of time they are not going to be interested in user space,
+ and that the scheduler might as well try to avoid wasting time on
+ switching the VM state around. Currently only the old-style bdflush
+ sync does that.
+
+ - "tsk->mm" points to the "real address space". For an anonymous process,
+ tsk->mm will be NULL, for the logical reason that an anonymous process
+ really doesn't _have_ a real address space at all.
+
+ - however, we obviously need to keep track of which address space we
+ "stole" for such an anonymous user. For that, we have "tsk->active_mm",
+ which shows what the currently active address space is.
+
+ The rule is that for a process with a real address space (ie tsk->mm is
+ non-NULL) the active_mm obviously always has to be the same as the real
+ one.
+
+ For a anonymous process, tsk->mm == NULL, and tsk->active_mm is the
+ "borrowed" mm while the anonymous process is running. When the
+ anonymous process gets scheduled away, the borrowed address space is
+ returned and cleared.
+
+ To support all that, the "struct mm_struct" now has two counters: a
+ "mm_users" counter that is how many "real address space users" there are,
+ and a "mm_count" counter that is the number of "lazy" users (ie anonymous
+ users) plus one if there are any real users.
+
+ Usually there is at least one real user, but it could be that the real
+ user exited on another CPU while a lazy user was still active, so you do
+ actually get cases where you have a address space that is _only_ used by
+ lazy users. That is often a short-lived state, because once that thread
+ gets scheduled away in favour of a real thread, the "zombie" mm gets
+ released because "mm_users" becomes zero.
+
+ Also, a new rule is that _nobody_ ever has "init_mm" as a real MM any
+ more. "init_mm" should be considered just a "lazy context when no other
+ context is available", and in fact it is mainly used just at bootup when
+ no real VM has yet been created. So code that used to check
+
+ if (current->mm == &init_mm)
+
+ should generally just do
+
+ if (!current->mm)
+
+ instead (which makes more sense anyway - the test is basically one of "do
+ we have a user context", and is generally done by the page fault handler
+ and things like that).
+
+ Anyway, I put a pre-patch-2.3.13-1 on ftp.kernel.org just a moment ago,
+ because it slightly changes the interfaces to accommodate the alpha (who
+ would have thought it, but the alpha actually ends up having one of the
+ ugliest context switch codes - unlike the other architectures where the MM
+ and register state is separate, the alpha PALcode joins the two, and you
+ need to switch both together).
+
+ (From http://marc.info/?l=linux-kernel&m=93337278602211&w=2)
diff --git a/Documentation/vm/active_mm.txt b/Documentation/vm/active_mm.txt
deleted file mode 100644
index dbf45817405f..000000000000
--- a/Documentation/vm/active_mm.txt
+++ /dev/null
@@ -1,83 +0,0 @@
-List: linux-kernel
-Subject: Re: active_mm
-From: Linus Torvalds <torvalds () transmeta ! com>
-Date: 1999-07-30 21:36:24
-
-Cc'd to linux-kernel, because I don't write explanations all that often,
-and when I do I feel better about more people reading them.
-
-On Fri, 30 Jul 1999, David Mosberger wrote:
->
-> Is there a brief description someplace on how "mm" vs. "active_mm" in
-> the task_struct are supposed to be used? (My apologies if this was
-> discussed on the mailing lists---I just returned from vacation and
-> wasn't able to follow linux-kernel for a while).
-
-Basically, the new setup is:
-
- - we have "real address spaces" and "anonymous address spaces". The
- difference is that an anonymous address space doesn't care about the
- user-level page tables at all, so when we do a context switch into an
- anonymous address space we just leave the previous address space
- active.
-
- The obvious use for a "anonymous address space" is any thread that
- doesn't need any user mappings - all kernel threads basically fall into
- this category, but even "real" threads can temporarily say that for
- some amount of time they are not going to be interested in user space,
- and that the scheduler might as well try to avoid wasting time on
- switching the VM state around. Currently only the old-style bdflush
- sync does that.
-
- - "tsk->mm" points to the "real address space". For an anonymous process,
- tsk->mm will be NULL, for the logical reason that an anonymous process
- really doesn't _have_ a real address space at all.
-
- - however, we obviously need to keep track of which address space we
- "stole" for such an anonymous user. For that, we have "tsk->active_mm",
- which shows what the currently active address space is.
-
- The rule is that for a process with a real address space (ie tsk->mm is
- non-NULL) the active_mm obviously always has to be the same as the real
- one.
-
- For a anonymous process, tsk->mm == NULL, and tsk->active_mm is the
- "borrowed" mm while the anonymous process is running. When the
- anonymous process gets scheduled away, the borrowed address space is
- returned and cleared.
-
-To support all that, the "struct mm_struct" now has two counters: a
-"mm_users" counter that is how many "real address space users" there are,
-and a "mm_count" counter that is the number of "lazy" users (ie anonymous
-users) plus one if there are any real users.
-
-Usually there is at least one real user, but it could be that the real
-user exited on another CPU while a lazy user was still active, so you do
-actually get cases where you have a address space that is _only_ used by
-lazy users. That is often a short-lived state, because once that thread
-gets scheduled away in favour of a real thread, the "zombie" mm gets
-released because "mm_users" becomes zero.
-
-Also, a new rule is that _nobody_ ever has "init_mm" as a real MM any
-more. "init_mm" should be considered just a "lazy context when no other
-context is available", and in fact it is mainly used just at bootup when
-no real VM has yet been created. So code that used to check
-
- if (current->mm == &init_mm)
-
-should generally just do
-
- if (!current->mm)
-
-instead (which makes more sense anyway - the test is basically one of "do
-we have a user context", and is generally done by the page fault handler
-and things like that).
-
-Anyway, I put a pre-patch-2.3.13-1 on ftp.kernel.org just a moment ago,
-because it slightly changes the interfaces to accommodate the alpha (who
-would have thought it, but the alpha actually ends up having one of the
-ugliest context switch codes - unlike the other architectures where the MM
-and register state is separate, the alpha PALcode joins the two, and you
-need to switch both together).
-
-(From http://marc.info/?l=linux-kernel&m=93337278602211&w=2)
diff --git a/Documentation/vm/balance b/Documentation/vm/balance.rst
index 964595481af6..6a1fadf3e173 100644
--- a/Documentation/vm/balance
+++ b/Documentation/vm/balance.rst
@@ -1,3 +1,9 @@
+.. _balance:
+
+================
+Memory Balancing
+================
+
Started Jan 2000 by Kanoj Sarcar <kanoj@sgi.com>
Memory balancing is needed for !__GFP_ATOMIC and !__GFP_KSWAPD_RECLAIM as
@@ -62,11 +68,11 @@ for non-sleepable allocations. Second, the HIGHMEM zone is also balanced,
so as to give a fighting chance for replace_with_highmem() to get a
HIGHMEM page, as well as to ensure that HIGHMEM allocations do not
fall back into regular zone. This also makes sure that HIGHMEM pages
-are not leaked (for example, in situations where a HIGHMEM page is in
+are not leaked (for example, in situations where a HIGHMEM page is in
the swapcache but is not being used by anyone)
kswapd also needs to know about the zones it should balance. kswapd is
-primarily needed in a situation where balancing can not be done,
+primarily needed in a situation where balancing can not be done,
probably because all allocation requests are coming from intr context
and all process contexts are sleeping. For 2.3, kswapd does not really
need to balance the highmem zone, since intr context does not request
@@ -89,7 +95,8 @@ pages is below watermark[WMARK_LOW]; in which case zone_wake_kswapd is also set.
(Good) Ideas that I have heard:
+
1. Dynamic experience should influence balancing: number of failed requests
-for a zone can be tracked and fed into the balancing scheme (jalvo@mbay.net)
+ for a zone can be tracked and fed into the balancing scheme (jalvo@mbay.net)
2. Implement a replace_with_highmem()-like replace_with_regular() to preserve
-dma pages. (lkd@tantalophile.demon.co.uk)
+ dma pages. (lkd@tantalophile.demon.co.uk)
diff --git a/Documentation/vm/cleancache.txt b/Documentation/vm/cleancache.rst
index e4b49df7a048..68cba9131c31 100644
--- a/Documentation/vm/cleancache.txt
+++ b/Documentation/vm/cleancache.rst
@@ -1,4 +1,11 @@
-MOTIVATION
+.. _cleancache:
+
+==========
+Cleancache
+==========
+
+Motivation
+==========
Cleancache is a new optional feature provided by the VFS layer that
potentially dramatically increases page cache effectiveness for
@@ -21,9 +28,10 @@ Transcendent memory "drivers" for cleancache are currently implemented
in Xen (using hypervisor memory) and zcache (using in-kernel compressed
memory) and other implementations are in development.
-FAQs are included below.
+:ref:`FAQs <faq>` are included below.
-IMPLEMENTATION OVERVIEW
+Implementation Overview
+=======================
A cleancache "backend" that provides transcendent memory registers itself
to the kernel's cleancache "frontend" by calling cleancache_register_ops,
@@ -80,22 +88,33 @@ different Linux threads are simultaneously putting and invalidating a page
with the same handle, the results are indeterminate. Callers must
lock the page to ensure serial behavior.
-CLEANCACHE PERFORMANCE METRICS
+Cleancache Performance Metrics
+==============================
If properly configured, monitoring of cleancache is done via debugfs in
-the /sys/kernel/debug/cleancache directory. The effectiveness of cleancache
+the `/sys/kernel/debug/cleancache` directory. The effectiveness of cleancache
can be measured (across all filesystems) with:
-succ_gets - number of gets that were successful
-failed_gets - number of gets that failed
-puts - number of puts attempted (all "succeed")
-invalidates - number of invalidates attempted
+``succ_gets``
+ number of gets that were successful
+
+``failed_gets``
+ number of gets that failed
+
+``puts``
+ number of puts attempted (all "succeed")
+
+``invalidates``
+ number of invalidates attempted
A backend implementation may provide additional metrics.
+.. _faq:
+
FAQ
+===
-1) Where's the value? (Andrew Morton)
+* Where's the value? (Andrew Morton)
Cleancache provides a significant performance benefit to many workloads
in many environments with negligible overhead by improving the
@@ -137,8 +156,8 @@ device that stores pages of data in a compressed state. And
the proposed "RAMster" driver shares RAM across multiple physical
systems.
-2) Why does cleancache have its sticky fingers so deep inside the
- filesystems and VFS? (Andrew Morton and Christoph Hellwig)
+* Why does cleancache have its sticky fingers so deep inside the
+ filesystems and VFS? (Andrew Morton and Christoph Hellwig)
The core hooks for cleancache in VFS are in most cases a single line
and the minimum set are placed precisely where needed to maintain
@@ -168,9 +187,9 @@ filesystems in the future.
The total impact of the hooks to existing fs and mm files is only
about 40 lines added (not counting comments and blank lines).
-3) Why not make cleancache asynchronous and batched so it can
- more easily interface with real devices with DMA instead
- of copying each individual page? (Minchan Kim)
+* Why not make cleancache asynchronous and batched so it can more
+ easily interface with real devices with DMA instead of copying each
+ individual page? (Minchan Kim)
The one-page-at-a-time copy semantics simplifies the implementation
on both the frontend and backend and also allows the backend to
@@ -182,8 +201,8 @@ are avoided. While the interface seems odd for a "real device"
or for real kernel-addressable RAM, it makes perfect sense for
transcendent memory.
-4) Why is non-shared cleancache "exclusive"? And where is the
- page "invalidated" after a "get"? (Minchan Kim)
+* Why is non-shared cleancache "exclusive"? And where is the
+ page "invalidated" after a "get"? (Minchan Kim)
The main reason is to free up space in transcendent memory and
to avoid unnecessary cleancache_invalidate calls. If you want inclusive,
@@ -193,7 +212,7 @@ be easily extended to add a "get_no_invalidate" call.
The invalidate is done by the cleancache backend implementation.
-5) What's the performance impact?
+* What's the performance impact?
Performance analysis has been presented at OLS'09 and LCA'10.
Briefly, performance gains can be significant on most workloads,
@@ -206,7 +225,7 @@ single-core systems with slow memory-copy speeds, cleancache
has little value, but in newer multicore machines, especially
consolidated/virtualized machines, it has great value.
-6) How do I add cleancache support for filesystem X? (Boaz Harrash)
+* How do I add cleancache support for filesystem X? (Boaz Harrash)
Filesystems that are well-behaved and conform to certain
restrictions can utilize cleancache simply by making a call to
@@ -217,26 +236,26 @@ not enable the optional cleancache.
Some points for a filesystem to consider:
-- The FS should be block-device-based (e.g. a ram-based FS such
- as tmpfs should not enable cleancache)
-- To ensure coherency/correctness, the FS must ensure that all
- file removal or truncation operations either go through VFS or
- add hooks to do the equivalent cleancache "invalidate" operations
-- To ensure coherency/correctness, either inode numbers must
- be unique across the lifetime of the on-disk file OR the
- FS must provide an "encode_fh" function.
-- The FS must call the VFS superblock alloc and deactivate routines
- or add hooks to do the equivalent cleancache calls done there.
-- To maximize performance, all pages fetched from the FS should
- go through the do_mpag_readpage routine or the FS should add
- hooks to do the equivalent (cf. btrfs)
-- Currently, the FS blocksize must be the same as PAGESIZE. This
- is not an architectural restriction, but no backends currently
- support anything different.
-- A clustered FS should invoke the "shared_init_fs" cleancache
- hook to get best performance for some backends.
-
-7) Why not use the KVA of the inode as the key? (Christoph Hellwig)
+ - The FS should be block-device-based (e.g. a ram-based FS such
+ as tmpfs should not enable cleancache)
+ - To ensure coherency/correctness, the FS must ensure that all
+ file removal or truncation operations either go through VFS or
+ add hooks to do the equivalent cleancache "invalidate" operations
+ - To ensure coherency/correctness, either inode numbers must
+ be unique across the lifetime of the on-disk file OR the
+ FS must provide an "encode_fh" function.
+ - The FS must call the VFS superblock alloc and deactivate routines
+ or add hooks to do the equivalent cleancache calls done there.
+ - To maximize performance, all pages fetched from the FS should
+    go through the do_mpage_readpage routine or the FS should add
+ hooks to do the equivalent (cf. btrfs)
+ - Currently, the FS blocksize must be the same as PAGESIZE. This
+ is not an architectural restriction, but no backends currently
+ support anything different.
+ - A clustered FS should invoke the "shared_init_fs" cleancache
+ hook to get best performance for some backends.
+
+* Why not use the KVA of the inode as the key? (Christoph Hellwig)
If cleancache would use the inode virtual address instead of
inode/filehandle, the pool id could be eliminated. But, this
of cleancache would be lost because the cache of pages in cleancache
is potentially much larger than the kernel pagecache and is most
useful if the pages survive inode cache removal.
-8) Why is a global variable required?
+* Why is a global variable required?
The cleancache_enabled flag is checked in all of the frequently-used
cleancache hooks. The alternative is a function call to check a static
@@ -262,14 +281,14 @@ global variable allows cleancache to be enabled by default at compile
time, but have insignificant performance impact when cleancache remains
disabled at runtime.
-9) Does cleanache work with KVM?
+* Does cleancache work with KVM?
The memory model of KVM is sufficiently different that a cleancache
backend may have less value for KVM. This remains to be tested,
especially in an overcommitted system.
-10) Does cleancache work in userspace? It sounds useful for
- memory hungry caches like web browsers. (Jamie Lokier)
+* Does cleancache work in userspace? It sounds useful for
+ memory hungry caches like web browsers. (Jamie Lokier)
No plans yet, though we agree it sounds useful, at least for
apps that bypass the page cache (e.g. O_DIRECT).
diff --git a/Documentation/vm/conf.py b/Documentation/vm/conf.py
new file mode 100644
index 000000000000..3b0b601af558
--- /dev/null
+++ b/Documentation/vm/conf.py
@@ -0,0 +1,10 @@
+# -*- coding: utf-8; mode: python -*-
+
+project = "Linux Memory Management Documentation"
+
+tags.add("subproject")
+
+latex_documents = [
+ ('index', 'memory-management.tex', project,
+ 'The kernel development community', 'manual'),
+]
diff --git a/Documentation/vm/frontswap.txt b/Documentation/vm/frontswap.rst
index c71a019be600..1979f430c1c5 100644
--- a/Documentation/vm/frontswap.txt
+++ b/Documentation/vm/frontswap.rst
@@ -1,13 +1,20 @@
+.. _frontswap:
+
+=========
+Frontswap
+=========
+
Frontswap provides a "transcendent memory" interface for swap pages.
In some environments, dramatic performance savings may be obtained because
swapped pages are saved in RAM (or a RAM-like device) instead of a swap disk.
-(Note, frontswap -- and cleancache (merged at 3.0) -- are the "frontends"
+(Note, frontswap -- and :ref:`cleancache` (merged at 3.0) -- are the "frontends"
and the only necessary changes to the core kernel for transcendent memory;
all other supporting code -- the "backends" -- is implemented as drivers.
-See the LWN.net article "Transcendent memory in a nutshell" for a detailed
-overview of frontswap and related kernel parts:
-https://lwn.net/Articles/454795/ )
+See the LWN.net article `Transcendent memory in a nutshell`_
+for a detailed overview of frontswap and related kernel parts)
+
+.. _Transcendent memory in a nutshell: https://lwn.net/Articles/454795/
Frontswap is so named because it can be thought of as the opposite of
a "backing" store for a swap device. The storage is assumed to be
@@ -50,19 +57,27 @@ or the store fails AND the page is invalidated. This ensures stale data may
never be obtained from frontswap.
If properly configured, monitoring of frontswap is done via debugfs in
-the /sys/kernel/debug/frontswap directory. The effectiveness of
+the `/sys/kernel/debug/frontswap` directory. The effectiveness of
frontswap can be measured (across all swap devices) with:
-failed_stores - how many store attempts have failed
-loads - how many loads were attempted (all should succeed)
-succ_stores - how many store attempts have succeeded
-invalidates - how many invalidates were attempted
+``failed_stores``
+ how many store attempts have failed
+
+``loads``
+ how many loads were attempted (all should succeed)
+
+``succ_stores``
+ how many store attempts have succeeded
+
+``invalidates``
+ how many invalidates were attempted
A backend implementation may provide additional metrics.
FAQ
+===
-1) Where's the value?
+* Where's the value?
When a workload starts swapping, performance falls through the floor.
Frontswap significantly increases performance in many such workloads by
@@ -117,8 +132,8 @@ A KVM implementation is underway and has been RFC'ed to lkml. And,
using frontswap, investigation is also underway on the use of NVM as
a memory extension technology.
-2) Sure there may be performance advantages in some situations, but
- what's the space/time overhead of frontswap?
+* Sure there may be performance advantages in some situations, but
+ what's the space/time overhead of frontswap?
If CONFIG_FRONTSWAP is disabled, every frontswap hook compiles into
nothingness and the only overhead is a few extra bytes per swapon'ed
@@ -148,8 +163,8 @@ pressure that can potentially outweigh the other advantages. A
backend, such as zcache, must implement policies to carefully (but
dynamically) manage memory limits to ensure this doesn't happen.
-3) OK, how about a quick overview of what this frontswap patch does
- in terms that a kernel hacker can grok?
+* OK, how about a quick overview of what this frontswap patch does
+ in terms that a kernel hacker can grok?
Let's assume that a frontswap "backend" has registered during
kernel initialization; this registration indicates that this
@@ -188,9 +203,9 @@ and (potentially) a swap device write are replaced by a "frontswap backend
store" and (possibly) a "frontswap backend loads", which are presumably much
faster.
-4) Can't frontswap be configured as a "special" swap device that is
- just higher priority than any real swap device (e.g. like zswap,
- or maybe swap-over-nbd/NFS)?
+* Can't frontswap be configured as a "special" swap device that is
+ just higher priority than any real swap device (e.g. like zswap,
+ or maybe swap-over-nbd/NFS)?
No. First, the existing swap subsystem doesn't allow for any kind of
swap hierarchy. Perhaps it could be rewritten to accommodate a hierarchy,
@@ -240,9 +255,9 @@ installation, frontswap is useless. Swapless portable devices
can still use frontswap but a backend for such devices must configure
some kind of "ghost" swap device and ensure that it is never used.
-5) Why this weird definition about "duplicate stores"? If a page
- has been previously successfully stored, can't it always be
- successfully overwritten?
+* Why this weird definition about "duplicate stores"? If a page
+ has been previously successfully stored, can't it always be
+ successfully overwritten?
Nearly always it can, but no, sometimes it cannot. Consider an example
where data is compressed and the original 4K page has been compressed
@@ -254,7 +269,7 @@ the old data and ensure that it is no longer accessible. Since the
swap subsystem then writes the new data to the read swap device,
this is the correct course of action to ensure coherency.
-6) What is frontswap_shrink for?
+* What is frontswap_shrink for?
When the (non-frontswap) swap subsystem swaps out a page to a real
swap device, that page is only taking up low-value pre-allocated disk
@@ -267,7 +282,7 @@ to "repatriate" pages sent to a remote machine back to the local machine;
this is driven using the frontswap_shrink mechanism when memory pressure
subsides.
-7) Why does the frontswap patch create the new include file swapfile.h?
+* Why does the frontswap patch create the new include file swapfile.h?
The frontswap code depends on some swap-subsystem-internal data
structures that have, over the years, moved back and forth between
diff --git a/Documentation/vm/highmem.txt b/Documentation/vm/highmem.rst
index 4324d24ffacd..0f69a9fec34d 100644
--- a/Documentation/vm/highmem.txt
+++ b/Documentation/vm/highmem.rst
@@ -1,25 +1,14 @@
+.. _highmem:
- ====================
- HIGH MEMORY HANDLING
- ====================
+====================
+High Memory Handling
+====================
By: Peter Zijlstra <a.p.zijlstra@chello.nl>
-Contents:
-
- (*) What is high memory?
-
- (*) Temporary virtual mappings.
-
- (*) Using kmap_atomic.
-
- (*) Cost of temporary mappings.
-
- (*) i386 PAE.
+.. contents:: :local:
-
-====================
-WHAT IS HIGH MEMORY?
+What Is High Memory?
====================
High memory (highmem) is used when the size of physical memory approaches or
@@ -38,7 +27,7 @@ kernel entry/exit. This means the available virtual memory space (4GiB on
i386) has to be divided between user and kernel space.
The traditional split for architectures using this approach is 3:1, 3GiB for
-userspace and the top 1GiB for kernel space:
+userspace and the top 1GiB for kernel space::
+--------+ 0xffffffff
| Kernel |
@@ -58,40 +47,38 @@ and user maps. Some hardware (like some ARMs), however, have limited virtual
space when they use mm context tags.
-==========================
-TEMPORARY VIRTUAL MAPPINGS
+Temporary Virtual Mappings
==========================
The kernel contains several ways of creating temporary mappings:
- (*) vmap(). This can be used to make a long duration mapping of multiple
- physical pages into a contiguous virtual space. It needs global
- synchronization to unmap.
+* vmap(). This can be used to make a long duration mapping of multiple
+ physical pages into a contiguous virtual space. It needs global
+ synchronization to unmap.
- (*) kmap(). This permits a short duration mapping of a single page. It needs
- global synchronization, but is amortized somewhat. It is also prone to
- deadlocks when using in a nested fashion, and so it is not recommended for
- new code.
+* kmap(). This permits a short duration mapping of a single page. It needs
+ global synchronization, but is amortized somewhat. It is also prone to
+ deadlocks when using in a nested fashion, and so it is not recommended for
+ new code.
- (*) kmap_atomic(). This permits a very short duration mapping of a single
- page. Since the mapping is restricted to the CPU that issued it, it
- performs well, but the issuing task is therefore required to stay on that
- CPU until it has finished, lest some other task displace its mappings.
+* kmap_atomic(). This permits a very short duration mapping of a single
+ page. Since the mapping is restricted to the CPU that issued it, it
+ performs well, but the issuing task is therefore required to stay on that
+ CPU until it has finished, lest some other task displace its mappings.
- kmap_atomic() may also be used by interrupt contexts, since it is does not
- sleep and the caller may not sleep until after kunmap_atomic() is called.
+  kmap_atomic() may also be used by interrupt contexts, since it does not
+  sleep and the caller may not sleep until after kunmap_atomic() is called.
- It may be assumed that k[un]map_atomic() won't fail.
+ It may be assumed that k[un]map_atomic() won't fail.
-=================
-USING KMAP_ATOMIC
+Using kmap_atomic
=================
When and where to use kmap_atomic() is straightforward. It is used when code
wants to access the contents of a page that might be allocated from high memory
(see __GFP_HIGHMEM), for example a page in the pagecache. The API has two
-functions, and they can be used in a manner similar to the following:
+functions, and they can be used in a manner similar to the following::
/* Find the page of interest. */
struct page *page = find_get_page(mapping, offset);
@@ -109,7 +96,7 @@ Note that the kunmap_atomic() call takes the result of the kmap_atomic() call
not the argument.
If you need to map two pages because you want to copy from one page to
-another you need to keep the kmap_atomic calls strictly nested, like:
+another you need to keep the kmap_atomic calls strictly nested, like::
vaddr1 = kmap_atomic(page1);
vaddr2 = kmap_atomic(page2);
@@ -120,8 +107,7 @@ another you need to keep the kmap_atomic calls strictly nested, like:
kunmap_atomic(vaddr1);
-==========================
-COST OF TEMPORARY MAPPINGS
+Cost of Temporary Mappings
==========================
The cost of creating temporary mappings can be quite high. The arch has to
@@ -136,25 +122,24 @@ If CONFIG_MMU is not set, then there can be no temporary mappings and no
highmem. In such a case, the arithmetic approach will also be used.
-========
i386 PAE
========
The i386 arch, under some circumstances, will permit you to stick up to 64GiB
of RAM into your 32-bit machine. This has a number of consequences:
- (*) Linux needs a page-frame structure for each page in the system and the
- pageframes need to live in the permanent mapping, which means:
+* Linux needs a page-frame structure for each page in the system and the
+ pageframes need to live in the permanent mapping, which means:
- (*) you can have 896M/sizeof(struct page) page-frames at most; with struct
- page being 32-bytes that would end up being something in the order of 112G
- worth of pages; the kernel, however, needs to store more than just
- page-frames in that memory...
+* you can have 896M/sizeof(struct page) page-frames at most; with struct
+ page being 32-bytes that would end up being something in the order of 112G
+ worth of pages; the kernel, however, needs to store more than just
+ page-frames in that memory...
- (*) PAE makes your page tables larger - which slows the system down as more
- data has to be accessed to traverse in TLB fills and the like. One
- advantage is that PAE has more PTE bits and can provide advanced features
- like NX and PAT.
+* PAE makes your page tables larger - which slows the system down as more
+ data has to be accessed to traverse in TLB fills and the like. One
+ advantage is that PAE has more PTE bits and can provide advanced features
+ like NX and PAT.
The general recommendation is that you don't use more than 8GiB on a 32-bit
machine - although more might work for you and your workload, you're pretty
diff --git a/Documentation/vm/hmm.txt b/Documentation/vm/hmm.rst
index 2d1d6f69e91b..cdf3911582c8 100644
--- a/Documentation/vm/hmm.txt
+++ b/Documentation/vm/hmm.rst
@@ -1,4 +1,8 @@
+.. hmm:
+
+=====================================
Heterogeneous Memory Management (HMM)
+=====================================
Provide infrastructure and helpers to integrate non-conventional memory (device
memory like GPU on board memory) into regular kernel path, with the cornerstone
@@ -6,10 +10,10 @@ of this being specialized struct page for such memory (see sections 5 to 7 of
this document).
HMM also provides optional helpers for SVM (Share Virtual Memory), i.e.,
-allowing a device to transparently access program address coherently with the
-CPU meaning that any valid pointer on the CPU is also a valid pointer for the
-device. This is becoming mandatory to simplify the use of advanced hetero-
-geneous computing where GPU, DSP, or FPGA are used to perform various
+allowing a device to transparently access program addresses coherently with
+the CPU, meaning that any valid pointer on the CPU is also a valid pointer
+for the device. This is becoming mandatory to simplify the use of advanced
+heterogeneous computing where GPU, DSP, or FPGA are used to perform various
computations on behalf of a process.
This document is divided as follows: in the first section I expose the problems
@@ -21,19 +25,10 @@ fifth section deals with how device memory is represented inside the kernel.
Finally, the last section presents a new migration helper that allows lever-
aging the device DMA engine.
+.. contents:: :local:
-1) Problems of using a device specific memory allocator:
-2) I/O bus, device memory characteristics
-3) Shared address space and migration
-4) Address space mirroring implementation and API
-5) Represent and manage device memory from core kernel point of view
-6) Migration to and from device memory
-7) Memory cgroup (memcg) and rss accounting
-
-
--------------------------------------------------------------------------------
-
-1) Problems of using a device specific memory allocator:
+Problems of using a device specific memory allocator
+====================================================
Devices with a large amount of on board memory (several gigabytes) like GPUs
have historically managed their memory through dedicated driver specific APIs.
@@ -77,9 +72,8 @@ are only do-able with a shared address space. It is also more reasonable to use
a shared address space for all other patterns.
--------------------------------------------------------------------------------
-
-2) I/O bus, device memory characteristics
+I/O bus, device memory characteristics
+======================================
I/O buses cripple shared address spaces due to a few limitations. Most I/O
buses only allow basic memory access from device to main memory; even cache
@@ -109,9 +103,8 @@ access any memory but we must also permit any memory to be migrated to device
memory while device is using it (blocking CPU access while it happens).
--------------------------------------------------------------------------------
-
-3) Shared address space and migration
+Shared address space and migration
+==================================
HMM intends to provide two main features. First one is to share the address
space by duplicating the CPU page table in the device page table so the same
@@ -148,23 +141,23 @@ ages device memory by migrating the part of the data set that is actively being
used by the device.
--------------------------------------------------------------------------------
-
-4) Address space mirroring implementation and API
+Address space mirroring implementation and API
+==============================================
Address space mirroring's main objective is to allow duplication of a range of
CPU page table into a device page table; HMM helps keep both synchronized. A
device driver that wants to mirror a process address space must start with the
-registration of an hmm_mirror struct:
+registration of an hmm_mirror struct::
int hmm_mirror_register(struct hmm_mirror *mirror,
struct mm_struct *mm);
int hmm_mirror_register_locked(struct hmm_mirror *mirror,
struct mm_struct *mm);
+
The locked variant is to be used when the driver is already holding mmap_sem
of the mm in write mode. The mirror struct has a set of callbacks that are used
-to propagate CPU page tables:
+to propagate CPU page tables::
struct hmm_mirror_ops {
/* sync_cpu_device_pagetables() - synchronize page tables
@@ -193,10 +186,10 @@ The device driver must perform the update action to the range (mark range
read only, or fully unmap, ...). The device must be done with the update before
the driver callback returns.
-
When the device driver wants to populate a range of virtual addresses, it can
-use either:
- int hmm_vma_get_pfns(struct vm_area_struct *vma,
+use either::
+
+ int hmm_vma_get_pfns(struct vm_area_struct *vma,
struct hmm_range *range,
unsigned long start,
unsigned long end,
@@ -221,7 +214,7 @@ provides a set of flags to help the driver identify special CPU page table
entries.
Locking with the update() callback is the most important aspect the driver must
-respect in order to keep things properly synchronized. The usage pattern is:
+respect in order to keep things properly synchronized. The usage pattern is::
int driver_populate_range(...)
{
@@ -262,9 +255,8 @@ report commands as executed is serialized (there is no point in doing this
concurrently).
--------------------------------------------------------------------------------
-
-5) Represent and manage device memory from core kernel point of view
+Represent and manage device memory from core kernel point of view
+=================================================================
Several different designs were tried to support device memory. First one used
a device specific data structure to keep information about migrated memory and
@@ -280,14 +272,14 @@ unaware of the difference. We only need to make sure that no one ever tries to
map those pages from the CPU side.
HMM provides a set of helpers to register and hotplug device memory as a new
-region needing a struct page. This is offered through a very simple API:
+region needing a struct page. This is offered through a very simple API::
struct hmm_devmem *hmm_devmem_add(const struct hmm_devmem_ops *ops,
struct device *device,
unsigned long size);
void hmm_devmem_remove(struct hmm_devmem *devmem);
-The hmm_devmem_ops is where most of the important things are:
+The hmm_devmem_ops is where most of the important things are::
struct hmm_devmem_ops {
void (*free)(struct hmm_devmem *devmem, struct page *page);
@@ -306,13 +298,12 @@ which it cannot do. This second callback must trigger a migration back to
system memory.
--------------------------------------------------------------------------------
-
-6) Migration to and from device memory
+Migration to and from device memory
+===================================
Because the CPU cannot access device memory, migration must use the device DMA
engine to perform copy from and to device memory. For this we need a new
-migration helper:
+migration helper::
int migrate_vma(const struct migrate_vma_ops *ops,
struct vm_area_struct *vma,
@@ -331,7 +322,7 @@ migration might be for a range of addresses the device is actively accessing.
The migrate_vma_ops struct defines two callbacks. First one (alloc_and_copy())
controls destination memory allocation and copy operation. Second one is there
-to allow the device driver to perform cleanup operations after migration.
+to allow the device driver to perform cleanup operations after migration::
struct migrate_vma_ops {
void (*alloc_and_copy)(struct vm_area_struct *vma,
@@ -365,9 +356,8 @@ bandwidth but this is considered as a rare event and a price that we are
willing to pay to keep all the code simpler.
--------------------------------------------------------------------------------
-
-7) Memory cgroup (memcg) and rss accounting
+Memory cgroup (memcg) and rss accounting
+========================================
For now device memory is accounted as any regular page in rss counters (either
anonymous if device page is used for anonymous, file if device page is used for
diff --git a/Documentation/vm/hugetlbfs_reserv.txt b/Documentation/vm/hugetlbfs_reserv.rst
index 9aca09a76bed..9d200762114f 100644
--- a/Documentation/vm/hugetlbfs_reserv.txt
+++ b/Documentation/vm/hugetlbfs_reserv.rst
@@ -1,6 +1,13 @@
-Hugetlbfs Reservation Overview
-------------------------------
-Huge pages as described at 'Documentation/vm/hugetlbpage.txt' are typically
+.. _hugetlbfs_reserve:
+
+=====================
+Hugetlbfs Reservation
+=====================
+
+Overview
+========
+
+Huge pages as described at :ref:`hugetlbpage` are typically
preallocated for application use. These huge pages are instantiated in a
task's address space at page fault time if the VMA indicates huge pages are
to be used. If no huge page exists at page fault time, the task is sent
@@ -17,47 +24,55 @@ describe how huge page reserve processing is done in the v4.10 kernel.
Audience
---------
+========
This description is primarily targeted at kernel developers who are modifying
hugetlbfs code.
The Data Structures
--------------------
+===================
+
resv_huge_pages
This is a global (per-hstate) count of reserved huge pages. Reserved
huge pages are only available to the task which reserved them.
Therefore, the number of huge pages generally available is computed
- as (free_huge_pages - resv_huge_pages).
+ as (``free_huge_pages - resv_huge_pages``).
Reserve Map
- A reserve map is described by the structure:
- struct resv_map {
- struct kref refs;
- spinlock_t lock;
- struct list_head regions;
- long adds_in_progress;
- struct list_head region_cache;
- long region_cache_count;
- };
+ A reserve map is described by the structure::
+
+ struct resv_map {
+ struct kref refs;
+ spinlock_t lock;
+ struct list_head regions;
+ long adds_in_progress;
+ struct list_head region_cache;
+ long region_cache_count;
+ };
+
There is one reserve map for each huge page mapping in the system.
The regions list within the resv_map describes the regions within
- the mapping. A region is described as:
- struct file_region {
- struct list_head link;
- long from;
- long to;
- };
+ the mapping. A region is described as::
+
+ struct file_region {
+ struct list_head link;
+ long from;
+ long to;
+ };
+
The 'from' and 'to' fields of the file region structure are huge page
indices into the mapping. Depending on the type of mapping, a
region in the resv_map may indicate reservations exist for the
range, or reservations do not exist.
Flags for MAP_PRIVATE Reservations
These are stored in the bottom bits of the reservation map pointer.
- #define HPAGE_RESV_OWNER (1UL << 0) Indicates this task is the
- owner of the reservations associated with the mapping.
- #define HPAGE_RESV_UNMAPPED (1UL << 1) Indicates task originally
- mapping this range (and creating reserves) has unmapped a
- page from this task (the child) due to a failed COW.
+
+ ``#define HPAGE_RESV_OWNER (1UL << 0)``
+ Indicates this task is the owner of the reservations
+ associated with the mapping.
+ ``#define HPAGE_RESV_UNMAPPED (1UL << 1)``
+ Indicates task originally mapping this range (and creating
+ reserves) has unmapped a page from this task (the child)
+ due to a failed COW.
Page Flags
The PagePrivate page flag is used to indicate that a huge page
reservation must be restored when the huge page is freed. More
@@ -65,12 +80,14 @@ Page Flags
Reservation Map Location (Private or Shared)
---------------------------------------------
+============================================
+
A huge page mapping or segment is either private or shared. If private,
it is typically only available to a single address space (task). If shared,
it can be mapped into multiple address spaces (tasks). The location and
semantics of the reservation map are significantly different for the two types
of mappings. Location differences are:
+
- For private mappings, the reservation map hangs off the VMA structure.
Specifically, vma->vm_private_data. This reserve map is created at the
time the mapping (mmap(MAP_PRIVATE)) is created.
@@ -82,15 +99,15 @@ of mappings. Location differences are:
Creating Reservations
----------------------
+=====================
Reservations are created when a huge page backed shared memory segment is
created (shmget(SHM_HUGETLB)) or a mapping is created via mmap(MAP_HUGETLB).
-These operations result in a call to the routine hugetlb_reserve_pages()
+These operations result in a call to the routine hugetlb_reserve_pages()::
-int hugetlb_reserve_pages(struct inode *inode,
- long from, long to,
- struct vm_area_struct *vma,
- vm_flags_t vm_flags)
+ int hugetlb_reserve_pages(struct inode *inode,
+ long from, long to,
+ struct vm_area_struct *vma,
+ vm_flags_t vm_flags)
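+
+From userspace, a mapping that reaches this routine can be created as
+follows (a minimal sketch; assumes a 2MB default huge page size)::
+
+  #include <sys/mman.h>
+
+  int main(void)
+  {
+      size_t len = 2UL * 1024 * 1024;    /* one 2MB huge page */
+      void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
+                     MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);
+
+      return p == MAP_FAILED;
+  }
+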
The first thing hugetlb_reserve_pages() does is check if the NORESERVE
flag was specified in either the shmget() or mmap() call. If NORESERVE
@@ -105,6 +122,7 @@ the 'from' and 'to' arguments have been adjusted by this offset.
One of the big differences between PRIVATE and SHARED mappings is the way
in which reservations are represented in the reservation map.
+
- For shared mappings, an entry in the reservation map indicates a reservation
exists or did exist for the corresponding page. As reservations are
consumed, the reservation map is not modified.
@@ -121,12 +139,13 @@ to indicate this VMA owns the reservations.
The reservation map is consulted to determine how many huge page reservations
are needed for the current mapping/segment. For private mappings, this is
always the value (to - from). However, for shared mappings it is possible
that some reservations may already exist within the range (to - from). See the
-section "Reservation Map Modifications" for details on how this is accomplished.
+section :ref:`Reservation Map Modifications <resv_map_modifications>`
+for details on how this is accomplished.
The mapping may be associated with a subpool. If so, the subpool is consulted
to ensure there is sufficient space for the mapping. It is possible that the
subpool has set aside reservations that can be used for the mapping. See the
-section "Subpool Reservations" for more details.
+section :ref:`Subpool Reservations <sub_pool_resv>` for more details.
After consulting the reservation map and subpool, the number of needed new
reservations is known. The routine hugetlb_acct_memory() is called to check
@@ -135,9 +154,11 @@ calls into routines that potentially allocate and adjust surplus page counts.
However, within those routines the code is simply checking to ensure there
are enough free huge pages to accommodate the reservation. If there are,
the global reservation count resv_huge_pages is adjusted something like the
-following.
+following::
+
if (resv_needed <= (free_huge_pages - resv_huge_pages))
resv_huge_pages += resv_needed;
+
Note that the global lock hugetlb_lock is held when checking and adjusting
these counters.
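+
+Schematically, the check and adjustment are done while holding that lock
+(a sketch, not the literal kernel code)::
+
+  spin_lock(&hugetlb_lock);
+  if (resv_needed <= (free_huge_pages - resv_huge_pages))
+      resv_huge_pages += resv_needed;
+  spin_unlock(&hugetlb_lock);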
@@ -152,14 +173,18 @@ If hugetlb_reserve_pages() was successful, the global reservation count and
reservation map associated with the mapping will be modified as required to
ensure reservations exist for the range 'from' - 'to'.
+.. _consume_resv:
Consuming Reservations/Allocating a Huge Page
----------------------------------------------
+=============================================
+
Reservations are consumed when huge pages associated with the reservations
are allocated and instantiated in the corresponding mapping. The allocation
-is performed within the routine alloc_huge_page().
-struct page *alloc_huge_page(struct vm_area_struct *vma,
- unsigned long addr, int avoid_reserve)
+is performed within the routine alloc_huge_page()::
+
+ struct page *alloc_huge_page(struct vm_area_struct *vma,
+ unsigned long addr, int avoid_reserve)
+
alloc_huge_page is passed a VMA pointer and a virtual address, so it can
consult the reservation map to determine if a reservation exists. In addition,
alloc_huge_page takes the argument avoid_reserve which indicates reserves
@@ -170,8 +195,9 @@ page are being allocated.
The helper routine vma_needs_reservation() is called to determine if a
reservation exists for the address within the mapping(vma). See the section
-"Reservation Map Helper Routines" for detailed information on what this
-routine does. The value returned from vma_needs_reservation() is generally
+:ref:`Reservation Map Helper Routines <resv_map_helpers>` for detailed
+information on what this routine does.
+The value returned from vma_needs_reservation() is generally
0 or 1. 0 if a reservation exists for the address, 1 if no reservation exists.
If a reservation does not exist, and there is a subpool associated with the
mapping the subpool is consulted to determine if it contains reservations.
@@ -180,21 +206,25 @@ However, in every case the avoid_reserve argument overrides the use of
a reservation for the allocation. After determining whether a reservation
exists and can be used for the allocation, the routine dequeue_huge_page_vma()
is called. This routine takes two arguments related to reservations:
+
- avoid_reserve, this is the same value/argument passed to alloc_huge_page()
- chg, even though this argument is of type long, only the values 0 or 1 are
passed to dequeue_huge_page_vma. If the value is 0, it indicates a
reservation exists (see the section "Reservations and Memory Policy" for
possible issues). If the value is 1, it indicates a reservation does not
exist and the page must be taken from the global free pool if possible.
+
The free lists associated with the memory policy of the VMA are searched for
a free page. If a page is found, the value free_huge_pages is decremented
when the page is removed from the free list. If there was a reservation
-associated with the page, the following adjustments are made:
+associated with the page, the following adjustments are made::
+
SetPagePrivate(page); /* Indicates allocating this page consumed
* a reservation, and if an error is
* encountered such that the page must be
* freed, the reservation will be restored. */
resv_huge_pages--; /* Decrement the global reservation count */
+
Note, if no huge page can be found that satisfies the VMA's memory policy
an attempt will be made to allocate one using the buddy allocator. This
brings up the issue of surplus huge pages and overcommit which is beyond
@@ -222,12 +252,14 @@ mapping. In such cases, the reservation count and subpool free page count
will be off by one. This rare condition can be identified by comparing the
return value from vma_needs_reservation and vma_commit_reservation. If such
a race is detected, the subpool and global reserve counts are adjusted to
-compensate. See the section "Reservation Map Helper Routines" for more
+compensate. See the section
+:ref:`Reservation Map Helper Routines <resv_map_helpers>` for more
information on these routines.
Instantiate Huge Pages
-----------------------
+======================
+
After huge page allocation, the page is typically added to the page tables
of the allocating task. Before this, pages in a shared mapping are added
to the page cache and pages in private mappings are added to an anonymous
@@ -237,7 +269,8 @@ to the global reservation count (resv_huge_pages).
Freeing Huge Pages
-------------------
+==================
+
Huge page freeing is performed by the routine free_huge_page(). This routine
is the destructor for hugetlbfs compound pages. As a result, it is only
passed a pointer to the page struct. When a huge page is freed, reservation
@@ -247,7 +280,8 @@ on an error path where a global reserve count must be restored.
The page->private field points to any subpool associated with the page.
If the PagePrivate flag is set, it indicates the global reserve count should
-be adjusted (see the section "Consuming Reservations/Allocating a Huge Page"
+be adjusted (see the section
+:ref:`Consuming Reservations/Allocating a Huge Page <consume_resv>`
for information on how these are set).
The routine first calls hugepage_subpool_put_pages() for the page. If this
@@ -259,9 +293,11 @@ Therefore, the global resv_huge_pages counter is incremented in this case.
If the PagePrivate flag was set in the page, the global resv_huge_pages counter
will always be incremented.
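+
+Schematically, the reservation handling when freeing a huge page looks
+like this (a simplified sketch of free_huge_page())::
+
+  bool restore_reserve = PagePrivate(page);
+  struct hugepage_subpool *spool =
+          (struct hugepage_subpool *)page_private(page);
+
+  if (hugepage_subpool_put_pages(spool, 1) == 0)
+      restore_reserve = true;    /* subpool had no reserves for this page */
+  if (restore_reserve)
+      h->resv_huge_pages++;
+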
+.. _sub_pool_resv:
Subpool Reservations
---------------------
+====================
+
There is a struct hstate associated with each huge page size. The hstate
tracks all huge pages of the specified size. A subpool represents a subset
of pages within a hstate that is associated with a mounted hugetlbfs
@@ -295,7 +331,8 @@ the global pools.
COW and Reservations
---------------------
+====================
+
Since shared mappings all point to and use the same underlying pages, the
biggest reservation concern for COW is private mappings. In this case,
two tasks can be pointing at the same previously allocated page. One task
@@ -326,30 +363,36 @@ faults on a non-present page. But, the original owner of the
mapping/reservation will behave as expected.
+.. _resv_map_modifications:
+
Reservation Map Modifications
------------------------------
+=============================
+
The following low level routines are used to make modifications to a
reservation map. Typically, these routines are not called directly. Rather,
a reservation map helper routine is called which calls one of these low level
routines. These low level routines are fairly well documented in the source
-code (mm/hugetlb.c). These routines are:
-long region_chg(struct resv_map *resv, long f, long t);
-long region_add(struct resv_map *resv, long f, long t);
-void region_abort(struct resv_map *resv, long f, long t);
-long region_count(struct resv_map *resv, long f, long t);
+code (mm/hugetlb.c). These routines are::
+
+ long region_chg(struct resv_map *resv, long f, long t);
+ long region_add(struct resv_map *resv, long f, long t);
+ void region_abort(struct resv_map *resv, long f, long t);
+ long region_count(struct resv_map *resv, long f, long t);
Operations on the reservation map typically involve two operations:
+
1) region_chg() is called to examine the reserve map and determine how
many pages in the specified range [f, t) are NOT currently represented.
The calling code performs global checks and allocations to determine if
there are enough huge pages for the operation to succeed.
-2a) If the operation can succeed, region_add() is called to actually modify
- the reservation map for the same range [f, t) previously passed to
- region_chg().
-2b) If the operation can not succeed, region_abort is called for the same range
- [f, t) to abort the operation.
+2)
+ a) If the operation can succeed, region_add() is called to actually modify
+ the reservation map for the same range [f, t) previously passed to
+ region_chg().
+ b) If the operation can not succeed, region_abort is called for the same
+ range [f, t) to abort the operation.
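+
+A caller typically strings these together as follows (a schematic sketch;
+error handling simplified)::
+
+  chg = region_chg(resv, f, t);       /* step 1: how many new pages? */
+  if (chg < 0)
+      return chg;
+  if (hugetlb_acct_memory(h, chg) < 0) {
+      region_abort(resv, f, t);       /* step 2b: give up */
+      return -ENOSPC;
+  }
+  add = region_add(resv, f, t);       /* step 2a: commit the range */
+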
Note that this is a two step process where region_add() and region_abort()
are guaranteed to succeed after a prior call to region_chg() for the same
@@ -371,6 +414,7 @@ and make the appropriate adjustments.
The routine region_del() is called to remove regions from a reservation map.
It is typically called in the following situations:
+
- When a file in the hugetlbfs filesystem is being removed, the inode will
be released and the reservation map freed. Before freeing the reservation
map, all the individual file_region structures must be freed. In this case
@@ -384,6 +428,7 @@ It is typically called in the following situations:
removed, region_del() is called to remove the corresponding entry from the
reservation map. In this case, region_del is passed the range
[page_idx, page_idx + 1).
+
In every case, region_del() will return the number of pages removed from the
reservation map. In VERY rare cases, region_del() can fail. This can only
happen in the hole punch case where it has to split an existing file_region
@@ -403,9 +448,11 @@ outstanding (outstanding = (end - start) - region_count(resv, start, end)).
Since the mapping is going away, the subpool and global reservation counts
are decremented by the number of outstanding reservations.
+.. _resv_map_helpers:
Reservation Map Helper Routines
--------------------------------
+===============================
+
Several helper routines exist to query and modify the reservation maps.
These routines are only concerned with reservations for a specific huge
page, so they just pass in an address instead of a range. In addition,
@@ -414,32 +461,40 @@ or shared) and the location of the reservation map (inode or VMA) can be
determined. These routines simply call the underlying routines described
in the section "Reservation Map Modifications". However, they do take into
account the 'opposite' meaning of reservation map entries for private and
-shared mappings and hide this detail from the caller.
+shared mappings and hide this detail from the caller::
+
+ long vma_needs_reservation(struct hstate *h,
+ struct vm_area_struct *vma,
+ unsigned long addr)
-long vma_needs_reservation(struct hstate *h,
- struct vm_area_struct *vma, unsigned long addr)
This routine calls region_chg() for the specified page. If no reservation
-exists, 1 is returned. If a reservation exists, 0 is returned.
+exists, 1 is returned. If a reservation exists, 0 is returned::
+
+ long vma_commit_reservation(struct hstate *h,
+ struct vm_area_struct *vma,
+ unsigned long addr)
-long vma_commit_reservation(struct hstate *h,
- struct vm_area_struct *vma, unsigned long addr)
This calls region_add() for the specified page. As in the case of region_chg
and region_add, this routine is to be called after a previous call to
vma_needs_reservation. It will add a reservation entry for the page. It
returns 1 if the reservation was added and 0 if not. The return value should
be compared with the return value of the previous call to
vma_needs_reservation. An unexpected difference indicates the reservation
-map was modified between calls.
+map was modified between calls::
+
+ void vma_end_reservation(struct hstate *h,
+ struct vm_area_struct *vma,
+ unsigned long addr)
-void vma_end_reservation(struct hstate *h,
- struct vm_area_struct *vma, unsigned long addr)
This calls region_abort() for the specified page. As in the case of region_chg
and region_abort, this routine is to be called after a previous call to
vma_needs_reservation. It will abort/end the in progress reservation add
-operation.
+operation::
+
+ long vma_add_reservation(struct hstate *h,
+ struct vm_area_struct *vma,
+ unsigned long addr)
-long vma_add_reservation(struct hstate *h,
- struct vm_area_struct *vma, unsigned long addr)
This is a special wrapper routine to help facilitate reservation cleanup
on error paths. It is only called from the routine restore_reserve_on_error().
This routine is used in conjunction with vma_needs_reservation in an attempt
@@ -453,8 +508,10 @@ be done on error paths.
Reservation Cleanup in Error Paths
-----------------------------------
-As mentioned in the section "Reservation Map Helper Routines", reservation
+==================================
+
+As mentioned in the section
+:ref:`Reservation Map Helper Routines <resv_map_helpers>`, reservation
map modifications are performed in two steps. First vma_needs_reservation
is called before a page is allocated. If the allocation is successful,
then vma_commit_reservation is called. If not, vma_end_reservation is called.
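+
+In other words, callers follow a pattern like this (a schematic sketch)::
+
+  ret = vma_needs_reservation(h, vma, addr);
+  /* ... attempt to allocate the huge page ... */
+  if (page)
+      vma_commit_reservation(h, vma, addr);
+  else
+      vma_end_reservation(h, vma, addr);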
@@ -494,13 +551,14 @@ so that a reservation will not be leaked when the huge page is freed.
Reservations and Memory Policy
-------------------------------
+==============================
Per-node huge page lists existed in struct hstate when git was first used
to manage Linux code. The concept of reservations was added some time later.
When reservations were added, no attempt was made to take memory policy
into account. While cpusets are not exactly the same as memory policy, this
comment in hugetlb_acct_memory sums up the interaction between reservations
-and cpusets/memory policy.
+and cpusets/memory policy::
+
/*
* When cpuset is configured, it breaks the strict hugetlb page
* reservation as the accounting is done on a global variable. Such
@@ -525,5 +583,13 @@ of cpusets or memory policy there is no guarantee that huge pages will be
available on the required nodes. This is true even if there are a sufficient
number of global reservations.
+Hugetlbfs regression testing
+============================
+
+The most complete set of hugetlb tests is in the libhugetlbfs repository.
+If you modify any hugetlb related code, use the libhugetlbfs test suite
+to check for regressions. In addition, if you add any new hugetlb
+functionality, please add appropriate tests to libhugetlbfs.
+--
Mike Kravetz, 7 April 2017
diff --git a/Documentation/vm/hwpoison.txt b/Documentation/vm/hwpoison.rst
index e912d7eee769..09bd24a92784 100644
--- a/Documentation/vm/hwpoison.txt
+++ b/Documentation/vm/hwpoison.rst
@@ -1,7 +1,14 @@
+.. _hwpoison:
+
+========
+hwpoison
+========
+
What is hwpoison?
+=================
Upcoming Intel CPUs have support for recovering from some memory errors
-(``MCA recovery''). This requires the OS to declare a page "poisoned",
+(``MCA recovery``). This requires the OS to declare a page "poisoned",
kill the processes associated with it and avoid using it in the future.
This patchkit implements the necessary infrastructure in the VM.
@@ -46,9 +53,10 @@ address. This in theory allows other applications to handle
memory failures too. The expectation is that nearly all applications
won't do that, but some very specialized ones might.
----
+Failure recovery modes
+======================
-There are two (actually three) modi memory failure recovery can be in:
+There are two (actually three) modes memory failure recovery can be in:
vm.memory_failure_recovery sysctl set to zero:
All memory failures cause a panic. Do not attempt recovery.
@@ -67,9 +75,8 @@ late kill
This is best for memory error unaware applications and default
Note some pages are always handled as late kill.
----
-
-User control:
+User control
+============
vm.memory_failure_recovery
See sysctl.txt
@@ -79,11 +86,19 @@ vm.memory_failure_early_kill
PR_MCE_KILL
Set early/late kill mode/revert to system default
- arg1: PR_MCE_KILL_CLEAR: Revert to system default
- arg1: PR_MCE_KILL_SET: arg2 defines thread specific mode
- PR_MCE_KILL_EARLY: Early kill
- PR_MCE_KILL_LATE: Late kill
- PR_MCE_KILL_DEFAULT: Use system global default
+
+ arg1: PR_MCE_KILL_CLEAR:
+ Revert to system default
+ arg1: PR_MCE_KILL_SET:
+ arg2 defines thread specific mode
+
+ PR_MCE_KILL_EARLY:
+ Early kill
+ PR_MCE_KILL_LATE:
+ Late kill
+ PR_MCE_KILL_DEFAULT:
+ Use system global default
+
Note that if you want to have a dedicated thread which handles
the SIGBUS(BUS_MCEERR_AO) on behalf of the process, you should
call prctl(PR_MCE_KILL_EARLY) on the designated thread. Otherwise,
@@ -92,77 +107,64 @@ PR_MCE_KILL
PR_MCE_KILL_GET
return current mode
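+
+For example, a thread opts into early kill with (a minimal sketch)::
+
+  #include <sys/prctl.h>
+
+  int main(void)
+  {
+      /* handle memory errors affecting this thread as early kill */
+      return prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
+  }
+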
+Testing
+=======
----
-
-Testing:
-
-madvise(MADV_HWPOISON, ....)
- (as root)
- Poison a page in the process for testing
-
+* madvise(MADV_HWPOISON, ....) (as root) - Poison a page in the
+ process for testing (see the example program after this list)
-hwpoison-inject module through debugfs
+* hwpoison-inject module through debugfs ``/sys/kernel/debug/hwpoison/``
-/sys/kernel/debug/hwpoison/
+ corrupt-pfn
+ Inject hwpoison fault at PFN echoed into this file. This does
+ some early filtering to avoid corrupting unintended pages in test suites.
-corrupt-pfn
+ unpoison-pfn
+ Software-unpoison page at PFN echoed into this file. This way
+ a page can be reused again. This only works for Linux
+ injected failures, not for real memory failures.
-Inject hwpoison fault at PFN echoed into this file. This does
-some early filtering to avoid corrupted unintended pages in test suites.
+ Note these injection interfaces are not stable and might change between
+ kernel versions.
-unpoison-pfn
+ corrupt-filter-dev-major, corrupt-filter-dev-minor
+ Only handle memory failures to pages associated with the file
+ system defined by block device major/minor. -1U is the
+ wildcard value. This should only be used for testing with
+ artificial injection.
-Software-unpoison page at PFN echoed into this file. This
-way a page can be reused again.
-This only works for Linux injected failures, not for real
-memory failures.
+ corrupt-filter-memcg
+ Limit injection to pages owned by a memory cgroup (memcg), specified
+ by the inode number of the memcg.
-Note these injection interfaces are not stable and might change between
-kernel versions
+ Example::
-corrupt-filter-dev-major
-corrupt-filter-dev-minor
+ mkdir /sys/fs/cgroup/mem/hwpoison
-Only handle memory failures to pages associated with the file system defined
-by block device major/minor. -1U is the wildcard value.
-This should be only used for testing with artificial injection.
+ usemem -m 100 -s 1000 &
+ echo `jobs -p` > /sys/fs/cgroup/mem/hwpoison/tasks
-corrupt-filter-memcg
+ memcg_ino=$(ls -id /sys/fs/cgroup/mem/hwpoison | cut -f1 -d' ')
+ echo $memcg_ino > /debug/hwpoison/corrupt-filter-memcg
-Limit injection to pages owned by memgroup. Specified by inode number
-of the memcg.
+ page-types -p `pidof init` --hwpoison # shall do nothing
+ page-types -p `pidof usemem` --hwpoison # poison its pages
-Example:
- mkdir /sys/fs/cgroup/mem/hwpoison
+ corrupt-filter-flags-mask, corrupt-filter-flags-value
+ When specified, only poison pages if ((page_flags & mask) ==
+ value). This allows stress testing of many kinds of
+ pages. The page_flags are the same as in /proc/kpageflags. The
+ flag bits are defined in include/linux/kernel-page-flags.h and
+ documented in Documentation/admin-guide/mm/pagemap.rst
- usemem -m 100 -s 1000 &
- echo `jobs -p` > /sys/fs/cgroup/mem/hwpoison/tasks
+* Architecture specific MCE injector
- memcg_ino=$(ls -id /sys/fs/cgroup/mem/hwpoison | cut -f1 -d' ')
- echo $memcg_ino > /debug/hwpoison/corrupt-filter-memcg
+ x86 has mce-inject, mce-test
- page-types -p `pidof init` --hwpoison # shall do nothing
- page-types -p `pidof usemem` --hwpoison # poison its pages
+ Some portable hwpoison test programs in mce-test, see below.
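+
+A minimal poisoning test, as referenced from the madvise item above, might
+look like this (a sketch; run as root on a kernel with CONFIG_MEMORY_FAILURE)::
+
+  #include <stdio.h>
+  #include <sys/mman.h>
+  #include <unistd.h>
+
+  int main(void)
+  {
+      long psz = sysconf(_SC_PAGESIZE);
+      char *p = mmap(NULL, psz, PROT_READ | PROT_WRITE,
+                     MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+
+      if (p == MAP_FAILED)
+          return 1;
+      p[0] = 1;    /* fault the page in first */
+      if (madvise(p, psz, MADV_HWPOISON))
+          perror("madvise");
+      return 0;
+  }
+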
-corrupt-filter-flags-mask
-corrupt-filter-flags-value
-
-When specified, only poison pages if ((page_flags & mask) == value).
-This allows stress testing of many kinds of pages. The page_flags
-are the same as in /proc/kpageflags. The flag bits are defined in
-include/linux/kernel-page-flags.h and documented in
-Documentation/vm/pagemap.txt
-
-Architecture specific MCE injector
-
-x86 has mce-inject, mce-test
-
-Some portable hwpoison test programs in mce-test, see blow.
-
----
-
-References:
+References
+==========
http://halobates.de/mce-lc09-2.pdf
Overview presentation from LinuxCon 09
@@ -174,14 +176,11 @@ git://git.kernel.org/pub/scm/utils/cpu/mce/mce-inject.git
x86 specific injector
----
-
-Limitations:
-
+Limitations
+===========
- Not all page types are supported and never will be. Most kernel internal
-objects cannot be recovered, only LRU pages for now.
+ objects cannot be recovered, only LRU pages for now.
- Right now hugepage support is missing.
---
Andi Kleen, Oct 2009
-
diff --git a/Documentation/vm/index.rst b/Documentation/vm/index.rst
new file mode 100644
index 000000000000..c4ded22197ca
--- /dev/null
+++ b/Documentation/vm/index.rst
@@ -0,0 +1,50 @@
+=====================================
+Linux Memory Management Documentation
+=====================================
+
+This is a collection of documents about the Linux memory management (mm) subsystem.
+
+User guides for MM features
+===========================
+
+The following documents provide guides for controlling and tuning
+various features of Linux memory management.
+
+.. toctree::
+ :maxdepth: 1
+
+ swap_numa
+ zswap
+
+Kernel developers MM documentation
+==================================
+
+The documents below describe MM internals with different levels of
+detail, ranging from notes and mailing list responses to elaborate
+descriptions of data structures and algorithms.
+
+.. toctree::
+ :maxdepth: 1
+
+ active_mm
+ balance
+ cleancache
+ frontswap
+ highmem
+ hmm
+ hugetlbfs_reserv
+ hwpoison
+ ksm
+ mmu_notifier
+ numa
+ overcommit-accounting
+ page_frags
+ page_migration
+ page_owner
+ remap_file_pages
+ slub
+ split_page_table_lock
+ transhuge
+ unevictable-lru
+ z3fold
+ zsmalloc
diff --git a/Documentation/vm/ksm.rst b/Documentation/vm/ksm.rst
new file mode 100644
index 000000000000..d32016d9be2c
--- /dev/null
+++ b/Documentation/vm/ksm.rst
@@ -0,0 +1,87 @@
+.. _ksm:
+
+=======================
+Kernel Samepage Merging
+=======================
+
+KSM is a memory-saving de-duplication feature, enabled by CONFIG_KSM=y,
+added to the Linux kernel in 2.6.32. See ``mm/ksm.c`` for its implementation,
+and http://lwn.net/Articles/306704/ and http://lwn.net/Articles/330589/
+
+The userspace interface of KSM is described in :ref:`Documentation/admin-guide/mm/ksm.rst <admin_guide_ksm>`.
+
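+For reference, an application registers a range with KSM roughly like
+this (a minimal userspace sketch)::
+
+  #include <sys/mman.h>
+
+  int main(void)
+  {
+      size_t len = 16 * 4096;
+      void *buf = mmap(NULL, len, PROT_READ | PROT_WRITE,
+                       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+
+      if (buf == MAP_FAILED)
+          return 1;
+      /* mark the range as a merge candidate for ksmd */
+      return madvise(buf, len, MADV_MERGEABLE);
+  }
+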
+Design
+======
+
+Overview
+--------
+
+.. kernel-doc:: mm/ksm.c
+ :DOC: Overview
+
+Reverse mapping
+---------------
+KSM maintains reverse mapping information for KSM pages in the stable
+tree.
+
+If a KSM page is shared by fewer than ``max_page_sharing`` VMAs,
+the node of the stable tree that represents such KSM page points to a
+list of :c:type:`struct rmap_item` and the ``page->mapping`` of the
+KSM page points to the stable tree node.
+
+When the sharing passes this threshold, KSM adds a second dimension to
+the stable tree. The tree node becomes a "chain" that links one or
+more "dups". Each "dup" keeps reverse mapping information for a KSM
+page with ``page->mapping`` pointing to that "dup".
+
+Every "chain" and all "dups" linked into a "chain" enforce the
+invariant that they represent the same write protected memory content,
+even if each "dup" is pointed to by a different KSM page copy of
+that content.
+
+This way the stable tree lookup computational complexity is unaffected
+compared to an unlimited list of reverse mappings. It is still
+enforced that there cannot be KSM page content duplicates in the
+stable tree itself.
+
+The deduplication limit enforced by ``max_page_sharing`` is required
+to keep the virtual memory rmap lists from growing too large. The rmap
+walk has O(N) complexity where N is the number of rmap_items
+(i.e. virtual mappings) that are sharing the page, which is in turn
+capped by ``max_page_sharing``. So this effectively spreads the linear
+O(N) computational complexity from rmap walk context over different
+KSM pages. The ksmd walk over the stable_node "chains" is also O(N),
+but N is the number of stable_node "dups", not the number of
+rmap_items, so it does not have a significant impact on ksmd performance. In
+practice the best stable_node "dup" candidate will be kept and found
+at the head of the "dups" list.
+
+High values of ``max_page_sharing`` result in faster memory merging
+(because there will be fewer stable_node dups queued into the
+stable_node chain->hlist to check for pruning) and a higher
+deduplication factor, at the expense of a slower worst case for rmap
+walks of any KSM page, which can happen during swapping, compaction,
+NUMA balancing and page migration.
+
+The ``stable_node_dups/stable_node_chains`` ratio is also affected by the
+``max_page_sharing`` tunable, and a high ratio may indicate fragmentation
+in the stable_node dups. Such fragmentation could be solved by introducing
+fragmentation algorithms in ksmd which would refile rmap_items from
+one stable_node dup to another stable_node dup, in order to free up
+stable_node "dups" with few rmap_items in them. However, that may increase
+ksmd CPU usage and possibly slow down the read-only computations on
+the KSM pages of the applications.
+
+The whole list of stable_node "dups" linked in the stable_node
+"chains" is scanned periodically in order to prune stale stable_nodes.
+The frequency of such scans is defined by the
+``stable_node_chains_prune_millisecs`` sysfs tunable.
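+
+For example, the prune interval can be changed from userspace like this
+(a sketch; requires root and CONFIG_KSM)::
+
+  #include <stdio.h>
+
+  int main(void)
+  {
+      FILE *f = fopen("/sys/kernel/mm/ksm/stable_node_chains_prune_millisecs", "w");
+
+      if (!f)
+          return 1;
+      fprintf(f, "2000\n");    /* prune stale stable_node dups every 2s */
+      return fclose(f) != 0;
+  }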
+
+Reference
+---------
+.. kernel-doc:: mm/ksm.c
+ :functions: mm_slot ksm_scan stable_node rmap_item
+
+--
+Izik Eidus,
+Hugh Dickins, 17 Nov 2009
diff --git a/Documentation/vm/ksm.txt b/Documentation/vm/ksm.txt
deleted file mode 100644
index 6686bd267dc9..000000000000
--- a/Documentation/vm/ksm.txt
+++ /dev/null
@@ -1,178 +0,0 @@
-How to use the Kernel Samepage Merging feature
-----------------------------------------------
-
-KSM is a memory-saving de-duplication feature, enabled by CONFIG_KSM=y,
-added to the Linux kernel in 2.6.32. See mm/ksm.c for its implementation,
-and http://lwn.net/Articles/306704/ and http://lwn.net/Articles/330589/
-
-The KSM daemon ksmd periodically scans those areas of user memory which
-have been registered with it, looking for pages of identical content which
-can be replaced by a single write-protected page (which is automatically
-copied if a process later wants to update its content).
-
-KSM was originally developed for use with KVM (where it was known as
-Kernel Shared Memory), to fit more virtual machines into physical memory,
-by sharing the data common between them. But it can be useful to any
-application which generates many instances of the same data.
-
-KSM only merges anonymous (private) pages, never pagecache (file) pages.
-KSM's merged pages were originally locked into kernel memory, but can now
-be swapped out just like other user pages (but sharing is broken when they
-are swapped back in: ksmd must rediscover their identity and merge again).
-
-KSM only operates on those areas of address space which an application
-has advised to be likely candidates for merging, by using the madvise(2)
-system call: int madvise(addr, length, MADV_MERGEABLE).
-
-The app may call int madvise(addr, length, MADV_UNMERGEABLE) to cancel
-that advice and restore unshared pages: whereupon KSM unmerges whatever
-it merged in that range. Note: this unmerging call may suddenly require
-more memory than is available - possibly failing with EAGAIN, but more
-probably arousing the Out-Of-Memory killer.
-
-If KSM is not configured into the running kernel, madvise MADV_MERGEABLE
-and MADV_UNMERGEABLE simply fail with EINVAL. If the running kernel was
-built with CONFIG_KSM=y, those calls will normally succeed: even if the
-the KSM daemon is not currently running, MADV_MERGEABLE still registers
-the range for whenever the KSM daemon is started; even if the range
-cannot contain any pages which KSM could actually merge; even if
-MADV_UNMERGEABLE is applied to a range which was never MADV_MERGEABLE.
-
-If a region of memory must be split into at least one new MADV_MERGEABLE
-or MADV_UNMERGEABLE region, the madvise may return ENOMEM if the process
-will exceed vm.max_map_count (see Documentation/sysctl/vm.txt).
-
-Like other madvise calls, they are intended for use on mapped areas of
-the user address space: they will report ENOMEM if the specified range
-includes unmapped gaps (though working on the intervening mapped areas),
-and might fail with EAGAIN if not enough memory for internal structures.
-
-Applications should be considerate in their use of MADV_MERGEABLE,
-restricting its use to areas likely to benefit. KSM's scans may use a lot
-of processing power: some installations will disable KSM for that reason.
-
-The KSM daemon is controlled by sysfs files in /sys/kernel/mm/ksm/,
-readable by all but writable only by root:
-
-pages_to_scan - how many present pages to scan before ksmd goes to sleep
- e.g. "echo 100 > /sys/kernel/mm/ksm/pages_to_scan"
- Default: 100 (chosen for demonstration purposes)
-
-sleep_millisecs - how many milliseconds ksmd should sleep before next scan
- e.g. "echo 20 > /sys/kernel/mm/ksm/sleep_millisecs"
- Default: 20 (chosen for demonstration purposes)
-
-merge_across_nodes - specifies if pages from different numa nodes can be merged.
- When set to 0, ksm merges only pages which physically
- reside in the memory area of same NUMA node. That brings
- lower latency to access of shared pages. Systems with more
- nodes, at significant NUMA distances, are likely to benefit
- from the lower latency of setting 0. Smaller systems, which
- need to minimize memory usage, are likely to benefit from
- the greater sharing of setting 1 (default). You may wish to
- compare how your system performs under each setting, before
- deciding on which to use. merge_across_nodes setting can be
- changed only when there are no ksm shared pages in system:
- set run 2 to unmerge pages first, then to 1 after changing
- merge_across_nodes, to remerge according to the new setting.
- Default: 1 (merging across nodes as in earlier releases)
-
-run - set 0 to stop ksmd from running but keep merged pages,
- set 1 to run ksmd e.g. "echo 1 > /sys/kernel/mm/ksm/run",
- set 2 to stop ksmd and unmerge all pages currently merged,
- but leave mergeable areas registered for next run
- Default: 0 (must be changed to 1 to activate KSM,
- except if CONFIG_SYSFS is disabled)
-
-use_zero_pages - specifies whether empty pages (i.e. allocated pages
- that only contain zeroes) should be treated specially.
- When set to 1, empty pages are merged with the kernel
- zero page(s) instead of with each other as it would
- happen normally. This can improve the performance on
- architectures with coloured zero pages, depending on
- the workload. Care should be taken when enabling this
- setting, as it can potentially degrade the performance
- of KSM for some workloads, for example if the checksums
- of pages candidate for merging match the checksum of
- an empty page. This setting can be changed at any time,
- it is only effective for pages merged after the change.
- Default: 0 (normal KSM behaviour as in earlier releases)
-
-max_page_sharing - Maximum sharing allowed for each KSM page. This
- enforces a deduplication limit to avoid the virtual
- memory rmap lists to grow too large. The minimum
- value is 2 as a newly created KSM page will have at
- least two sharers. The rmap walk has O(N)
- complexity where N is the number of rmap_items
- (i.e. virtual mappings) that are sharing the page,
- which is in turn capped by max_page_sharing. So
- this effectively spread the the linear O(N)
- computational complexity from rmap walk context
- over different KSM pages. The ksmd walk over the
- stable_node "chains" is also O(N), but N is the
- number of stable_node "dups", not the number of
- rmap_items, so it has not a significant impact on
- ksmd performance. In practice the best stable_node
- "dup" candidate will be kept and found at the head
- of the "dups" list. The higher this value the
- faster KSM will merge the memory (because there
- will be fewer stable_node dups queued into the
- stable_node chain->hlist to check for pruning) and
- the higher the deduplication factor will be, but
- the slowest the worst case rmap walk could be for
- any given KSM page. Slowing down the rmap_walk
- means there will be higher latency for certain
- virtual memory operations happening during
- swapping, compaction, NUMA balancing and page
- migration, in turn decreasing responsiveness for
- the caller of those virtual memory operations. The
- scheduler latency of other tasks not involved with
- the VM operations doing the rmap walk is not
- affected by this parameter as the rmap walks are
- always schedule friendly themselves.
-
-stable_node_chains_prune_millisecs - How frequently to walk the whole
- list of stable_node "dups" linked in the
- stable_node "chains" in order to prune stale
- stable_nodes. Smaller milllisecs values will free
- up the KSM metadata with lower latency, but they
- will make ksmd use more CPU during the scan. This
- only applies to the stable_node chains so it's a
- noop if not a single KSM page hit the
- max_page_sharing yet (there would be no stable_node
- chains in such case).
-
-The effectiveness of KSM and MADV_MERGEABLE is shown in /sys/kernel/mm/ksm/:
-
-pages_shared - how many shared pages are being used
-pages_sharing - how many more sites are sharing them i.e. how much saved
-pages_unshared - how many pages unique but repeatedly checked for merging
-pages_volatile - how many pages changing too fast to be placed in a tree
-full_scans - how many times all mergeable areas have been scanned
-
-stable_node_chains - number of stable node chains allocated, this is
- effectively the number of KSM pages that hit the
- max_page_sharing limit
-stable_node_dups - number of stable node dups queued into the
- stable_node chains
-
-A high ratio of pages_sharing to pages_shared indicates good sharing, but
-a high ratio of pages_unshared to pages_sharing indicates wasted effort.
-pages_volatile embraces several different kinds of activity, but a high
-proportion there would also indicate poor use of madvise MADV_MERGEABLE.
-
-The maximum possible page_sharing/page_shared ratio is limited by the
-max_page_sharing tunable. To increase the ratio max_page_sharing must
-be increased accordingly.
-
-The stable_node_dups/stable_node_chains ratio is also affected by the
-max_page_sharing tunable, and an high ratio may indicate fragmentation
-in the stable_node dups, which could be solved by introducing
-fragmentation algorithms in ksmd which would refile rmap_items from
-one stable_node dup to another stable_node dup, in order to freeup
-stable_node "dups" with few rmap_items in them, but that may increase
-the ksmd CPU usage and possibly slowdown the readonly computations on
-the KSM pages of the applications.
-
-Izik Eidus,
-Hugh Dickins, 17 Nov 2009
diff --git a/Documentation/vm/mmu_notifier.rst b/Documentation/vm/mmu_notifier.rst
new file mode 100644
index 000000000000..47baa1cf28c5
--- /dev/null
+++ b/Documentation/vm/mmu_notifier.rst
@@ -0,0 +1,99 @@
+.. _mmu_notifier:
+
+When do you need to notify inside page table lock?
+===================================================
+
+When clearing a pte/pmd we are given a choice to notify the event (the
+notify version of \*_clear_flush calls mmu_notifier_invalidate_range)
+under the page table lock. But that notification is not necessary in all cases.
+
+For secondary TLBs (non-CPU TLBs) like the IOMMU TLB or a device TLB
+(when the device uses things like ATS/PASID to get the IOMMU to walk the
+CPU page table to access a process virtual address space), there are only
+2 cases when you need to notify those secondary TLBs while holding the
+page table lock when clearing a pte/pmd:
+
+ A) page backing address is free before mmu_notifier_invalidate_range_end()
+ B) a page table entry is updated to point to a new page (COW, write fault
+ on zero page, __replace_page(), ...)
+
+Case A is obvious: you do not want to take the risk of the device writing
+to a page that might now be used by some completely different task.
+
+Case B is more subtle. For correctness it requires the following sequence to
+happen:
+
+ - take page table lock
+ - clear page table entry and notify ([pmd/pte]p_huge_clear_flush_notify())
+ - set page table entry to point to new page
+
+If clearing the page table entry is not followed by a notify before setting
+the new pte/pmd value, then you can break a memory model like C11 or C++11
+for the device.
+
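+Schematically, the required ordering for case B looks like this (a sketch
+modeled on the THP COW path; helper names are illustrative)::
+
+  ptl = pmd_lock(mm, pmd);
+  /* clear and notify *before* the new page becomes visible */
+  entry = pmdp_huge_clear_flush_notify(vma, haddr, pmd);
+  set_pmd_at(mm, haddr, pmd, mk_huge_pmd(new_page, vma->vm_page_prot));
+  spin_unlock(ptl);
+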
+Consider the following scenario (the device uses a feature similar to ATS/PASID):
+
+Two addresses addrA and addrB such that \|addrA - addrB\| >= PAGE_SIZE are
+assumed to be write protected for COW (other cases of B apply too).
+
+::
+
+ [Time N] --------------------------------------------------------------------
+ CPU-thread-0 {try to write to addrA}
+ CPU-thread-1 {try to write to addrB}
+ CPU-thread-2 {}
+ CPU-thread-3 {}
+ DEV-thread-0 {read addrA and populate device TLB}
+ DEV-thread-2 {read addrB and populate device TLB}
+ [Time N+1] ------------------------------------------------------------------
+ CPU-thread-0 {COW_step0: {mmu_notifier_invalidate_range_start(addrA)}}
+ CPU-thread-1 {COW_step0: {mmu_notifier_invalidate_range_start(addrB)}}
+ CPU-thread-2 {}
+ CPU-thread-3 {}
+ DEV-thread-0 {}
+ DEV-thread-2 {}
+ [Time N+2] ------------------------------------------------------------------
+ CPU-thread-0 {COW_step1: {update page table to point to new page for addrA}}
+ CPU-thread-1 {COW_step1: {update page table to point to new page for addrB}}
+ CPU-thread-2 {}
+ CPU-thread-3 {}
+ DEV-thread-0 {}
+ DEV-thread-2 {}
+ [Time N+3] ------------------------------------------------------------------
+ CPU-thread-0 {preempted}
+ CPU-thread-1 {preempted}
+ CPU-thread-2 {write to addrA which is a write to new page}
+ CPU-thread-3 {}
+ DEV-thread-0 {}
+ DEV-thread-2 {}
+ [Time N+3] ------------------------------------------------------------------
+ CPU-thread-0 {preempted}
+ CPU-thread-1 {preempted}
+ CPU-thread-2 {}
+ CPU-thread-3 {write to addrB which is a write to new page}
+ DEV-thread-0 {}
+ DEV-thread-2 {}
+ [Time N+4] ------------------------------------------------------------------
+ CPU-thread-0 {preempted}
+ CPU-thread-1 {COW_step3: {mmu_notifier_invalidate_range_end(addrB)}}
+ CPU-thread-2 {}
+ CPU-thread-3 {}
+ DEV-thread-0 {}
+ DEV-thread-2 {}
+ [Time N+5] ------------------------------------------------------------------
+ CPU-thread-0 {preempted}
+ CPU-thread-1 {}
+ CPU-thread-2 {}
+ CPU-thread-3 {}
+ DEV-thread-0 {read addrA from old page}
+ DEV-thread-2 {read addrB from new page}
+
+So here, because at time N+2 the cleared page table entry was not paired
+with a notification to invalidate the secondary TLB, the device sees the new
+value for addrB before seeing the new value for addrA. This breaks total
+memory ordering for the device.
+
+When changing a pte to write protect, or to point to a new write protected
+page with the same content (KSM), it is fine to delay the
+mmu_notifier_invalidate_range call to mmu_notifier_invalidate_range_end()
+outside the page table lock. This is true even if the thread doing the page
+table update is preempted right after releasing the page table lock but
+before calling mmu_notifier_invalidate_range_end().
diff --git a/Documentation/vm/mmu_notifier.txt b/Documentation/vm/mmu_notifier.txt
deleted file mode 100644
index 23b462566bb7..000000000000
--- a/Documentation/vm/mmu_notifier.txt
+++ /dev/null
@@ -1,93 +0,0 @@
-When do you need to notify inside page table lock ?
-
-When clearing a pte/pmd we are given a choice to notify the event through
-(notify version of *_clear_flush call mmu_notifier_invalidate_range) under
-the page table lock. But that notification is not necessary in all cases.
-
-For secondary TLB (non CPU TLB) like IOMMU TLB or device TLB (when device use
-thing like ATS/PASID to get the IOMMU to walk the CPU page table to access a
-process virtual address space). There is only 2 cases when you need to notify
-those secondary TLB while holding page table lock when clearing a pte/pmd:
-
- A) page backing address is free before mmu_notifier_invalidate_range_end()
- B) a page table entry is updated to point to a new page (COW, write fault
- on zero page, __replace_page(), ...)
-
-Case A is obvious you do not want to take the risk for the device to write to
-a page that might now be used by some completely different task.
-
-Case B is more subtle. For correctness it requires the following sequence to
-happen:
- - take page table lock
- - clear page table entry and notify ([pmd/pte]p_huge_clear_flush_notify())
- - set page table entry to point to new page
-
-If clearing the page table entry is not followed by a notify before setting
-the new pte/pmd value then you can break memory model like C11 or C++11 for
-the device.
-
-Consider the following scenario (device use a feature similar to ATS/PASID):
-
-Two address addrA and addrB such that |addrA - addrB| >= PAGE_SIZE we assume
-they are write protected for COW (other case of B apply too).
-
-[Time N] --------------------------------------------------------------------
-CPU-thread-0 {try to write to addrA}
-CPU-thread-1 {try to write to addrB}
-CPU-thread-2 {}
-CPU-thread-3 {}
-DEV-thread-0 {read addrA and populate device TLB}
-DEV-thread-2 {read addrB and populate device TLB}
-[Time N+1] ------------------------------------------------------------------
-CPU-thread-0 {COW_step0: {mmu_notifier_invalidate_range_start(addrA)}}
-CPU-thread-1 {COW_step0: {mmu_notifier_invalidate_range_start(addrB)}}
-CPU-thread-2 {}
-CPU-thread-3 {}
-DEV-thread-0 {}
-DEV-thread-2 {}
-[Time N+2] ------------------------------------------------------------------
-CPU-thread-0 {COW_step1: {update page table to point to new page for addrA}}
-CPU-thread-1 {COW_step1: {update page table to point to new page for addrB}}
-CPU-thread-2 {}
-CPU-thread-3 {}
-DEV-thread-0 {}
-DEV-thread-2 {}
-[Time N+3] ------------------------------------------------------------------
-CPU-thread-0 {preempted}
-CPU-thread-1 {preempted}
-CPU-thread-2 {write to addrA which is a write to new page}
-CPU-thread-3 {}
-DEV-thread-0 {}
-DEV-thread-2 {}
-[Time N+3] ------------------------------------------------------------------
-CPU-thread-0 {preempted}
-CPU-thread-1 {preempted}
-CPU-thread-2 {}
-CPU-thread-3 {write to addrB which is a write to new page}
-DEV-thread-0 {}
-DEV-thread-2 {}
-[Time N+4] ------------------------------------------------------------------
-CPU-thread-0 {preempted}
-CPU-thread-1 {COW_step3: {mmu_notifier_invalidate_range_end(addrB)}}
-CPU-thread-2 {}
-CPU-thread-3 {}
-DEV-thread-0 {}
-DEV-thread-2 {}
-[Time N+5] ------------------------------------------------------------------
-CPU-thread-0 {preempted}
-CPU-thread-1 {}
-CPU-thread-2 {}
-CPU-thread-3 {}
-DEV-thread-0 {read addrA from old page}
-DEV-thread-2 {read addrB from new page}
-
-So here because at time N+2 the clear page table entry was not pair with a
-notification to invalidate the secondary TLB, the device see the new value for
-addrB before seing the new value for addrA. This break total memory ordering
-for the device.
-
-When changing a pte to write protect or to point to a new write protected page
-with same content (KSM) it is fine to delay the mmu_notifier_invalidate_range
-call to mmu_notifier_invalidate_range_end() outside the page table lock. This
-is true even if the thread doing the page table update is preempted right after
-releasing page table lock but before call mmu_notifier_invalidate_range_end().
diff --git a/Documentation/vm/numa b/Documentation/vm/numa.rst
index a31b85b9bb88..185d8a568168 100644
--- a/Documentation/vm/numa
+++ b/Documentation/vm/numa.rst
@@ -1,6 +1,10 @@
+.. _numa:
+
Started Nov 1999 by Kanoj Sarcar <kanoj@sgi.com>
+=============
What is NUMA?
+=============
This question can be answered from a couple of perspectives: the
hardware view and the Linux software view.
@@ -106,7 +110,7 @@ to improve NUMA locality using various CPU affinity command line interfaces,
such as taskset(1) and numactl(1), and program interfaces such as
sched_setaffinity(2). Further, one can modify the kernel's default local
allocation behavior using Linux NUMA memory policy.
-[see Documentation/vm/numa_memory_policy.txt.]
+[see Documentation/admin-guide/mm/numa_memory_policy.rst.]
System administrators can restrict the CPUs and nodes' memories that a non-
privileged user can specify in the scheduling or NUMA commands and functions
diff --git a/Documentation/vm/numa_memory_policy.txt b/Documentation/vm/numa_memory_policy.txt
deleted file mode 100644
index 622b927816e7..000000000000
--- a/Documentation/vm/numa_memory_policy.txt
+++ /dev/null
@@ -1,452 +0,0 @@
-
-What is Linux Memory Policy?
-
-In the Linux kernel, "memory policy" determines from which node the kernel will
-allocate memory in a NUMA system or in an emulated NUMA system. Linux has
-supported platforms with Non-Uniform Memory Access architectures since 2.4.?.
-The current memory policy support was added to Linux 2.6 around May 2004. This
-document attempts to describe the concepts and APIs of the 2.6 memory policy
-support.
-
-Memory policies should not be confused with cpusets
-(Documentation/cgroup-v1/cpusets.txt)
-which is an administrative mechanism for restricting the nodes from which
-memory may be allocated by a set of processes. Memory policies are a
-programming interface that a NUMA-aware application can take advantage of. When
-both cpusets and policies are applied to a task, the restrictions of the cpuset
-takes priority. See "MEMORY POLICIES AND CPUSETS" below for more details.
-
-MEMORY POLICY CONCEPTS
-
-Scope of Memory Policies
-
-The Linux kernel supports _scopes_ of memory policy, described here from
-most general to most specific:
-
- System Default Policy: this policy is "hard coded" into the kernel. It
- is the policy that governs all page allocations that aren't controlled
- by one of the more specific policy scopes discussed below. When the
- system is "up and running", the system default policy will use "local
- allocation" described below. However, during boot up, the system
- default policy will be set to interleave allocations across all nodes
- with "sufficient" memory, so as not to overload the initial boot node
- with boot-time allocations.
-
- Task/Process Policy: this is an optional, per-task policy. When defined
- for a specific task, this policy controls all page allocations made by or
- on behalf of the task that aren't controlled by a more specific scope.
- If a task does not define a task policy, then all page allocations that
- would have been controlled by the task policy "fall back" to the System
- Default Policy.
-
- The task policy applies to the entire address space of a task. Thus,
- it is inheritable, and indeed is inherited, across both fork()
- [clone() w/o the CLONE_VM flag] and exec*(). This allows a parent task
- to establish the task policy for a child task exec()'d from an
- executable image that has no awareness of memory policy. See the
- MEMORY POLICY APIS section, below, for an overview of the system call
- that a task may use to set/change its task/process policy.
-
- In a multi-threaded task, task policies apply only to the thread
- [Linux kernel task] that installs the policy and any threads
- subsequently created by that thread. Any sibling threads existing
- at the time a new task policy is installed retain their current
- policy.
-
- A task policy applies only to pages allocated after the policy is
- installed. Any pages already faulted in by the task when the task
- changes its task policy remain where they were allocated based on
- the policy at the time they were allocated.
-
- VMA Policy: A "VMA" or "Virtual Memory Area" refers to a range of a task's
- virtual address space. A task may define a specific policy for a range
- of its virtual address space. See the MEMORY POLICIES APIS section,
- below, for an overview of the mbind() system call used to set a VMA
- policy.
-
- A VMA policy will govern the allocation of pages that back this region of
- the address space. Any regions of the task's address space that don't
- have an explicit VMA policy will fall back to the task policy, which may
- itself fall back to the System Default Policy.
-
- VMA policies have a few complicating details:
-
- VMA policy applies ONLY to anonymous pages. These include pages
- allocated for anonymous segments, such as the task stack and heap, and
- any regions of the address space mmap()ed with the MAP_ANONYMOUS flag.
- If a VMA policy is applied to a file mapping, it will be ignored if
- the mapping used the MAP_SHARED flag. If the file mapping used the
- MAP_PRIVATE flag, the VMA policy will only be applied when an
- anonymous page is allocated on an attempt to write to the mapping--
- i.e., at Copy-On-Write.
-
- VMA policies are shared between all tasks that share a virtual address
- space--a.k.a. threads--independent of when the policy is installed; and
- they are inherited across fork(). However, because VMA policies refer
- to a specific region of a task's address space, and because the address
- space is discarded and recreated on exec*(), VMA policies are NOT
- inheritable across exec(). Thus, only NUMA-aware applications may
- use VMA policies.
-
- A task may install a new VMA policy on a sub-range of a previously
- mmap()ed region. When this happens, Linux splits the existing virtual
- memory area into 2 or 3 VMAs, each with it's own policy.
-
- By default, VMA policy applies only to pages allocated after the policy
- is installed. Any pages already faulted into the VMA range remain
- where they were allocated based on the policy at the time they were
- allocated. However, since 2.6.16, Linux supports page migration via
- the mbind() system call, so that page contents can be moved to match
- a newly installed policy.
-
- Shared Policy: Conceptually, shared policies apply to "memory objects"
- mapped shared into one or more tasks' distinct address spaces. An
- application installs a shared policies the same way as VMA policies--using
- the mbind() system call specifying a range of virtual addresses that map
- the shared object. However, unlike VMA policies, which can be considered
- to be an attribute of a range of a task's address space, shared policies
- apply directly to the shared object. Thus, all tasks that attach to the
- object share the policy, and all pages allocated for the shared object,
- by any task, will obey the shared policy.
-
- As of 2.6.22, only shared memory segments, created by shmget() or
- mmap(MAP_ANONYMOUS|MAP_SHARED), support shared policy. When shared
- policy support was added to Linux, the associated data structures were
- added to hugetlbfs shmem segments. At the time, hugetlbfs did not
- support allocation at fault time--a.k.a lazy allocation--so hugetlbfs
- shmem segments were never "hooked up" to the shared policy support.
- Although hugetlbfs segments now support lazy allocation, their support
- for shared policy has not been completed.
-
- As mentioned above [re: VMA policies], allocations of page cache
- pages for regular files mmap()ed with MAP_SHARED ignore any VMA
- policy installed on the virtual address range backed by the shared
- file mapping. Rather, shared page cache pages, including pages backing
- private mappings that have not yet been written by the task, follow
- task policy, if any, else System Default Policy.
-
- The shared policy infrastructure supports different policies on subset
- ranges of the shared object. However, Linux still splits the VMA of
- the task that installs the policy for each range of distinct policy.
- Thus, different tasks that attach to a shared memory segment can have
- different VMA configurations mapping that one shared object. This
- can be seen by examining the /proc/<pid>/numa_maps of tasks sharing
- a shared memory region, when one task has installed shared policy on
- one or more ranges of the region.
-
-Components of Memory Policies
-
- A Linux memory policy consists of a "mode", optional mode flags, and an
- optional set of nodes. The mode determines the behavior of the policy,
- the optional mode flags determine the behavior of the mode, and the
- optional set of nodes can be viewed as the arguments to the policy
- behavior.
-
- Internally, memory policies are implemented by a reference counted
- structure, struct mempolicy. Details of this structure will be discussed
- in context, below, as required to explain the behavior.
-
- Linux memory policy supports the following 4 behavioral modes:
-
- Default Mode--MPOL_DEFAULT: This mode is only used in the memory
- policy APIs. Internally, MPOL_DEFAULT is converted to the NULL
- memory policy in all policy scopes. Any existing non-default policy
- will simply be removed when MPOL_DEFAULT is specified. As a result,
- MPOL_DEFAULT means "fall back to the next most specific policy scope."
-
- For example, a NULL or default task policy will fall back to the
- system default policy. A NULL or default vma policy will fall
- back to the task policy.
-
- When specified in one of the memory policy APIs, the Default mode
- does not use the optional set of nodes.
-
- It is an error for the set of nodes specified for this policy to
- be non-empty.
-
- MPOL_BIND: This mode specifies that memory must come from the
- set of nodes specified by the policy. Memory will be allocated from
- the node in the set with sufficient free memory that is closest to
- the node where the allocation takes place.
-
- MPOL_PREFERRED: This mode specifies that the allocation should be
- attempted from the single node specified in the policy. If that
- allocation fails, the kernel will search other nodes, in order of
- increasing distance from the preferred node based on information
- provided by the platform firmware.
-
- Internally, the Preferred policy uses a single node--the
- preferred_node member of struct mempolicy. When the internal
- mode flag MPOL_F_LOCAL is set, the preferred_node is ignored and
- the policy is interpreted as local allocation. "Local" allocation
- policy can be viewed as a Preferred policy that starts at the node
- containing the cpu where the allocation takes place.
-
- It is possible for the user to specify that local allocation is
- always preferred by passing an empty nodemask with this mode.
- If an empty nodemask is passed, the policy cannot use the
- MPOL_F_STATIC_NODES or MPOL_F_RELATIVE_NODES flags described
- below.
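-
- A minimal sketch of requesting local allocation this way (assumes the
- numactl package provides <numaif.h>):
-
-     #include <numaif.h>  /* set_mempolicy(), from the numactl package */
-
-     int use_local_alloc(void)
-     {
-         /* empty nodemask: always prefer the node of the running cpu */
-         return set_mempolicy(MPOL_PREFERRED, NULL, 0);
-     }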
-
- MPOL_INTERLEAVED: This mode specifies that page allocations be
- interleaved, on a page granularity, across the nodes specified in
- the policy. This mode also behaves slightly differently, based on
- the context where it is used:
-
- For allocation of anonymous pages and shared memory pages,
- Interleave mode indexes the set of nodes specified by the policy
- using the page offset of the faulting address into the segment
- [VMA] containing the address modulo the number of nodes specified
- by the policy. It then attempts to allocate a page, starting at
- the selected node, as if the node had been specified by a Preferred
- policy or had been selected by a local allocation. That is,
- allocation will follow the per node zonelist.
-
- For allocation of page cache pages, Interleave mode indexes the set
- of nodes specified by the policy using a node counter maintained
- per task. This counter wraps around to the lowest specified node
- after it reaches the highest specified node. This will tend to
- spread the pages out over the nodes specified by the policy based
- on the order in which they are allocated, rather than based on any
- page offset into an address range or file. During system boot up,
- the temporary interleaved system default policy works in this
- mode.
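-
- To illustrate the indexing, a hypothetical sketch (not the kernel's
- actual code) of the anonymous/shared memory case:
-
-     /* page offset of the faulting address within the segment */
-     unsigned long pgoff = (fault_addr - vma_start) >> PAGE_SHIFT;
-     int nid = policy_nodes[pgoff % nr_policy_nodes];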
-
- Linux memory policy supports the following optional mode flags:
-
- MPOL_F_STATIC_NODES: This flag specifies that the nodemask passed by
- the user should not be remapped if the task or VMA's set of allowed
- nodes changes after the memory policy has been defined.
-
- Without this flag, anytime a mempolicy is rebound because of a
- change in the set of allowed nodes, the node (Preferred) or
- nodemask (Bind, Interleave) is remapped to the new set of
- allowed nodes. This may result in nodes being used that were
- previously undesired.
-
- With this flag, if the user-specified nodes overlap with the
- nodes allowed by the task's cpuset, then the memory policy is
- applied to their intersection. If the two sets of nodes do not
- overlap, the Default policy is used.
-
- For example, consider a task that is attached to a cpuset with
- mems 1-3 that sets an Interleave policy over the same set. If
- the cpuset's mems change to 3-5, the Interleave will now occur
- over nodes 3, 4, and 5. With this flag, however, since only node
- 3 is allowed from the user's nodemask, the "interleave" only
- occurs over that node. If no nodes from the user's nodemask are
- now allowed, the Default behavior is used.
-
- MPOL_F_STATIC_NODES cannot be combined with the
- MPOL_F_RELATIVE_NODES flag. It also cannot be used for
- MPOL_PREFERRED policies that were created with an empty nodemask
- (local allocation).
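-
- A minimal sketch matching the example above (assumes the numactl
- package provides <numaif.h>):
-
-     #include <numaif.h>
-
-     int interleave_static(void)
-     {
-         /* interleave over nodes 1-3, and keep exactly these nodes
-            even if the cpuset's mems change later */
-         unsigned long nodes = (1UL << 1) | (1UL << 2) | (1UL << 3);
-
-         return set_mempolicy(MPOL_INTERLEAVE | MPOL_F_STATIC_NODES,
-                              &nodes, sizeof(nodes) * 8);
-     }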
-
- MPOL_F_RELATIVE_NODES: This flag specifies that the nodemask passed
- by the user will be mapped relative to the task's or VMA's set of
- allowed nodes. The kernel stores the user-passed nodemask, and if
- the set of allowed nodes changes, then that original nodemask will
- be remapped relative to the new set of allowed nodes.
-
- Without this flag (and without MPOL_F_STATIC_NODES), anytime a
- mempolicy is rebound because of a change in the set of allowed
- nodes, the node (Preferred) or nodemask (Bind, Interleave) is
- remapped to the new set of allowed nodes. That remap may not
- preserve the relative nature of the user's passed nodemask to its
- set of allowed nodes upon successive rebinds: a nodemask of
- 1,3,5 may be remapped to 7-9 and then to 1-3 if the set of
- allowed nodes is restored to its original state.
-
- With this flag, the remap is done so that the node numbers from
- the user's passed nodemask are relative to the set of allowed
- nodes. In other words, if nodes 0, 2, and 4 are set in the user's
- nodemask, the policy will be effected over the first (and in the
- Bind or Interleave case, the third and fifth) nodes in the set of
- allowed nodes. The nodemask passed by the user represents nodes
- relative to the task's or VMA's set of allowed nodes.
-
- If the user's nodemask includes nodes that are outside the range
- of the new set of allowed nodes (for example, node 5 is set in
- the user's nodemask when the set of allowed nodes is only 0-3),
- then the remap wraps around to the beginning of the nodemask and,
- if not already set, sets the node in the mempolicy nodemask.
-
- For example, consider a task that is attached to a cpuset with
- mems 2-5 that sets an Interleave policy over the same set with
- MPOL_F_RELATIVE_NODES. If the cpuset's mems change to 3-7, the
- interleave now occurs over nodes 3,5-7. If the cpuset's mems
- then change to 0,2-3,5, then the interleave occurs over nodes
- 0,2-3,5.
-
- Thanks to the consistent remapping, applications preparing
- nodemasks to specify memory policies using this flag should
- disregard their current, actual cpuset imposed memory placement
- and prepare the nodemask as if they were always located on
- memory nodes 0 to N-1, where N is the number of memory nodes the
- policy is intended to manage. Let the kernel then remap to the
- set of memory nodes allowed by the task's cpuset, as that may
- change over time.
-
- MPOL_F_RELATIVE_NODES cannot be combined with the
- MPOL_F_STATIC_NODES flag. It also cannot be used for
- MPOL_PREFERRED policies that were created with an empty nodemask
- (local allocation).
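-
- A minimal sketch of the "prepare the nodemask as if located on nodes
- 0 to N-1" advice above (assumes the numactl package provides
- <numaif.h>):
-
-     #include <numaif.h>
-
-     int interleave_relative(void)
-     {
-         /* "my first three allowed nodes", whichever nodes the
-            cpuset maps those positions to */
-         unsigned long nodes = (1UL << 0) | (1UL << 1) | (1UL << 2);
-
-         return set_mempolicy(MPOL_INTERLEAVE | MPOL_F_RELATIVE_NODES,
-                              &nodes, sizeof(nodes) * 8);
-     }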
-
-MEMORY POLICY REFERENCE COUNTING
-
-To resolve use/free races, struct mempolicy contains an atomic reference
-count field. Internal interfaces, mpol_get()/mpol_put() increment and
-decrement this reference count, respectively. mpol_put() will only free
-the structure back to the mempolicy kmem cache when the reference count
-goes to zero.
-
-When a new memory policy is allocated, its reference count is initialized
-to '1', representing the reference held by the task that is installing the
-new policy. When a pointer to a memory policy structure is stored in another
-structure, another reference is added, as the task's reference will be dropped
-on completion of the policy installation.
-
-During run-time "usage" of the policy, we attempt to minimize atomic operations
-on the reference count, as this can lead to cache lines bouncing between cpus
-and NUMA nodes. "Usage" here means one of the following:
-
-1) querying of the policy, either by the task itself [using the get_mempolicy()
- API discussed below] or by another task using the /proc/<pid>/numa_maps
- interface.
-
-2) examination of the policy to determine the policy mode and associated node
- or node lists, if any, for page allocation. This is considered a "hot
- path". Note that for MPOL_BIND, the "usage" extends across the entire
- allocation process, which may sleep during page reclamation, because the
- BIND policy nodemask is used, by reference, to filter ineligible nodes.
-
-We can avoid taking an extra reference during the usages listed above as
-follows:
-
-1) we never need to get/free the system default policy as this is never
- changed nor freed, once the system is up and running.
-
-2) for querying the policy, we do not need to take an extra reference on the
- target task's task policy nor vma policies because we always acquire the
- task's mm's mmap_sem for read during the query. The set_mempolicy() and
- mbind() APIs [see below] always acquire the mmap_sem for write when
- installing or replacing task or vma policies. Thus, there is no possibility
- of a task or thread freeing a policy while another task or thread is
- querying it.
-
-3) Page allocation usage of task or vma policy occurs in the fault path where
- we hold the mmap_sem for read. Again, because replacing the task or vma
- policy requires that the mmap_sem be held for write, the policy can't be
- freed out from under us while we're using it for page allocation.
-
-4) Shared policies require special consideration. One task can replace a
- shared memory policy while another task, with a distinct mmap_sem, is
- querying or allocating a page based on the policy. To resolve this
- potential race, the shared policy infrastructure adds an extra reference
- to the shared policy during lookup while holding a spin lock on the shared
- policy management structure. This requires that we drop this extra
- reference when we're finished "using" the policy. We must drop the
- extra reference on shared policies in the same query/allocation paths
- used for non-shared policies. For this reason, shared policies are marked
- as such, and the extra reference is dropped "conditionally"--i.e., only
- for shared policies.
-
- Because of this extra reference counting, and because we must lookup
- shared policies in a tree structure under spinlock, shared policies are
- more expensive to use in the page allocation path. This is especially
- true for shared policies on shared memory regions shared by tasks running
- on different NUMA nodes. This extra overhead can be avoided by always
- falling back to task or system default policy for shared memory regions,
- or by prefaulting the entire shared memory region into memory and locking
- it down. However, this might not be appropriate for all applications.
-
-MEMORY POLICY APIs
-
-Linux supports 3 system calls for controlling memory policy. These APIs
-always affect only the calling task, the calling task's address space, or
-some shared object mapped into the calling task's address space.
-
- Note: the headers that define these APIs and the parameter data types
- for user space applications reside in a package that is not part of
- the Linux kernel. The kernel system call interfaces, with the 'sys_'
- prefix, are defined in <linux/syscalls.h>; the mode and flag
- definitions are defined in <linux/mempolicy.h>.
-
-Set [Task] Memory Policy:
-
- long set_mempolicy(int mode, const unsigned long *nmask,
- unsigned long maxnode);
-
- Sets the calling task's "task/process memory policy" to the mode
- specified by the 'mode' argument and the set of nodes defined
- by 'nmask'. 'nmask' points to a bit mask of node ids containing
- at least 'maxnode' ids. Optional mode flags may be passed by
- combining the 'mode' argument with the flag (for example:
- MPOL_INTERLEAVE | MPOL_F_STATIC_NODES).
-
- See the set_mempolicy(2) man page for more details.
-
-
-Get [Task] Memory Policy or Related Information
-
- long get_mempolicy(int *mode,
- const unsigned long *nmask, unsigned long maxnode,
- void *addr, int flags);
-
- Queries the "task/process memory policy" of the calling task, or
- the policy or location of a specified virtual address, depending
- on the 'flags' argument.
-
- See the get_mempolicy(2) man page for more details.
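-
- For example, a minimal sketch of querying the policy that governs a
- given address ('addr' is a hypothetical pointer into a mapped region;
- assumes the numactl package provides <numaif.h>):
-
-     #include <numaif.h>
-
-     int policy_at(void *addr)
-     {
-         int mode;
-         unsigned long nodes = 0;
-
-         /* which policy governs the page at 'addr'? */
-         get_mempolicy(&mode, &nodes, sizeof(nodes) * 8,
-                       addr, MPOL_F_ADDR);
-         return mode;
-     }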
-
-
-Install VMA/Shared Policy for a Range of Task's Address Space
-
- long mbind(void *start, unsigned long len, int mode,
- const unsigned long *nmask, unsigned long maxnode,
- unsigned flags);
-
- mbind() installs the policy specified by (mode, nmask, maxnode) as
- a VMA policy for the range of the calling task's address space
- specified by the 'start' and 'len' arguments. Additional actions
- may be requested via the 'flags' argument.
-
- See the mbind(2) man page for more details.
-
-MEMORY POLICY COMMAND LINE INTERFACE
-
-Although not strictly part of the Linux implementation of memory policy,
-a command line tool, numactl(8), exists that allows one to:
-
-+ set the task policy for a specified program via set_mempolicy(2), fork(2) and
- exec(2)
-
-+ set the shared policy for a shared memory segment via mbind(2)
-
-The numactl(8) tool is packaged with the run-time version of the library
-containing the memory policy system call wrappers. Some distributions
-package the headers and compile-time libraries in a separate development
-package.
-
-
-MEMORY POLICIES AND CPUSETS
-
-Memory policies work within cpusets as described above. For memory policies
-that require a node or set of nodes, the nodes are restricted to the set of
-nodes whose memories are allowed by the cpuset constraints. If the nodemask
-specified for the policy contains nodes that are not allowed by the cpuset and
-MPOL_F_RELATIVE_NODES is not used, the intersection of the set of nodes
-specified for the policy and the set of nodes with memory is used. If the
-result is the empty set, the policy is considered invalid and cannot be
-installed. If MPOL_F_RELATIVE_NODES is used, the policy's nodes are mapped
-onto and folded into the task's set of allowed nodes as previously described.
-
-The interaction of memory policies and cpusets can be problematic when tasks
-in two cpusets share access to a memory region, such as shared memory segments
-created by shmget() or mmap() with the MAP_ANONYMOUS and MAP_SHARED flags, and
-any of the tasks installs shared policy on the region: only nodes whose
-memories are allowed in both cpusets may be used in the policies. Obtaining
-this information requires "stepping outside" the memory policy APIs to use the
-cpuset information and requires that one know in what cpusets other tasks might
-be attaching to the shared region. Furthermore, if the cpusets' allowed
-memory sets are disjoint, "local" allocation is the only valid policy.
diff --git a/Documentation/vm/overcommit-accounting b/Documentation/vm/overcommit-accounting
deleted file mode 100644
index cbfaaa674118..000000000000
--- a/Documentation/vm/overcommit-accounting
+++ /dev/null
@@ -1,80 +0,0 @@
-The Linux kernel supports the following overcommit handling modes
-
-0 - Heuristic overcommit handling. Obvious overcommits of
- address space are refused. Used for a typical system. It
- ensures a seriously wild allocation fails while allowing
- overcommit to reduce swap usage. root is allowed to
- allocate slightly more memory in this mode. This is the
- default.
-
-1 - Always overcommit. Appropriate for some scientific
- applications. Classic example is code using sparse arrays
- and just relying on the virtual memory consisting almost
- entirely of zero pages.
-
-2 - Don't overcommit. The total address space commit
- for the system is not permitted to exceed swap + a
- configurable amount (default is 50%) of physical RAM.
- Depending on the amount you use, in most situations
- this means a process will not be killed while accessing
- pages but will receive errors on memory allocation as
- appropriate.
-
- Useful for applications that want to guarantee their
- memory allocations will be available in the future
- without having to initialize every page.
-
-The overcommit policy is set via the sysctl `vm.overcommit_memory'.
-
-The overcommit amount can be set via `vm.overcommit_ratio' (percentage)
-or `vm.overcommit_kbytes' (absolute value).
-
-The current overcommit limit and amount committed are viewable in
-/proc/meminfo as CommitLimit and Committed_AS respectively.
-
-Gotchas
--------
-
-The C language stack growth does an implicit mremap. If you want absolute
-guarantees and run close to the edge you MUST mmap your stack for the
-largest size you think you will need. For typical stack usage this does
-not matter much but it's a corner case if you really really care
-
-In mode 2 the MAP_NORESERVE flag is ignored.
-
-
-How It Works
-------------
-
-The overcommit is based on the following rules
-
-For a file backed map
- SHARED or READ-only - 0 cost (the file is the map not swap)
- PRIVATE WRITABLE - size of mapping per instance
-
-For an anonymous or /dev/zero map
- SHARED - size of mapping
- PRIVATE READ-only - 0 cost (but of little use)
- PRIVATE WRITABLE - size of mapping per instance
-
-Additional accounting
- Pages made writable copies by mmap
- shmfs memory drawn from the same pool
-
-Status
-------
-
-o We account mmap memory mappings
-o We account mprotect changes in commit
-o We account mremap changes in size
-o We account brk
-o We account munmap
-o We report the commit status in /proc
-o Account and check on fork
-o Review stack handling/building on exec
-o SHMfs accounting
-o Implement actual limit enforcement
-
-To Do
------
-o Account ptrace pages (this is hard)
diff --git a/Documentation/vm/overcommit-accounting.rst b/Documentation/vm/overcommit-accounting.rst
new file mode 100644
index 000000000000..0dd54bbe4afa
--- /dev/null
+++ b/Documentation/vm/overcommit-accounting.rst
@@ -0,0 +1,87 @@
+.. _overcommit_accounting:
+
+=====================
+Overcommit Accounting
+=====================
+
+The Linux kernel supports the following overcommit handling modes:
+
+0
+ Heuristic overcommit handling. Obvious overcommits of address
+ space are refused. Used for a typical system. It ensures a
+ seriously wild allocation fails while allowing overcommit to
+ reduce swap usage. root is allowed to allocate slightly more
+ memory in this mode. This is the default.
+
+1
+ Always overcommit. Appropriate for some scientific
+ applications. Classic example is code using sparse arrays and
+ just relying on the virtual memory consisting almost entirely
+ of zero pages.
+
+2
+ Don't overcommit. The total address space commit for the
+ system is not permitted to exceed swap + a configurable amount
+ (default is 50%) of physical RAM. Depending on the amount you
+ use, in most situations this means a process will not be
+ killed while accessing pages but will receive errors on memory
+ allocation as appropriate.
+
+ Useful for applications that want to guarantee their memory
+ allocations will be available in the future without having to
+ initialize every page.
+
+The overcommit policy is set via the sysctl ``vm.overcommit_memory``.
+
+The overcommit amount can be set via ``vm.overcommit_ratio`` (percentage)
+or ``vm.overcommit_kbytes`` (absolute value).
+
+The current overcommit limit and amount committed are viewable in
+``/proc/meminfo`` as CommitLimit and Committed_AS respectively.
+
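+A minimal sketch of reading the two values programmatically (illustrative
+only)::
+
+	#include <stdio.h>
+	#include <string.h>
+
+	int main(void)
+	{
+		char line[128];
+		FILE *f = fopen("/proc/meminfo", "r");
+
+		if (!f)
+			return 1;
+		/* print only the CommitLimit and Committed_AS lines */
+		while (fgets(line, sizeof(line), f))
+			if (!strncmp(line, "CommitLimit:", 12) ||
+			    !strncmp(line, "Committed_AS:", 13))
+				fputs(line, stdout);
+		fclose(f);
+		return 0;
+	}
+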
+Gotchas
+=======
+
+The C language stack growth does an implicit mremap. If you want absolute
+guarantees and run close to the edge you MUST mmap your stack for the
+largest size you think you will need. For typical stack usage this does
+not matter much, but it's a corner case if you really, really care.
+
+In mode 2 the MAP_NORESERVE flag is ignored.
+
+
+How It Works
+============
+
+The overcommit is based on the following rules:
+
+For a file backed map
+ | SHARED or READ-only - 0 cost (the file is the map not swap)
+ | PRIVATE WRITABLE - size of mapping per instance
+
+For an anonymous or ``/dev/zero`` map
+ | SHARED - size of mapping
+ | PRIVATE READ-only - 0 cost (but of little use)
+ | PRIVATE WRITABLE - size of mapping per instance
+
+Additional accounting
+ | Pages made writable copies by mmap
+ | shmfs memory drawn from the same pool
+
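+A minimal sketch of the per-instance rule (watch Committed_AS in
+``/proc/meminfo`` before and after running it; illustrative only)::
+
+	#include <sys/mman.h>
+
+	int main(void)
+	{
+		/* a private writable anonymous map charges its full size
+		 * (64 MiB here) against the commit limit per instance,
+		 * before any page is even touched */
+		void *p = mmap(NULL, 64 << 20, PROT_READ | PROT_WRITE,
+			       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+
+		/* a SHARED or read-only file-backed map of the same size
+		 * would charge nothing: the file itself backs the pages */
+		return p == MAP_FAILED;
+	}
+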
+Status
+======
+
+* We account mmap memory mappings
+* We account mprotect changes in commit
+* We account mremap changes in size
+* We account brk
+* We account munmap
+* We report the commit status in ``/proc``
+* Account and check on fork
+* Review stack handling/building on exec
+* SHMfs accounting
+* Implement actual limit enforcement
+
+To Do
+=====
+* Account ptrace pages (this is hard)
diff --git a/Documentation/vm/page_frags b/Documentation/vm/page_frags.rst
index a6714565dbf9..637cc49d1b2f 100644
--- a/Documentation/vm/page_frags
+++ b/Documentation/vm/page_frags.rst
@@ -1,5 +1,8 @@
+.. _page_frags:
+
+==============
Page fragments
---------------
+==============
A page fragment is an arbitrary-length arbitrary-offset area of memory
which resides within a 0 or higher order compound page. Multiple
diff --git a/Documentation/vm/page_migration b/Documentation/vm/page_migration.rst
index 496868072e24..f68d61335abb 100644
--- a/Documentation/vm/page_migration
+++ b/Documentation/vm/page_migration.rst
@@ -1,5 +1,8 @@
+.. _page_migration:
+
+==============
Page migration
---------------
+==============
Page migration allows the moving of the physical location of pages between
nodes in a numa system while the process is running. This means that the
@@ -20,7 +23,7 @@ Page migration functions are provided by the numactl package by Andi Kleen
(a version later than 0.9.3 is required. Get it from
ftp://oss.sgi.com/www/projects/libnuma/download/). numactl provides libnuma
which provides an interface similar to other numa functionality for page
-migration. cat /proc/<pid>/numa_maps allows an easy review of where the
+migration. cat ``/proc/<pid>/numa_maps`` allows an easy review of where the
pages of a process are located. See also the numa_maps documentation in the
proc(5) man page.
@@ -56,8 +59,8 @@ description for those trying to use migrate_pages() from the kernel
(for userspace usage see the Andi Kleen's numactl package mentioned above)
and then a low level description of how the low level details work.
-A. In kernel use of migrate_pages()
------------------------------------
+In kernel use of migrate_pages()
+================================
1. Remove pages from the LRU.
@@ -78,8 +81,8 @@ A. In kernel use of migrate_pages()
the new page for each page that is considered for
moving.
-B. How migrate_pages() works
-----------------------------
+How migrate_pages() works
+=========================
migrate_pages() does several passes over its list of pages. A page is moved
if all references to a page are removable at the time. The page has
@@ -142,8 +145,8 @@ Steps:
20. The new page is moved to the LRU and can be scanned by the swapper
etc again.
-C. Non-LRU page migration
--------------------------
+Non-LRU page migration
+======================
Although migration originally aimed to reduce the latency of memory access
for NUMA, compaction, which wants to create high-order pages, is also a main customer.
@@ -164,89 +167,91 @@ migration path.
If a driver wants to make its own pages movable, it should define three
functions, which are function pointers of struct address_space_operations
(a combined sketch appears at the end of this section).
-1. bool (*isolate_page) (struct page *page, isolate_mode_t mode);
+1. ``bool (*isolate_page) (struct page *page, isolate_mode_t mode);``
-What VM expects on isolate_page function of driver is to return *true*
-if driver isolates page successfully. On returing true, VM marks the page
-as PG_isolated so concurrent isolation in several CPUs skip the page
-for isolation. If a driver cannot isolate the page, it should return *false*.
+   What the VM expects of a driver's isolate_page function is that it return
+   *true* if the driver isolates the page successfully. On returning true, the
+   VM marks the page as PG_isolated so that concurrent isolation on several
+   CPUs skips the page. If a driver cannot isolate the page, it should return *false*.
-Once page is successfully isolated, VM uses page.lru fields so driver
-shouldn't expect to preserve values in that fields.
+   Once the page is successfully isolated, the VM uses the page.lru fields,
+   so the driver shouldn't expect the values in those fields to be preserved.
-2. int (*migratepage) (struct address_space *mapping,
- struct page *newpage, struct page *oldpage, enum migrate_mode);
+2. ``int (*migratepage) (struct address_space *mapping,``
+| ``struct page *newpage, struct page *oldpage, enum migrate_mode);``
-After isolation, VM calls migratepage of driver with isolated page.
-The function of migratepage is to move content of the old page to new page
-and set up fields of struct page newpage. Keep in mind that you should
-indicate to the VM the oldpage is no longer movable via __ClearPageMovable()
-under page_lock if you migrated the oldpage successfully and returns
-MIGRATEPAGE_SUCCESS. If driver cannot migrate the page at the moment, driver
-can return -EAGAIN. On -EAGAIN, VM will retry page migration in a short time
-because VM interprets -EAGAIN as "temporal migration failure". On returning
-any error except -EAGAIN, VM will give up the page migration without retrying
-in this time.
+   After isolation, the VM calls the driver's migratepage with the isolated
+   page. The role of migratepage is to move the content of the old page to the
+   new page and to set up the fields of struct page for newpage. Keep in mind
+   that you should indicate to the VM that the oldpage is no longer movable
+   via __ClearPageMovable() under page_lock if you migrated the oldpage
+   successfully and return MIGRATEPAGE_SUCCESS. If the driver cannot migrate
+   the page at the moment, it can return -EAGAIN. On -EAGAIN, the VM will
+   retry page migration after a short time because it interprets -EAGAIN as
+   "temporary migration failure". On returning any error other than -EAGAIN,
+   the VM will give up on migrating the page this time, without retrying.
-Driver shouldn't touch page.lru field VM using in the functions.
+   The driver shouldn't touch the page.lru field, which the VM is using, in these functions.
-3. void (*putback_page)(struct page *);
+3. ``void (*putback_page)(struct page *);``
-If migration fails on isolated page, VM should return the isolated page
-to the driver so VM calls driver's putback_page with migration failed page.
-In this function, driver should put the isolated page back to the own data
-structure.
+   If migration fails on the isolated page, the VM should return the page to
+   the driver, so it calls the driver's putback_page with the page whose
+   migration failed. In this function, the driver should put the isolated
+   page back into its own data structure.
4. non-lru movable page flags
-There are two page flags for supporting non-lru movable page.
+   There are two page flags for supporting non-lru movable pages.
-* PG_movable
+ * PG_movable
-Driver should use the below function to make page movable under page_lock.
+   A driver should use the function below to make a page movable, under page_lock::
void __SetPageMovable(struct page *page, struct address_space *mapping)
-It needs argument of address_space for registering migration family functions
-which will be called by VM. Exactly speaking, PG_movable is not a real flag of
-struct page. Rather than, VM reuses page->mapping's lower bits to represent it.
+   It takes an address_space argument for registering the migration
+   family of functions, which will be called by the VM. Strictly
+   speaking, PG_movable is not a real flag of struct page. Rather, the
+   VM reuses the lower bits of page->mapping to represent it.
+::
#define PAGE_MAPPING_MOVABLE 0x2
page->mapping = page->mapping | PAGE_MAPPING_MOVABLE;
-so driver shouldn't access page->mapping directly. Instead, driver should
-use page_mapping which mask off the low two bits of page->mapping under
-page lock so it can get right struct address_space.
-
-For testing of non-lru movable page, VM supports __PageMovable function.
-However, it doesn't guarantee to identify non-lru movable page because
-page->mapping field is unified with other variables in struct page.
-As well, if driver releases the page after isolation by VM, page->mapping
-doesn't have stable value although it has PAGE_MAPPING_MOVABLE
-(Look at __ClearPageMovable). But __PageMovable is cheap to catch whether
-page is LRU or non-lru movable once the page has been isolated. Because
-LRU pages never can have PAGE_MAPPING_MOVABLE in page->mapping. It is also
-good for just peeking to test non-lru movable pages before more expensive
-checking with lock_page in pfn scanning to select victim.
-
-For guaranteeing non-lru movable page, VM provides PageMovable function.
-Unlike __PageMovable, PageMovable functions validates page->mapping and
-mapping->a_ops->isolate_page under lock_page. The lock_page prevents sudden
-destroying of page->mapping.
-
-Driver using __SetPageMovable should clear the flag via __ClearMovablePage
-under page_lock before the releasing the page.
-
-* PG_isolated
-
-To prevent concurrent isolation among several CPUs, VM marks isolated page
-as PG_isolated under lock_page. So if a CPU encounters PG_isolated non-lru
-movable page, it can skip it. Driver doesn't need to manipulate the flag
-because VM will set/clear it automatically. Keep in mind that if driver
-sees PG_isolated page, it means the page have been isolated by VM so it
-shouldn't touch page.lru field.
-PG_isolated is alias with PG_reclaim flag so driver shouldn't use the flag
-for own purpose.
+   so a driver shouldn't access page->mapping directly. Instead, the driver
+   should use page_mapping, which masks off the low two bits of page->mapping
+   under the page lock, so it can get the right struct address_space.
+
+   For testing whether a page is a non-lru movable page, the VM provides the
+   __PageMovable function. However, it doesn't guarantee identification of
+   non-lru movable pages because the page->mapping field is unified with
+   other variables in struct page. Also, if a driver releases the page after
+   isolation by the VM, page->mapping doesn't have a stable value even though
+   it has PAGE_MAPPING_MOVABLE set (look at __ClearPageMovable). But
+   __PageMovable is a cheap way to tell whether a page is LRU or non-lru
+   movable once it has been isolated, because LRU pages can never have
+   PAGE_MAPPING_MOVABLE in page->mapping. It is also good for just peeking at
+   non-lru movable pages before the more expensive lock_page check during pfn
+   scanning to select a victim.
+
+   To guarantee that a page is a non-lru movable page, the VM provides the
+   PageMovable function. Unlike __PageMovable, PageMovable validates
+   page->mapping and mapping->a_ops->isolate_page under lock_page. The
+   lock_page prevents the sudden destruction of page->mapping.
+
+   A driver using __SetPageMovable should clear the flag via __ClearPageMovable
+   under page_lock before releasing the page.
+
+ * PG_isolated
+
+   To prevent concurrent isolation among several CPUs, the VM marks an
+   isolated page as PG_isolated under lock_page, so if a CPU encounters a
+   PG_isolated non-lru movable page, it can skip it. The driver doesn't need
+   to manipulate the flag because the VM will set/clear it automatically. Keep
+   in mind that if a driver sees a PG_isolated page, it means the page has
+   been isolated by the VM, so it shouldn't touch the page.lru field.
+   PG_isolated is aliased with the PG_reclaim flag, so a driver shouldn't use
+   the flag for its own purpose.
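+
+The pieces above might be tied together as in the following hypothetical
+driver sketch (names and bodies are illustrative, not from a real driver)::
+
+	static bool my_isolate_page(struct page *page, isolate_mode_t mode)
+	{
+		/* detach the page from the driver's private lists; the
+		 * VM owns page.lru until migration finishes or fails */
+		return true;
+	}
+
+	static int my_migratepage(struct address_space *mapping,
+				  struct page *newpage, struct page *oldpage,
+				  enum migrate_mode mode)
+	{
+		/* copy contents and private state to newpage, then tell
+		 * the VM the old page is no longer movable */
+		__ClearPageMovable(oldpage);
+		return MIGRATEPAGE_SUCCESS;
+	}
+
+	static void my_putback_page(struct page *page)
+	{
+		/* migration failed: put the page back on the driver's lists */
+	}
+
+	static const struct address_space_operations my_aops = {
+		.isolate_page	= my_isolate_page,
+		.migratepage	= my_migratepage,
+		.putback_page	= my_putback_page,
+	};
+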
Christoph Lameter, May 8, 2006.
Minchan Kim, Mar 28, 2016.
diff --git a/Documentation/vm/page_owner.txt b/Documentation/vm/page_owner.rst
index ffff1439076a..0ed5ab8c7ab4 100644
--- a/Documentation/vm/page_owner.txt
+++ b/Documentation/vm/page_owner.rst
@@ -1,7 +1,11 @@
+.. _page_owner:
+
+==================================================
page owner: Tracking about who allocated each page
------------------------------------------------------------
+==================================================
-* Introduction
+Introduction
+============
page owner is for tracking who allocated each page.
It can be used to debug memory leaks or to find a memory hog.
@@ -34,13 +38,15 @@ not affect to allocation performance, especially if the static keys jump
label patching functionality is available. Following is the kernel's code
size change due to this facility.
-- Without page owner
+- Without page owner::
+
text data bss dec hex filename
- 40662 1493 644 42799 a72f mm/page_alloc.o
+ 40662 1493 644 42799 a72f mm/page_alloc.o
+
+- With page owner::
-- With page owner
text data bss dec hex filename
- 40892 1493 644 43029 a815 mm/page_alloc.o
+ 40892 1493 644 43029 a815 mm/page_alloc.o
1427 24 8 1459 5b3 mm/page_ext.o
2722 50 0 2772 ad4 mm/page_owner.o
are caught and marked, although they are mostly allocated from the struct
page extension feature. Anyway, after that, no page is left in an
untracked state.
-* Usage
+Usage
+=====
+
+1) Build user-space helper::
-1) Build user-space helper
cd tools/vm
make page_owner_sort
-2) Enable page owner
- Add "page_owner=on" to boot cmdline.
+2) Enable page owner: add "page_owner=on" to boot cmdline.
3) Do the job you want to debug
-4) Analyze information from page owner
+4) Analyze information from page owner::
+
cat /sys/kernel/debug/page_owner > page_owner_full.txt
grep -v ^PFN page_owner_full.txt > page_owner.txt
./page_owner_sort page_owner.txt sorted_page_owner.txt
- See the result about who allocated each page
- in the sorted_page_owner.txt.
+ See the result about who allocated each page
+ in the ``sorted_page_owner.txt``.
diff --git a/Documentation/vm/remap_file_pages.txt b/Documentation/vm/remap_file_pages.rst
index f609142f406a..7bef6718e3a9 100644
--- a/Documentation/vm/remap_file_pages.txt
+++ b/Documentation/vm/remap_file_pages.rst
@@ -1,3 +1,9 @@
+.. _remap_file_pages:
+
+==============================
+remap_file_pages() system call
+==============================
+
The remap_file_pages() system call is used to create a nonlinear mapping,
that is, a mapping in which the pages of the file are mapped into a
nonsequential order in memory. The advantage of using remap_file_pages()
diff --git a/Documentation/vm/slub.rst b/Documentation/vm/slub.rst
new file mode 100644
index 000000000000..3a775fd64e2d
--- /dev/null
+++ b/Documentation/vm/slub.rst
@@ -0,0 +1,361 @@
+.. _slub:
+
+==========================
+Short users guide for SLUB
+==========================
+
+The basic philosophy of SLUB is very different from SLAB. SLAB
+requires rebuilding the kernel to activate debug options for all
+slab caches. SLUB always includes full debugging but it is off by default.
+SLUB can enable debugging only for selected slabs in order to avoid
+an impact on overall system performance which may make a bug more
+difficult to find.
+
+In order to switch debugging on one can add an option ``slub_debug``
+to the kernel command line. That will enable full debugging for
+all slabs.
+
+Typically one would then use the ``slabinfo`` command to get statistical
+data and perform operations on the slabs. By default ``slabinfo`` only lists
+slabs that have data in them. See "slabinfo -h" for more options when
+running the command. ``slabinfo`` can be compiled with
+::
+
+ gcc -o slabinfo tools/vm/slabinfo.c
+
+Some of the modes of operation of ``slabinfo`` require that slub debugging
+be enabled on the command line. For example, no tracking information will be
+available without debugging enabled, and validation can only be partially
+performed if debugging was not switched on.
+
+Some more sophisticated uses of slub_debug:
+-------------------------------------------
+
+Parameters may be given to ``slub_debug``. If none is specified then full
+debugging is enabled. Format:
+
+slub_debug=<Debug-Options>
+ Enable options for all slabs
+slub_debug=<Debug-Options>,<slab name>
+ Enable options only for select slabs
+
+
+Possible debug options are::
+
+ F Sanity checks on (enables SLAB_DEBUG_CONSISTENCY_CHECKS
+ Sorry SLAB legacy issues)
+ Z Red zoning
+ P Poisoning (object and padding)
+ U User tracking (free and alloc)
+ T Trace (please only use on single slabs)
+ A Toggle failslab filter mark for the cache
+ O Switch debugging off for caches that would have
+ caused higher minimum slab orders
+ - Switch all debugging off (useful if the kernel is
+ configured with CONFIG_SLUB_DEBUG_ON)
+
+For example, in order to boot just with sanity checks and red zoning one would specify::
+
+ slub_debug=FZ
+
+Trying to find an issue in the dentry cache? Try::
+
+ slub_debug=,dentry
+
+to only enable debugging on the dentry cache.
+
+Red zoning and tracking may realign the slab. We can just apply sanity checks
+to the dentry cache with::
+
+ slub_debug=F,dentry
+
+Debugging options may require the minimum possible slab order to increase as
+a result of storing the metadata (for example, caches with PAGE_SIZE object
+sizes). This has a higher likelihood of resulting in slab allocation errors
+in low memory situations or if there's high fragmentation of memory. To
+switch off debugging for such caches by default, use::
+
+ slub_debug=O
+
+In case you forgot to enable debugging on the kernel command line: It is
+possible to enable debugging manually when the kernel is up. Look at the
+contents of::
+
+ /sys/kernel/slab/<slab name>/
+
+Look at the writable files. Writing 1 to them will enable the
+corresponding debug option. All options can be set on a slab that does
+not contain objects. If the slab already contains objects then only
+sanity checks and tracing may be enabled. The other options may cause
+the realignment of objects.
+
+Careful with tracing: It may spew out lots of information and never stop if
+used on the wrong slab.
+
+Slab merging
+============
+
+If no debug options are specified then SLUB may merge similar slabs together
+in order to reduce overhead and increase cache hotness of objects.
+``slabinfo -a`` displays which slabs were merged together.
+
+Slab validation
+===============
+
+SLUB can validate all objects if the kernel was booted with slub_debug. In
+order to do so you must have the ``slabinfo`` tool. Then you can do
+::
+
+ slabinfo -v
+
+which will test all objects. Output will be generated to the syslog.
+
+This also works in a more limited way if the kernel was booted without slab
+debug. In that case ``slabinfo -v`` simply tests all reachable objects. Usually
+these are in the cpu slabs and the partial slabs. Full slabs are not
+tracked by SLUB in a non debug situation.
+
+Getting more performance
+========================
+
+To some degree SLUB's performance is limited by the need to take the
+list_lock once in a while to deal with partial slabs. That overhead is
+governed by the order of the allocation for each slab. The allocations
+can be influenced by kernel parameters::
+
+	slub_min_objects=x	(default 4)
+	slub_min_order=x	(default 0)
+	slub_max_order=x	(default 3 (PAGE_ALLOC_COSTLY_ORDER))
+
+``slub_min_objects``
+	allows one to specify how many objects must at least fit into one
+	slab in order for the allocation order to be acceptable. In
+	general slub will be able to perform this number of
+	allocations on a slab without consulting centralized resources
+	(list_lock) where contention may occur.
+
+``slub_min_order``
+	specifies a minimum order of slabs, with an effect similar to
+	``slub_min_objects``.
+
+``slub_max_order``
+	specifies the order at which ``slub_min_objects`` should no
+	longer be checked. This is useful to avoid SLUB trying to
+	generate super large order pages to fit ``slub_min_objects``
+	of a slab cache with large object sizes into one high order
+	page. Setting the command line parameter
+	``debug_guardpage_minorder=N`` (N > 0) forces
+	``slub_max_order`` to 0, which causes slabs to be allocated at
+	the minimum possible order.
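+
+As an illustrative (not prescriptive) example, a boot line that trades
+larger slabs for fewer list_lock acquisitions might look like::
+
+	slub_min_objects=16 slub_max_order=2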
+
+SLUB Debug output
+=================
+
+Here is a sample of slub debug output::
+
+ ====================================================================
+ BUG kmalloc-8: Redzone overwritten
+ --------------------------------------------------------------------
+
+ INFO: 0xc90f6d28-0xc90f6d2b. First byte 0x00 instead of 0xcc
+ INFO: Slab 0xc528c530 flags=0x400000c3 inuse=61 fp=0xc90f6d58
+ INFO: Object 0xc90f6d20 @offset=3360 fp=0xc90f6d58
+ INFO: Allocated in get_modalias+0x61/0xf5 age=53 cpu=1 pid=554
+
+ Bytes b4 0xc90f6d10: 00 00 00 00 00 00 00 00 5a 5a 5a 5a 5a 5a 5a 5a ........ZZZZZZZZ
+ Object 0xc90f6d20: 31 30 31 39 2e 30 30 35 1019.005
+ Redzone 0xc90f6d28: 00 cc cc cc .
+ Padding 0xc90f6d50: 5a 5a 5a 5a 5a 5a 5a 5a ZZZZZZZZ
+
+ [<c010523d>] dump_trace+0x63/0x1eb
+ [<c01053df>] show_trace_log_lvl+0x1a/0x2f
+ [<c010601d>] show_trace+0x12/0x14
+ [<c0106035>] dump_stack+0x16/0x18
+ [<c017e0fa>] object_err+0x143/0x14b
+ [<c017e2cc>] check_object+0x66/0x234
+ [<c017eb43>] __slab_free+0x239/0x384
+ [<c017f446>] kfree+0xa6/0xc6
+ [<c02e2335>] get_modalias+0xb9/0xf5
+ [<c02e23b7>] dmi_dev_uevent+0x27/0x3c
+ [<c027866a>] dev_uevent+0x1ad/0x1da
+ [<c0205024>] kobject_uevent_env+0x20a/0x45b
+ [<c020527f>] kobject_uevent+0xa/0xf
+ [<c02779f1>] store_uevent+0x4f/0x58
+ [<c027758e>] dev_attr_store+0x29/0x2f
+ [<c01bec4f>] sysfs_write_file+0x16e/0x19c
+ [<c0183ba7>] vfs_write+0xd1/0x15a
+ [<c01841d7>] sys_write+0x3d/0x72
+ [<c0104112>] sysenter_past_esp+0x5f/0x99
+ [<b7f7b410>] 0xb7f7b410
+ =======================
+
+ FIX kmalloc-8: Restoring Redzone 0xc90f6d28-0xc90f6d2b=0xcc
+
+If SLUB encounters a corrupted object (full detection requires the kernel
+to be booted with slub_debug) then the following output will be dumped
+into the syslog:
+
+1. Description of the problem encountered
+
+ This will be a message in the system log starting with::
+
+ ===============================================
+ BUG <slab cache affected>: <What went wrong>
+ -----------------------------------------------
+
+ INFO: <corruption start>-<corruption_end> <more info>
+ INFO: Slab <address> <slab information>
+ INFO: Object <address> <object information>
+ INFO: Allocated in <kernel function> age=<jiffies since alloc> cpu=<allocated by
+ cpu> pid=<pid of the process>
+ INFO: Freed in <kernel function> age=<jiffies since free> cpu=<freed by cpu>
+ pid=<pid of the process>
+
+ (Object allocation / free information is only available if SLAB_STORE_USER is
+ set for the slab. slub_debug sets that option)
+
+2. The object contents if an object was involved.
+
+ Various types of lines can follow the BUG SLUB line:
+
+ Bytes b4 <address> : <bytes>
+ Shows a few bytes before the object where the problem was detected.
+ Can be useful if the corruption does not stop with the start of the
+ object.
+
+ Object <address> : <bytes>
+ The bytes of the object. If the object is inactive then the bytes
+ typically contain poison values. Any non-poison value shows a
+ corruption by a write after free.
+
+ Redzone <address> : <bytes>
+ The Redzone following the object. The Redzone is used to detect
+ writes after the object. All bytes should always have the same
+ value. If there is any deviation then it is due to a write after
+ the object boundary.
+
+ (Redzone information is only available if SLAB_RED_ZONE is set.
+ slub_debug sets that option)
+
+ Padding <address> : <bytes>
+ Unused data to fill up the space in order to get the next object
+ properly aligned. In the debug case we make sure that there are
+ at least 4 bytes of padding. This allows the detection of writes
+ before the object.
+
+3. A stackdump
+
+   The stackdump describes the location where the error was detected. The
+   cause of the corruption may more likely be found by looking at the
+   function that allocated or freed the object.
+
+4. Report on how the problem was dealt with in order to ensure the continued
+ operation of the system.
+
+ These are messages in the system log beginning with::
+
+ FIX <slab cache affected>: <corrective action taken>
+
+   In the above sample SLUB found that the Redzone of an active object has
+   been overwritten. Here a string of 8 characters was written into a slab
+   object that has a length of 8 characters. However, an 8 character string
+   needs a terminating 0. That zero has overwritten the first byte of the
+   Redzone field. After reporting the details of the issue encountered, the
+   FIX SLUB message tells us that SLUB has restored the Redzone to its
+   proper value, and system operations continue.
+
+Emergency operations
+====================
+
+Minimal debugging (sanity checks alone) can be enabled by booting with::
+
+ slub_debug=F
+
+This will generally be enough to enable the resiliency features of SLUB,
+which will keep the system running even if a bad kernel component keeps
+corrupting objects. This may be important for production systems.
+Performance will be impacted by the sanity checks and there will be a
+continual stream of error messages to the syslog but no additional memory
+will be used (unlike full debugging).
+
+No guarantees. The kernel component still needs to be fixed. Performance
+may be optimized further by locating the slab that experiences corruption
+and enabling debugging only for that cache.
+
+For example::
+
+ slub_debug=F,dentry
+
+If the corruption occurs by writing after the end of the object then it
+may be advisable to enable a Redzone to avoid corrupting the beginning
+of other objects::
+
+ slub_debug=FZ,dentry
+
+Extended slabinfo mode and plotting
+===================================
+
+The ``slabinfo`` tool has a special 'extended' ('-X') mode that includes:
+ - Slabcache Totals
+ - Slabs sorted by size (up to -N <num> slabs, default 1)
+ - Slabs sorted by loss (up to -N <num> slabs, default 1)
+
+Additionally, in this mode ``slabinfo`` does not dynamically scale
+sizes (G/M/K) and reports everything in bytes (this functionality is
+also available to other slabinfo modes via the '-B' option), which makes
+reporting more precise and accurate. Moreover, in some sense the '-X'
+mode also simplifies the analysis of slabs' behaviour, because its
+output can be plotted using the ``slabinfo-gnuplot.sh`` script. So it
+pushes the analysis from looking through the numbers (tons of numbers)
+to something easier -- visual analysis.
+
+To generate plots:
+
+a) collect slabinfo extended records, for example::
+
+ while [ 1 ]; do slabinfo -X >> FOO_STATS; sleep 1; done
+
+b) pass stats file(-s) to ``slabinfo-gnuplot.sh`` script::
+
+ slabinfo-gnuplot.sh FOO_STATS [FOO_STATS2 .. FOO_STATSN]
+
+   The ``slabinfo-gnuplot.sh`` script will pre-process the collected records
+   and generate 3 png files (and 3 pre-processing cache files) per STATS
+   file:
+ - Slabcache Totals: FOO_STATS-totals.png
+ - Slabs sorted by size: FOO_STATS-slabs-by-size.png
+ - Slabs sorted by loss: FOO_STATS-slabs-by-loss.png
+
+Another use case where ``slabinfo-gnuplot.sh`` can be useful is when you
+need to compare slabs' behaviour "prior to" and "after" some code
+modification. To help you out there, the ``slabinfo-gnuplot.sh`` script
+can 'merge' the `Slabcache Totals` sections from different
+measurements. To visually compare N plots:
+
+a) Collect as many STATS1, STATS2, .. STATSN files as you need::
+
+ while [ 1 ]; do slabinfo -X >> STATS<X>; sleep 1; done
+
+b) Pre-process those STATS files::
+
+ slabinfo-gnuplot.sh STATS1 STATS2 .. STATSN
+
+c) Execute ``slabinfo-gnuplot.sh`` in '-t' mode, passing all of the
+ generated pre-processed \*-totals::
+
+ slabinfo-gnuplot.sh -t STATS1-totals STATS2-totals .. STATSN-totals
+
+ This will produce a single plot (png file).
+
+  As expected, plots can be large, so some fluctuations or small spikes
+  can go unnoticed. To deal with that, ``slabinfo-gnuplot.sh`` has two
+  options to 'zoom-in'/'zoom-out':
+
+  a) ``-s %d,%d`` -- overwrites the default image width and height
+ b) ``-r %d,%d`` -- specifies a range of samples to use (for example,
+ in ``slabinfo -X >> FOO_STATS; sleep 1;`` case, using a ``-r
+ 40,60`` range will plot only samples collected between 40th and
+ 60th seconds).
+
+Christoph Lameter, May 30, 2007
+Sergey Senozhatsky, October 23, 2015
diff --git a/Documentation/vm/slub.txt b/Documentation/vm/slub.txt
deleted file mode 100644
index 84652419bff2..000000000000
--- a/Documentation/vm/slub.txt
+++ /dev/null
@@ -1,342 +0,0 @@
-Short users guide for SLUB
---------------------------
-
-The basic philosophy of SLUB is very different from SLAB. SLAB
-requires rebuilding the kernel to activate debug options for all
-slab caches. SLUB always includes full debugging but it is off by default.
-SLUB can enable debugging only for selected slabs in order to avoid
-an impact on overall system performance which may make a bug more
-difficult to find.
-
-In order to switch debugging on one can add an option "slub_debug"
-to the kernel command line. That will enable full debugging for
-all slabs.
-
-Typically one would then use the "slabinfo" command to get statistical
-data and perform operation on the slabs. By default slabinfo only lists
-slabs that have data in them. See "slabinfo -h" for more options when
-running the command. slabinfo can be compiled with
-
-gcc -o slabinfo tools/vm/slabinfo.c
-
-Some of the modes of operation of slabinfo require that slub debugging
-be enabled on the command line. F.e. no tracking information will be
-available without debugging on and validation can only partially
-be performed if debugging was not switched on.
-
-Some more sophisticated uses of slub_debug:
--------------------------------------------
-
-Parameters may be given to slub_debug. If none is specified then full
-debugging is enabled. Format:
-
-slub_debug=<Debug-Options> Enable options for all slabs
-slub_debug=<Debug-Options>,<slab name>
- Enable options only for select slabs
-
-Possible debug options are
- F Sanity checks on (enables SLAB_DEBUG_CONSISTENCY_CHECKS
- Sorry SLAB legacy issues)
- Z Red zoning
- P Poisoning (object and padding)
- U User tracking (free and alloc)
- T Trace (please only use on single slabs)
- A Toggle failslab filter mark for the cache
- O Switch debugging off for caches that would have
- caused higher minimum slab orders
- - Switch all debugging off (useful if the kernel is
- configured with CONFIG_SLUB_DEBUG_ON)
-
-F.e. in order to boot just with sanity checks and red zoning one would specify:
-
- slub_debug=FZ
-
-Trying to find an issue in the dentry cache? Try
-
- slub_debug=,dentry
-
-to only enable debugging on the dentry cache.
-
-Red zoning and tracking may realign the slab. We can just apply sanity checks
-to the dentry cache with
-
- slub_debug=F,dentry
-
-Debugging options may require the minimum possible slab order to increase as
-a result of storing the metadata (for example, caches with PAGE_SIZE object
-sizes). This has a higher liklihood of resulting in slab allocation errors
-in low memory situations or if there's high fragmentation of memory. To
-switch off debugging for such caches by default, use
-
- slub_debug=O
-
-In case you forgot to enable debugging on the kernel command line: It is
-possible to enable debugging manually when the kernel is up. Look at the
-contents of:
-
-/sys/kernel/slab/<slab name>/
-
-Look at the writable files. Writing 1 to them will enable the
-corresponding debug option. All options can be set on a slab that does
-not contain objects. If the slab already contains objects then sanity checks
-and tracing may only be enabled. The other options may cause the realignment
-of objects.
-
-Careful with tracing: It may spew out lots of information and never stop if
-used on the wrong slab.
-
-Slab merging
-------------
-
-If no debug options are specified then SLUB may merge similar slabs together
-in order to reduce overhead and increase cache hotness of objects.
-slabinfo -a displays which slabs were merged together.
-
-Slab validation
----------------
-
-SLUB can validate all object if the kernel was booted with slub_debug. In
-order to do so you must have the slabinfo tool. Then you can do
-
-slabinfo -v
-
-which will test all objects. Output will be generated to the syslog.
-
-This also works in a more limited way if boot was without slab debug.
-In that case slabinfo -v simply tests all reachable objects. Usually
-these are in the cpu slabs and the partial slabs. Full slabs are not
-tracked by SLUB in a non debug situation.
-
-Getting more performance
-------------------------
-
-To some degree SLUB's performance is limited by the need to take the
-list_lock once in a while to deal with partial slabs. That overhead is
-governed by the order of the allocation for each slab. The allocations
-can be influenced by kernel parameters:
-
-slub_min_objects=x (default 4)
-slub_min_order=x (default 0)
-slub_max_order=x (default 3 (PAGE_ALLOC_COSTLY_ORDER))
-
-slub_min_objects allows to specify how many objects must at least fit
-into one slab in order for the allocation order to be acceptable.
-In general slub will be able to perform this number of allocations
-on a slab without consulting centralized resources (list_lock) where
-contention may occur.
-
-slub_min_order specifies a minim order of slabs. A similar effect like
-slub_min_objects.
-
-slub_max_order specified the order at which slub_min_objects should no
-longer be checked. This is useful to avoid SLUB trying to generate
-super large order pages to fit slub_min_objects of a slab cache with
-large object sizes into one high order page. Setting command line
-parameter debug_guardpage_minorder=N (N > 0), forces setting
-slub_max_order to 0, what cause minimum possible order of slabs
-allocation.
-
-SLUB Debug output
------------------
-
-Here is a sample of slub debug output:
-
-====================================================================
-BUG kmalloc-8: Redzone overwritten
---------------------------------------------------------------------
-
-INFO: 0xc90f6d28-0xc90f6d2b. First byte 0x00 instead of 0xcc
-INFO: Slab 0xc528c530 flags=0x400000c3 inuse=61 fp=0xc90f6d58
-INFO: Object 0xc90f6d20 @offset=3360 fp=0xc90f6d58
-INFO: Allocated in get_modalias+0x61/0xf5 age=53 cpu=1 pid=554
-
-Bytes b4 0xc90f6d10: 00 00 00 00 00 00 00 00 5a 5a 5a 5a 5a 5a 5a 5a ........ZZZZZZZZ
- Object 0xc90f6d20: 31 30 31 39 2e 30 30 35 1019.005
- Redzone 0xc90f6d28: 00 cc cc cc .
- Padding 0xc90f6d50: 5a 5a 5a 5a 5a 5a 5a 5a ZZZZZZZZ
-
- [<c010523d>] dump_trace+0x63/0x1eb
- [<c01053df>] show_trace_log_lvl+0x1a/0x2f
- [<c010601d>] show_trace+0x12/0x14
- [<c0106035>] dump_stack+0x16/0x18
- [<c017e0fa>] object_err+0x143/0x14b
- [<c017e2cc>] check_object+0x66/0x234
- [<c017eb43>] __slab_free+0x239/0x384
- [<c017f446>] kfree+0xa6/0xc6
- [<c02e2335>] get_modalias+0xb9/0xf5
- [<c02e23b7>] dmi_dev_uevent+0x27/0x3c
- [<c027866a>] dev_uevent+0x1ad/0x1da
- [<c0205024>] kobject_uevent_env+0x20a/0x45b
- [<c020527f>] kobject_uevent+0xa/0xf
- [<c02779f1>] store_uevent+0x4f/0x58
- [<c027758e>] dev_attr_store+0x29/0x2f
- [<c01bec4f>] sysfs_write_file+0x16e/0x19c
- [<c0183ba7>] vfs_write+0xd1/0x15a
- [<c01841d7>] sys_write+0x3d/0x72
- [<c0104112>] sysenter_past_esp+0x5f/0x99
- [<b7f7b410>] 0xb7f7b410
- =======================
-
-FIX kmalloc-8: Restoring Redzone 0xc90f6d28-0xc90f6d2b=0xcc
-
-If SLUB encounters a corrupted object (full detection requires the kernel
-to be booted with slub_debug) then the following output will be dumped
-into the syslog:
-
-1. Description of the problem encountered
-
-This will be a message in the system log starting with
-
-===============================================
-BUG <slab cache affected>: <What went wrong>
------------------------------------------------
-
-INFO: <corruption start>-<corruption_end> <more info>
-INFO: Slab <address> <slab information>
-INFO: Object <address> <object information>
-INFO: Allocated in <kernel function> age=<jiffies since alloc> cpu=<allocated by
- cpu> pid=<pid of the process>
-INFO: Freed in <kernel function> age=<jiffies since free> cpu=<freed by cpu>
- pid=<pid of the process>
-
-(Object allocation / free information is only available if SLAB_STORE_USER is
-set for the slab. slub_debug sets that option)
-
-2. The object contents if an object was involved.
-
-Various types of lines can follow the BUG SLUB line:
-
-Bytes b4 <address> : <bytes>
- Shows a few bytes before the object where the problem was detected.
- Can be useful if the corruption does not stop with the start of the
- object.
-
-Object <address> : <bytes>
- The bytes of the object. If the object is inactive then the bytes
- typically contain poison values. Any non-poison value shows a
- corruption by a write after free.
-
-Redzone <address> : <bytes>
- The Redzone following the object. The Redzone is used to detect
- writes after the object. All bytes should always have the same
- value. If there is any deviation then it is due to a write after
- the object boundary.
-
- (Redzone information is only available if SLAB_RED_ZONE is set.
- slub_debug sets that option)
-
-Padding <address> : <bytes>
- Unused data to fill up the space in order to get the next object
- properly aligned. In the debug case we make sure that there are
- at least 4 bytes of padding. This allows the detection of writes
- before the object.
-
-3. A stackdump
-
-The stackdump describes the location where the error was detected. The
-cause of the corruption may more likely be found by looking at the
-function that allocated or freed the object.
-
-4. Report on how the problem was dealt with in order to ensure the continued
-operation of the system.
-
-These are messages in the system log beginning with
-
-FIX <slab cache affected>: <corrective action taken>
-
-In the above sample SLUB found that the Redzone of an active object has
-been overwritten. Here a string of 8 characters was written into a slab
-object that has a length of 8 characters. However, an 8 character string
-needs a terminating 0. That zero has overwritten the first byte of the
-Redzone field. After reporting the details of the issue encountered, the
-FIX SLUB message tells us that SLUB has restored the Redzone to its
-proper value and system operations then continue.
-
-Emergency operations:
----------------------
-
-Minimal debugging (sanity checks alone) can be enabled by booting with
-
- slub_debug=F
-
-This will generally be enough to enable the resiliency features of SLUB,
-which will keep the system running even if a bad kernel component keeps
-corrupting objects. This may be important for production systems.
-Performance will be impacted by the sanity checks and there will be a
-continual stream of error messages to the syslog, but no additional memory
-will be used (unlike full debugging).
-
-No guarantees. The kernel component still needs to be fixed. Performance
-may be optimized further by locating the slab that experiences corruption
-and enabling debugging only for that cache.
-
-For example:
-
- slub_debug=F,dentry
-
-If the corruption occurs by writing after the end of the object then it
-may be advisable to enable a Redzone to avoid corrupting the beginning
-of other objects.
-
- slub_debug=FZ,dentry
-
-Extended slabinfo mode and plotting
------------------------------------
-
-The slabinfo tool has a special 'extended' ('-X') mode that includes:
- - Slabcache Totals
- - Slabs sorted by size (up to -N <num> slabs, default 1)
- - Slabs sorted by loss (up to -N <num> slabs, default 1)
-
-Additionally, in this mode slabinfo does not dynamically scale sizes
-(G/M/K) and reports everything in bytes (this functionality is also
-available to other slabinfo modes via the '-B' option), which makes
-reporting more precise and accurate. Moreover, the '-X' mode also
-simplifies the analysis of slab behaviour, because its output can be
-plotted using the slabinfo-gnuplot.sh script. That pushes the analysis
-from looking through tons of numbers to something easier: visual analysis.
-
-To generate plots:
-a) collect slabinfo extended records, for example:
-
- while [ 1 ]; do slabinfo -X >> FOO_STATS; sleep 1; done
-
-b) pass stats file(-s) to slabinfo-gnuplot.sh script:
- slabinfo-gnuplot.sh FOO_STATS [FOO_STATS2 .. FOO_STATSN]
-
-The slabinfo-gnuplot.sh script will pre-process the collected records
-and generate 3 png files (and 3 pre-processing cache files) per STATS
-file:
- - Slabcache Totals: FOO_STATS-totals.png
- - Slabs sorted by size: FOO_STATS-slabs-by-size.png
- - Slabs sorted by loss: FOO_STATS-slabs-by-loss.png
-
-Another use case where slabinfo-gnuplot can be useful is when you need
-to compare slabs' behaviour "prior to" and "after" some code modification.
-To help you out there, the slabinfo-gnuplot.sh script can 'merge' the
-`Slabcache Totals` sections from different measurements. To visually
-compare N plots:
-
-a) Collect as many STATS1, STATS2, .. STATSN files as you need
- while [ 1 ]; do slabinfo -X >> STATS<X>; sleep 1; done
-
-b) Pre-process those STATS files
- slabinfo-gnuplot.sh STATS1 STATS2 .. STATSN
-
-c) Execute slabinfo-gnuplot.sh in '-t' mode, passing all of the
-generated pre-processed *-totals
- slabinfo-gnuplot.sh -t STATS1-totals STATS2-totals .. STATSN-totals
-
-This will produce a single plot (png file).
-
-Plots, expectedly, can be large, so some fluctuations or small spikes
-can go unnoticed. To deal with that, `slabinfo-gnuplot.sh' has two
-options to 'zoom-in'/'zoom-out':
- a) -s %d,%d overrides the default image width and height
- b) -r %d,%d specifies a range of samples to use (for example,
- in `slabinfo -X >> FOO_STATS; sleep 1;' case, using
- a "-r 40,60" range will plot only samples collected
- between 40th and 60th seconds).
-
-Christoph Lameter, May 30, 2007
-Sergey Senozhatsky, October 23, 2015
diff --git a/Documentation/vm/split_page_table_lock b/Documentation/vm/split_page_table_lock.rst
index 62842a857dab..889b00be469f 100644
--- a/Documentation/vm/split_page_table_lock
+++ b/Documentation/vm/split_page_table_lock.rst
@@ -1,3 +1,6 @@
+.. _split_page_table_lock:
+
+=====================
Split page table lock
=====================
@@ -11,6 +14,7 @@ access to the table. At the moment we use split lock for PTE and PMD
tables. Access to higher level tables is protected by mm->page_table_lock.
There are helpers to lock/unlock a table and other accessor functions:
+
- pte_offset_map_lock()
maps pte and takes PTE table lock, returns pointer to the taken
lock;
@@ -34,12 +38,13 @@ Split page table lock for PMD tables is enabled, if it's enabled for PTE
tables and the architecture supports it (see below).
Hugetlb and split page table lock
----------------------------------
+=================================
Hugetlb can support several page sizes. We use split lock only for PMD
level, but not for PUD.
Hugetlb-specific helpers:
+
- huge_pte_lock()
takes pmd split lock for PMD_SIZE page, mm->page_table_lock
otherwise;
@@ -47,7 +52,7 @@ Hugetlb-specific helpers:
returns pointer to table lock;
Support of split page table lock by an architecture
----------------------------------------------------
+===================================================
There is no need for special enabling of PTE split page table lock:
everything required is done by pgtable_page_ctor() and pgtable_page_dtor(),
@@ -73,7 +78,7 @@ NOTE: pgtable_page_ctor() and pgtable_pmd_page_ctor() can fail -- it must
be handled properly.
page->ptl
----------
+=========
page->ptl is used to access the split page table lock, where 'page' is the
struct page of the page containing the table. It shares storage with page->private
@@ -81,6 +86,7 @@ page of page containing the table. It shares storage with page->private
To avoid increasing size of struct page and have best performance, we use a
trick:
+
- if spinlock_t fits into long, we use page->ptl as spinlock, so we
can avoid indirect access and save a cache line.
- if size of spinlock_t is bigger than size of long, we use page->ptl as
diff --git a/Documentation/vm/swap_numa.txt b/Documentation/vm/swap_numa.rst
index d5960c9124f5..e0466f2db8fa 100644
--- a/Documentation/vm/swap_numa.txt
+++ b/Documentation/vm/swap_numa.rst
@@ -1,5 +1,8 @@
+.. _swap_numa:
+
+===========================================
Automatically bind swap device to numa node
--------------------------------------------
+===========================================
If the system has more than one swap device and the swap devices have node
information, we can make use of this information to decide which swap
@@ -7,15 +10,16 @@ device to use in get_swap_pages() to get better performance.
How to use this feature
------------------------
+=======================
Each swap device has a priority, which decides the order in which it is used. To make
use of automatic binding, there is no need to manipulate priority settings
for swap devices. e.g. on a 2 node machine, assume 2 swap devices swapA and
swapB, with swapA attached to node 0 and swapB attached to node 1, are going
-to be swapped on. Simply swapping them on by doing:
-# swapon /dev/swapA
-# swapon /dev/swapB
+to be swapped on. Simply swapping them on by doing::
+
+ # swapon /dev/swapA
+ # swapon /dev/swapB
Then node 0 will use the two swap devices in the order of swapA then swapB and
node 1 will use the two swap devices in the order of swapB then swapA. Note
@@ -24,32 +28,39 @@ that the order of them being swapped on doesn't matter.
A more complex example on a 4 node machine. Assume 6 swap devices are going to
be swapped on: swapA and swapB are attached to node 0, swapC is attached to
node 1, swapD and swapE are attached to node 2 and swapF is attached to node3.
-The way to swap them on is the same as above:
-# swapon /dev/swapA
-# swapon /dev/swapB
-# swapon /dev/swapC
-# swapon /dev/swapD
-# swapon /dev/swapE
-# swapon /dev/swapF
-
-Then node 0 will use them in the order of:
-swapA/swapB -> swapC -> swapD -> swapE -> swapF
+The way to swap them on is the same as above::
+
+ # swapon /dev/swapA
+ # swapon /dev/swapB
+ # swapon /dev/swapC
+ # swapon /dev/swapD
+ # swapon /dev/swapE
+ # swapon /dev/swapF
+
+Then node 0 will use them in the order of::
+
+ swapA/swapB -> swapC -> swapD -> swapE -> swapF
+
swapA and swapB will be used in a round robin mode before any other swap device.
-node 1 will use them in the order of:
-swapC -> swapA -> swapB -> swapD -> swapE -> swapF
+node 1 will use them in the order of::
+
+ swapC -> swapA -> swapB -> swapD -> swapE -> swapF
+
+node 2 will use them in the order of::
+
+ swapD/swapE -> swapA -> swapB -> swapC -> swapF
-node 2 will use them in the order of:
-swapD/swapE -> swapA -> swapB -> swapC -> swapF
Similarly, swapD and swapE will be used in a round robin mode before any
other swap devices.
-node 3 will use them in the order of:
-swapF -> swapA -> swapB -> swapC -> swapD -> swapE
+node 3 will use them in the order of::
+
+ swapF -> swapA -> swapB -> swapC -> swapD -> swapE
Implementation details
-----------------------
+======================
The current code uses a priority based list, swap_avail_list, to decide
which swap device to use and if multiple swap devices share the same
diff --git a/Documentation/vm/transhuge.rst b/Documentation/vm/transhuge.rst
new file mode 100644
index 000000000000..a8cf6809e36e
--- /dev/null
+++ b/Documentation/vm/transhuge.rst
@@ -0,0 +1,197 @@
+.. _transhuge:
+
+============================
+Transparent Hugepage Support
+============================
+
+This document describes the design principles of Transparent Hugepage
+(THP) support and its interaction with other parts of memory management.
+
+Design principles
+=================
+
+- "graceful fallback": mm components which don't have transparent hugepage
+ knowledge fall back to breaking huge pmd mapping into table of ptes and,
+ if necessary, split a transparent hugepage. Therefore these components
+ can continue working on the regular pages or regular pte mappings.
+
+- if a hugepage allocation fails because of memory fragmentation,
+ regular pages should be gracefully allocated instead and mixed in
+ the same vma without any failure or significant delay and without
+ userland noticing
+
+- if some task quits and more hugepages become available (either
+ immediately in the buddy or through the VM), guest physical memory
+ backed by regular pages should be relocated on hugepages
+ automatically (with khugepaged)
+
+- it doesn't require memory reservation and in turn it uses hugepages
+ whenever possible (the only possible reservation here is kernelcore=
+ to prevent unmovable pages from fragmenting all the memory, but such
+ a tweak is not specific to transparent hugepage support and it's a
+ generic feature that applies to all dynamic high order allocations in
+ the kernel)
+
+get_user_pages and follow_page
+==============================
+
+get_user_pages and follow_page, if run on a hugepage, will return the
+head or tail pages as usual (exactly as they would do on
+hugetlbfs). Most gup users will only care about the actual physical
+address of the page and its temporary pinning to release after the I/O
+is complete, so they won't ever notice the fact the page is huge. But
+if any driver is going to mangle over the page structure of the tail
+page (like for checking page->mapping or other bits that are relevant
+for the head page and not the tail page), it should be updated to
+check the head page instead. Taking a reference on any head/tail page
+would prevent the page from being split by anyone.
+
+.. note::
+ these aren't new constraints to the GUP API, and they match the
+ same constraints that apply to hugetlbfs too, so any driver capable
+ of handling GUP on hugetlbfs will also work fine on transparent
+ hugepage backed mappings.
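+
+For instance, a driver that needs fields which are only valid in the
+head page can reach it with compound_head(); a minimal sketch, where
+``page`` is a page the driver already holds a reference on and
+``use_mapping()`` is a hypothetical consumer of the driver's own::
+
+    struct page *head = compound_head(page);
+
+    /* head == page unless page is a tail page */
+    use_mapping(head->mapping);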
+
+If you can't handle compound pages returned by follow_page, the
+FOLL_SPLIT bit can be specified as a parameter to follow_page, so
+that it will split the hugepages before returning them. Migration,
+for example, passes FOLL_SPLIT as a parameter to follow_page because
+it's not hugepage aware and in fact it can't work at all on hugetlbfs
+(but it instead works fine on transparent hugepages thanks to
+FOLL_SPLIT). Migration simply can't deal with hugepages being
+returned (as it's not only checking the pfn of the page and pinning
+it during the copy but it pretends to migrate the memory in regular
+page sizes and with regular pte/pmd mappings).
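+
+As an illustrative sketch (not lifted from any in-tree user), a caller
+that cannot handle compound pages could do::
+
+    struct page *page;
+
+    /* caller holds mmap_sem; vma and addr come from the caller */
+    page = follow_page(vma, addr, FOLL_GET | FOLL_SPLIT);
+    if (IS_ERR_OR_NULL(page))
+        return -EFAULT;
+
+    /* page is now a regular, non-compound page */
+    use_page(page);        /* hypothetical per-page work */
+    put_page(page);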
+
+Graceful fallback
+=================
+
+Code walking pagetables but unaware about huge pmds can simply call
+split_huge_pmd(vma, pmd, addr) where the pmd is the one returned by
+pmd_offset. It's trivial to make the code transparent hugepage aware
+by just grepping for "pmd_offset" and adding split_huge_pmd where
+missing after pmd_offset returns the pmd. Thanks to the graceful
+fallback design, with a one liner change, you can avoid writing
+hundreds if not thousands of lines of complex code to make your code
+hugepage aware.
+
+If you're not walking pagetables but you run into a physical hugepage
+that you can't handle natively in your code, you can split it by
+calling split_huge_page(page). This is what the Linux VM does, for
+example, before it tries to swap out the hugepage. split_huge_page()
+can fail if the page is pinned and you must handle this correctly.
+
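+A sketch of splitting a hugepage you cannot process natively, assuming
+you already hold a reference on ``page`` (split_huge_page() requires
+the page to be locked and may fail if the page is pinned)::
+
+    if (PageTransHuge(page)) {
+        lock_page(page);
+        if (split_huge_page(page)) {
+            /* failed: the page is pinned, fall back */
+            unlock_page(page);
+            return -EBUSY;
+        }
+        unlock_page(page);
+    }
+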
+Example to make mremap.c transparent hugepage aware with a one liner
+change::
+
+ diff --git a/mm/mremap.c b/mm/mremap.c
+ --- a/mm/mremap.c
+ +++ b/mm/mremap.c
+ @@ -41,6 +41,7 @@ static pmd_t *get_old_pmd(struct mm_stru
+ return NULL;
+
+ pmd = pmd_offset(pud, addr);
+ + split_huge_pmd(vma, pmd, addr);
+ if (pmd_none_or_clear_bad(pmd))
+ return NULL;
+
+Locking in hugepage aware code
+==============================
+
+We want as much code as possible hugepage aware, as calling
+split_huge_page() or split_huge_pmd() has a cost.
+
+To make pagetable walks huge pmd aware, all you need to do is to call
+pmd_trans_huge() on the pmd returned by pmd_offset. You must hold the
+mmap_sem in read (or write) mode to be sure a huge pmd cannot be
+created from under you by khugepaged (khugepaged collapse_huge_page
+takes the mmap_sem in write mode in addition to the anon_vma lock). If
+pmd_trans_huge returns false, you just fall back to the old code
+paths. If instead pmd_trans_huge returns true, you have to take the
+page table lock (pmd_lock()) and re-run pmd_trans_huge. Taking the
+page table lock will prevent the huge pmd from being converted into a
+regular pmd from under you (split_huge_pmd can run in parallel to the
+pagetable walk). If the second pmd_trans_huge returns false, you
+should just drop the page table lock and fall back to the old code as
+before. Otherwise you can proceed to process the huge pmd and the
+hugepage natively. Once finished you can drop the page table lock.
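+
+A minimal sketch of that pattern, assuming ``mm``, ``pud`` and ``addr``
+come from the surrounding walker (which holds mmap_sem) and
+``process_huge_pmd()`` is a hypothetical stand-in for the native huge
+pmd handling::
+
+    pmd_t *pmd = pmd_offset(pud, addr);
+    spinlock_t *ptl;
+
+    if (pmd_trans_huge(*pmd)) {
+        ptl = pmd_lock(mm, pmd);
+        if (pmd_trans_huge(*pmd)) {
+            /* still huge: handle the huge pmd natively */
+            process_huge_pmd(pmd);
+            spin_unlock(ptl);
+            return;
+        }
+        /* split from under us: drop the lock, take the pte path */
+        spin_unlock(ptl);
+    }
+    /* regular pte code path as before */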
+
+Refcounts and transparent huge pages
+====================================
+
+Refcounting on THP is mostly consistent with refcounting on other compound
+pages:
+
+ - get_page()/put_page() and GUP operate on the head page's ->_refcount.
+
+ - ->_refcount in tail pages is always zero: get_page_unless_zero() never
+ succeeds on tail pages.
+
+ - map/unmap of the pages with PTE entry increment/decrement ->_mapcount
+ on relevant sub-page of the compound page.
+
+ - map/unmap of the whole compound page accounted in compound_mapcount
+ (stored in first tail page). For file huge pages, we also increment
+ ->_mapcount of all sub-pages in order to have race-free detection of
+ last unmap of subpages.
+
+PageDoubleMap() indicates that the page is *possibly* mapped with PTEs.
+
+For anonymous pages PageDoubleMap() also indicates ->_mapcount in all
+subpages is offset up by one. This additional reference is required to
+get race-free detection of unmap of subpages when we have them mapped with
+both PMDs and PTEs.
+
+This is an optimization required to lower the overhead of per-subpage
+mapcount tracking. The alternative is to alter ->_mapcount in all subpages
+on each map/unmap of the whole compound page.
+
+For anonymous pages, we set PG_double_map when a PMD of the page is split
+for the first time, but the page still has a PMD mapping. The additional
+references go away with the last compound_mapcount.
+
+File pages get PG_double_map set on the first map of the page with a PTE;
+the flag goes away when the page gets evicted from the page cache.
+
+split_huge_page internally has to distribute the refcounts in the head
+page to the tail pages before clearing all PG_head/tail bits from the page
+structures. It can be done easily for refcounts taken by page table
+entries. But we don't have enough information on how to distribute any
+additional pins (i.e. from get_user_pages). split_huge_page() fails any
+request to split a pinned huge page: it expects the page count to be equal
+to the sum of the mapcounts of all sub-pages plus one (the split_huge_page
+caller must have a reference for the head page).
+
+split_huge_page uses migration entries to stabilize page->_refcount and
+page->_mapcount of anonymous pages. File pages are simply unmapped.
+
+We are safe against physical memory scanners too: the only legitimate way
+a scanner can get a reference to a page is get_page_unless_zero().
+
+All tail pages have zero ->_refcount until atomic_add(). This prevents the
+scanner from getting a reference to the tail page up to that point. After the
+atomic_add() we don't care about the ->_refcount value. We already know how
+many references should be uncharged from the head page.
+
+For the head page get_page_unless_zero() will succeed and we don't mind. It's
+clear where the reference should go after split: it will stay on the head page.
+
+Note that split_huge_pmd() doesn't have any limitations on refcounting:
+a pmd can be split at any point and the split never fails.
+
+Partial unmap and deferred_split_huge_page()
+============================================
+
+Unmapping part of a THP (with munmap() or some other way) is not going to
+free memory immediately. Instead, we detect that a subpage of the THP is
+not in use in page_remove_rmap() and queue the THP for splitting if memory
+pressure comes. Splitting will free up unused subpages.
+
+Splitting the page right away is not an option due to locking context in
+the place where we can detect partial unmap. It also might be
+counterproductive since in many cases partial unmap happens during exit(2)
+if a THP crosses a VMA boundary.
+
+The function deferred_split_huge_page() is used to queue a page for
+splitting. The splitting itself will happen when we get memory pressure
+via the shrinker interface.
diff --git a/Documentation/vm/transhuge.txt b/Documentation/vm/transhuge.txt
deleted file mode 100644
index 4dde03b44ad1..000000000000
--- a/Documentation/vm/transhuge.txt
+++ /dev/null
@@ -1,527 +0,0 @@
-= Transparent Hugepage Support =
-
-== Objective ==
-
-Performance critical computing applications dealing with large memory
-working sets are already running on top of libhugetlbfs and in turn
-hugetlbfs. Transparent Hugepage Support is an alternative means of
-backing virtual memory with huge pages that supports the automatic
-promotion and demotion of page sizes, without the shortcomings of
-hugetlbfs.
-
-Currently it only works for anonymous memory mappings and tmpfs/shmem.
-But in the future it can expand to other filesystems.
-
-The reason applications are running faster is because of two
-factors. The first factor is almost completely irrelevant and it's not
-of significant interest because it'll also have the downside of
-requiring larger clear-page copy-page in page faults which is a
-potentially negative effect. The first factor consists in taking a
-single page fault for each 2M virtual region touched by userland (so
-reducing the enter/exit kernel frequency by a 512 times factor). This
-only matters the first time the memory is accessed for the lifetime of
-a memory mapping. The second long lasting and much more important
-factor will affect all subsequent accesses to the memory for the whole
-runtime of the application. The second factor consists of two
-components: 1) the TLB miss will run faster (especially with
-virtualization using nested pagetables but almost always also on bare
-metal without virtualization) and 2) a single TLB entry will be
-mapping a much larger amount of virtual memory in turn reducing the
-number of TLB misses. With virtualization and nested pagetables the
-TLB entries can map larger sizes only if both KVM and the Linux guest
-are using hugepages, but a significant speedup already happens if only
-one of the two is using hugepages just because of the fact the TLB
-miss is going to run faster.
-
-== Design ==
-
-- "graceful fallback": mm components which don't have transparent hugepage
- knowledge fall back to breaking huge pmd mapping into table of ptes and,
- if necessary, split a transparent hugepage. Therefore these components
- can continue working on the regular pages or regular pte mappings.
-
-- if a hugepage allocation fails because of memory fragmentation,
- regular pages should be gracefully allocated instead and mixed in
- the same vma without any failure or significant delay and without
- userland noticing
-
-- if some task quits and more hugepages become available (either
- immediately in the buddy or through the VM), guest physical memory
- backed by regular pages should be relocated on hugepages
- automatically (with khugepaged)
-
-- it doesn't require memory reservation and in turn it uses hugepages
- whenever possible (the only possible reservation here is kernelcore=
- to prevent unmovable pages from fragmenting all the memory, but such
- a tweak is not specific to transparent hugepage support and it's a
- generic feature that applies to all dynamic high order allocations in
- the kernel)
-
-Transparent Hugepage Support maximizes the usefulness of free memory
-compared to the reservation approach of hugetlbfs by allowing all
-unused memory to be used as cache or other movable (or even unmovable)
-entities. It doesn't require reservation to prevent hugepage
-allocation failures from being noticeable from userland. It allows
-paging and all other advanced VM features to be available on the
-hugepages. It requires no modifications for applications to take
-advantage of it.
-
-Applications however can be further optimized to take advantage of
-this feature, like for example they've been optimized before to avoid
-a flood of mmap system calls for every malloc(4k). Optimizing userland
-is by far not mandatory and khugepaged already can take care of long
-lived page allocations even for hugepage unaware applications that
-deal with large amounts of memory.
-
-In certain cases when hugepages are enabled system wide, an application
-may end up allocating more memory resources. An application may mmap a
-large region but only touch 1 byte of it; in that case a 2M page might
-be allocated instead of a 4k page for no good reason. This is why it's
-possible to disable hugepages system-wide and to only have them inside
-MADV_HUGEPAGE madvise regions.
-
-Embedded systems should enable hugepages only inside madvise regions
-to eliminate any risk of wasting any precious byte of memory and to
-only run faster.
-
-Applications that get a lot of benefit from hugepages and that don't
-risk losing memory by using hugepages should use
-madvise(MADV_HUGEPAGE) on their critical mmapped regions.
-
-== sysfs ==
-
-Transparent Hugepage Support for anonymous memory can be entirely disabled
-(mostly for debugging purposes) or only enabled inside MADV_HUGEPAGE
-regions (to avoid the risk of consuming more memory resources) or enabled
-system wide. This can be achieved with one of:
-
-echo always >/sys/kernel/mm/transparent_hugepage/enabled
-echo madvise >/sys/kernel/mm/transparent_hugepage/enabled
-echo never >/sys/kernel/mm/transparent_hugepage/enabled
-
-It's also possible to limit defrag efforts in the VM to generate
-anonymous hugepages in case they're not immediately free to madvise
-regions or to never try to defrag memory and simply fall back to regular
-pages unless hugepages are immediately available. Clearly if we spend CPU
-time to defrag memory, we would expect to gain even more by the fact we
-use hugepages later instead of regular pages. This isn't always
-guaranteed, but it may be more likely in case the allocation is for a
-MADV_HUGEPAGE region.
-
-echo always >/sys/kernel/mm/transparent_hugepage/defrag
-echo defer >/sys/kernel/mm/transparent_hugepage/defrag
-echo defer+madvise >/sys/kernel/mm/transparent_hugepage/defrag
-echo madvise >/sys/kernel/mm/transparent_hugepage/defrag
-echo never >/sys/kernel/mm/transparent_hugepage/defrag
-
-"always" means that an application requesting THP will stall on allocation
-failure and directly reclaim pages and compact memory in an effort to
-allocate a THP immediately. This may be desirable for virtual machines
-that benefit heavily from THP use and are willing to delay the VM start
-to utilise them.
-
-"defer" means that an application will wake kswapd in the background
-to reclaim pages and wake kcompactd to compact memory so that THP is
-available in the near future. It's the responsibility of khugepaged
-to then install the THP pages later.
-
-"defer+madvise" will enter direct reclaim and compaction like "always", but
-only for regions that have used madvise(MADV_HUGEPAGE); all other regions
-will wake kswapd in the background to reclaim pages and wake kcompactd to
-compact memory so that THP is available in the near future.
-
-"madvise" will enter direct reclaim like "always" but only for regions
-that have used madvise(MADV_HUGEPAGE). This is the default behaviour.
-
-"never" should be self-explanatory.
-
-By default the kernel tries to use the huge zero page on read page faults
-to anonymous mappings. It's possible to disable the huge zero page by
-writing 0 or enable it back by writing 1:
-
-echo 0 >/sys/kernel/mm/transparent_hugepage/use_zero_page
-echo 1 >/sys/kernel/mm/transparent_hugepage/use_zero_page
-
-Some userspace (such as a test program, or an optimized memory allocation
-library) may want to know the size (in bytes) of a transparent hugepage:
-
-cat /sys/kernel/mm/transparent_hugepage/hpage_pmd_size
-
-khugepaged will be automatically started when
-transparent_hugepage/enabled is set to "always" or "madvise", and it'll
-be automatically shut down if it's set to "never".
-
-khugepaged usually runs at low frequency so while one may not want to
-invoke defrag algorithms synchronously during the page faults, it
-should be worth invoking defrag at least in khugepaged. However it's
-also possible to disable defrag in khugepaged by writing 0 or enable
-defrag in khugepaged by writing 1:
-
-echo 0 >/sys/kernel/mm/transparent_hugepage/khugepaged/defrag
-echo 1 >/sys/kernel/mm/transparent_hugepage/khugepaged/defrag
-
-You can also control how many pages khugepaged should scan at each
-pass:
-
-/sys/kernel/mm/transparent_hugepage/khugepaged/pages_to_scan
-
-and how many milliseconds to wait in khugepaged between each pass (you
-can set this to 0 to run khugepaged at 100% utilization of one core):
-
-/sys/kernel/mm/transparent_hugepage/khugepaged/scan_sleep_millisecs
-
-and how many milliseconds to wait in khugepaged if there's a hugepage
-allocation failure, to throttle the next allocation attempt:
-
-/sys/kernel/mm/transparent_hugepage/khugepaged/alloc_sleep_millisecs
-
-The khugepaged progress can be seen in the number of pages collapsed:
-
-/sys/kernel/mm/transparent_hugepage/khugepaged/pages_collapsed
-
-for each pass:
-
-/sys/kernel/mm/transparent_hugepage/khugepaged/full_scans
-
-max_ptes_none specifies how many extra small pages (that are
-not already mapped) can be allocated when collapsing a group
-of small pages into one large page.
-
-/sys/kernel/mm/transparent_hugepage/khugepaged/max_ptes_none
-
-A higher value leads to programs using additional memory. A lower
-value leads to less of the thp performance gain. The value of
-max_ptes_none has a negligible effect on cpu time, so you can
-ignore it.
-
-max_ptes_swap specifies how many pages can be brought in from
-swap when collapsing a group of pages into a transparent huge page.
-
-/sys/kernel/mm/transparent_hugepage/khugepaged/max_ptes_swap
-
-A higher value can cause excessive swap IO and waste
-memory. A lower value can prevent THPs from being
-collapsed, resulting in fewer pages being collapsed into
-THPs, and lower memory access performance.
-
-== Boot parameter ==
-
-You can change the sysfs boot time defaults of Transparent Hugepage
-Support by passing the parameter "transparent_hugepage=always" or
-"transparent_hugepage=madvise" or "transparent_hugepage=never"
-(without "") to the kernel command line.
-
-== Hugepages in tmpfs/shmem ==
-
-You can control hugepage allocation policy in tmpfs with mount option
-"huge=". It can have following values:
-
- - "always":
- Attempt to allocate huge pages every time we need a new page;
-
- - "never":
- Do not allocate huge pages;
-
- - "within_size":
- Only allocate huge page if it will be fully within i_size.
- Also respect fadvise()/madvise() hints;
-
- - "advise:
- Only allocate huge pages if requested with fadvise()/madvise();
-
-The default policy is "never".
-
-"mount -o remount,huge= /mountpoint" works fine after mount: remounting
-huge=never will not attempt to break up huge pages at all, just stop more
-from being allocated.
-
-There's also a sysfs knob to control hugepage allocation policy for the internal
-shmem mount: /sys/kernel/mm/transparent_hugepage/shmem_enabled. The mount
-is used for SysV SHM, memfds, shared anonymous mmaps (of /dev/zero or
-MAP_ANONYMOUS), GPU drivers' DRM objects, Ashmem.
-
-In addition to policies listed above, shmem_enabled allows two further
-values:
-
- - "deny":
- For use in emergencies, to force the huge option off from
- all mounts;
- - "force":
- Force the huge option on for all - very useful for testing;
-
-== Need of application restart ==
-
-The transparent_hugepage/enabled values and tmpfs mount option only affect
-future behavior. So to make them effective you need to restart any
-application that could have been using hugepages. This also applies to the
-regions registered in khugepaged.
-
-== Monitoring usage ==
-
-The number of anonymous transparent huge pages currently used by the
-system is available by reading the AnonHugePages field in /proc/meminfo.
-To identify what applications are using anonymous transparent huge pages,
-it is necessary to read /proc/PID/smaps and count the AnonHugePages fields
-for each mapping.
-
-The number of file transparent huge pages mapped to userspace is available
-by reading ShmemPmdMapped and ShmemHugePages fields in /proc/meminfo.
-To identify what applications are mapping file transparent huge pages, it
-is necessary to read /proc/PID/smaps and count the FileHugeMapped fields
-for each mapping.
-
-Note that reading the smaps file is expensive and reading it
-frequently will incur overhead.
-
-There are a number of counters in /proc/vmstat that may be used to
-monitor how successfully the system is providing huge pages for use.
-
-thp_fault_alloc is incremented every time a huge page is successfully
- allocated to handle a page fault. This applies to both the
- first time a page is faulted and for COW faults.
-
-thp_collapse_alloc is incremented by khugepaged when it has found
- a range of pages to collapse into one huge page and has
- successfully allocated a new huge page to store the data.
-
-thp_fault_fallback is incremented if a page fault fails to allocate
- a huge page and instead falls back to using small pages.
-
-thp_collapse_alloc_failed is incremented if khugepaged found a range
- of pages that should be collapsed into one huge page but failed
- the allocation.
-
-thp_file_alloc is incremented every time a file huge page is successfully
- allocated.
-
-thp_file_mapped is incremented every time a file huge page is mapped into
- user address space.
-
-thp_split_page is incremented every time a huge page is split into base
- pages. This can happen for a variety of reasons but a common
- reason is that a huge page is old and is being reclaimed.
- This action implies splitting all PMDs the page is mapped with.
-
-thp_split_page_failed is incremented if kernel fails to split huge
- page. This can happen if the page was pinned by somebody.
-
-thp_deferred_split_page is incremented when a huge page is put onto split
- queue. This happens when a huge page is partially unmapped and
- splitting it would free up some memory. Pages on split queue are
- going to be split under memory pressure.
-
-thp_split_pmd is incremented every time a PMD is split into a table of
- PTEs. This can happen, for instance, when an application calls
- mprotect() or munmap() on part of a huge page. It doesn't split the
- huge page, only the page table entry.
-
-thp_zero_page_alloc is incremented every time a huge zero page is
- successfully allocated. It includes allocations which were
- dropped due to a race with another allocation. Note, it doesn't count
- every map of the huge zero page, only its allocation.
-
-thp_zero_page_alloc_failed is incremented if kernel fails to allocate
- huge zero page and falls back to using small pages.
-
-As the system ages, allocating huge pages may be expensive as the
-system uses memory compaction to copy data around memory to free a
-huge page for use. There are some counters in /proc/vmstat to help
-monitor this overhead.
-
-compact_stall is incremented every time a process stalls to run
- memory compaction so that a huge page is free for use.
-
-compact_success is incremented if the system compacted memory and
- freed a huge page for use.
-
-compact_fail is incremented if the system tries to compact memory
- but failed.
-
-compact_pages_moved is incremented each time a page is moved. If
- this value is increasing rapidly, it implies that the system
- is copying a lot of data to satisfy the huge page allocation.
- It is possible that the cost of copying exceeds any savings
- from reduced TLB misses.
-
-compact_pagemigrate_failed is incremented when the underlying mechanism
- for moving a page failed.
-
-compact_blocks_moved is incremented each time memory compaction examines
- a huge page aligned range of pages.
-
-It is possible to establish how long the stalls were by using the function
-tracer to record how long was spent in __alloc_pages_nodemask, and by
-using the mm_page_alloc tracepoint to identify which allocations were
-for huge pages.
-
-== get_user_pages and follow_page ==
-
-get_user_pages and follow_page, if run on a hugepage, will return the
-head or tail pages as usual (exactly as they would do on
-hugetlbfs). Most gup users will only care about the actual physical
-address of the page and its temporary pinning to release after the I/O
-is complete, so they won't ever notice the fact the page is huge. But
-if any driver is going to mangle over the page structure of the tail
-page (like for checking page->mapping or other bits that are relevant
-for the head page and not the tail page), it should be updated to
-check the head page instead. Taking a reference on any head/tail page
-would prevent the page from being split by anyone.
-
-NOTE: these aren't new constraints to the GUP API, and they match the
-same constraints that apply to hugetlbfs too, so any driver capable
-of handling GUP on hugetlbfs will also work fine on transparent
-hugepage backed mappings.
-
-If you can't handle compound pages returned by follow_page, the
-FOLL_SPLIT bit can be specified as a parameter to follow_page, so
-that it will split the hugepages before returning them. Migration,
-for example, passes FOLL_SPLIT as a parameter to follow_page because
-it's not hugepage aware and in fact it can't work at all on hugetlbfs
-(but it instead works fine on transparent hugepages thanks to
-FOLL_SPLIT). Migration simply can't deal with hugepages being
-returned (as it's not only checking the pfn of the page and pinning
-it during the copy but it pretends to migrate the memory in regular
-page sizes and with regular pte/pmd mappings).
-
-== Optimizing the applications ==
-
-To be guaranteed that the kernel will map a 2M page immediately in any
-memory region, the mmap region has to be hugepage naturally
-aligned. posix_memalign() can provide that guarantee.
-
-== Hugetlbfs ==
-
-You can use hugetlbfs on a kernel that has transparent hugepage
-support enabled just fine as always. No difference can be noted in
-hugetlbfs other than there will be less overall fragmentation. All
-usual features belonging to hugetlbfs are preserved and
-unaffected. libhugetlbfs will also work fine as usual.
-
-== Graceful fallback ==
-
-Code walking pagetables but unaware about huge pmds can simply call
-split_huge_pmd(vma, pmd, addr) where the pmd is the one returned by
-pmd_offset. It's trivial to make the code transparent hugepage aware
-by just grepping for "pmd_offset" and adding split_huge_pmd where
-missing after pmd_offset returns the pmd. Thanks to the graceful
-fallback design, with a one liner change, you can avoid writing
-hundreds if not thousands of lines of complex code to make your code
-hugepage aware.
-
-If you're not walking pagetables but you run into a physical hugepage
-that you can't handle natively in your code, you can split it by
-calling split_huge_page(page). This is what the Linux VM does, for
-example, before it tries to swap out the hugepage. split_huge_page()
-can fail if the page is pinned and you must handle this correctly.
-
-Example to make mremap.c transparent hugepage aware with a one liner
-change:
-
-diff --git a/mm/mremap.c b/mm/mremap.c
---- a/mm/mremap.c
-+++ b/mm/mremap.c
-@@ -41,6 +41,7 @@ static pmd_t *get_old_pmd(struct mm_stru
- return NULL;
-
- pmd = pmd_offset(pud, addr);
-+ split_huge_pmd(vma, pmd, addr);
- if (pmd_none_or_clear_bad(pmd))
- return NULL;
-
-== Locking in hugepage aware code ==
-
-We want as much code as possible hugepage aware, as calling
-split_huge_page() or split_huge_pmd() has a cost.
-
-To make pagetable walks huge pmd aware, all you need to do is to call
-pmd_trans_huge() on the pmd returned by pmd_offset. You must hold the
-mmap_sem in read (or write) mode to be sure a huge pmd cannot be
-created from under you by khugepaged (khugepaged collapse_huge_page
-takes the mmap_sem in write mode in addition to the anon_vma lock). If
-pmd_trans_huge returns false, you just fall back to the old code
-paths. If instead pmd_trans_huge returns true, you have to take the
-page table lock (pmd_lock()) and re-run pmd_trans_huge. Taking the
-page table lock will prevent the huge pmd from being converted into a
-regular pmd from under you (split_huge_pmd can run in parallel to the
-pagetable walk). If the second pmd_trans_huge returns false, you
-should just drop the page table lock and fall back to the old code as
-before. Otherwise you can proceed to process the huge pmd and the
-hugepage natively. Once finished you can drop the page table lock.
-
-== Refcounts and transparent huge pages ==
-
-Refcounting on THP is mostly consistent with refcounting on other compound
-pages:
-
- - get_page()/put_page() and GUP operate on the head page's ->_refcount.
-
- - ->_refcount in tail pages is always zero: get_page_unless_zero() never
- succeeds on tail pages.
-
- - map/unmap of the pages with PTE entry increment/decrement ->_mapcount
- on relevant sub-page of the compound page.
-
- - map/unmap of the whole compound page accounted in compound_mapcount
- (stored in first tail page). For file huge pages, we also increment
- ->_mapcount of all sub-pages in order to have race-free detection of
- last unmap of subpages.
-
-PageDoubleMap() indicates that the page is *possibly* mapped with PTEs.
-
-For anonymous pages PageDoubleMap() also indicates ->_mapcount in all
-subpages is offset up by one. This additional reference is required to
-get race-free detection of unmap of subpages when we have them mapped with
-both PMDs and PTEs.
-
-This is an optimization required to lower the overhead of per-subpage
-mapcount tracking. The alternative is to alter ->_mapcount in all subpages
-on each map/unmap of the whole compound page.
-
-For anonymous pages, we set PG_double_map when a PMD of the page is split
-for the first time, but the page still has a PMD mapping. The additional
-references go away with the last compound_mapcount.
-
-File pages get PG_double_map set on the first map of the page with a PTE;
-the flag goes away when the page gets evicted from the page cache.
-
-split_huge_page internally has to distribute the refcounts in the head
-page to the tail pages before clearing all PG_head/tail bits from the page
-structures. It can be done easily for refcounts taken by page table
-entries. But we don't have enough information on how to distribute any
-additional pins (i.e. from get_user_pages). split_huge_page() fails any
-request to split a pinned huge page: it expects the page count to be equal
-to the sum of the mapcounts of all sub-pages plus one (the split_huge_page
-caller must have a reference for the head page).
-
-split_huge_page uses migration entries to stabilize page->_refcount and
-page->_mapcount of anonymous pages. File pages are simply unmapped.
-
-We are safe against physical memory scanners too: the only legitimate way
-a scanner can get a reference to a page is get_page_unless_zero().
-
-All tail pages have zero ->_refcount until atomic_add(). This prevents the
-scanner from getting a reference to the tail page up to that point. After the
-atomic_add() we don't care about the ->_refcount value. We already know how
-many references should be uncharged from the head page.
-
-For the head page get_page_unless_zero() will succeed and we don't mind. It's
-clear where the reference should go after split: it will stay on the head page.
-
-Note that split_huge_pmd() doesn't have any limitations on refcounting:
-a pmd can be split at any point and the split never fails.
-
-== Partial unmap and deferred_split_huge_page() ==
-
-Unmapping part of a THP (with munmap() or some other way) is not going to
-free memory immediately. Instead, we detect that a subpage of the THP is
-not in use in page_remove_rmap() and queue the THP for splitting if memory
-pressure comes. Splitting will free up unused subpages.
-
-Splitting the page right away is not an option due to locking context in
-the place where we can detect partial unmap. It also might be
-counterproductive since in many cases partial unmap happens during exit(2)
-if a THP crosses a VMA boundary.
-
-The function deferred_split_huge_page() is used to queue a page for
-splitting. The splitting itself will happen when we get memory pressure
-via the shrinker interface.
diff --git a/Documentation/vm/unevictable-lru.txt b/Documentation/vm/unevictable-lru.rst
index e14718572476..fdd84cb8d511 100644
--- a/Documentation/vm/unevictable-lru.txt
+++ b/Documentation/vm/unevictable-lru.rst
@@ -1,37 +1,13 @@
- ==============================
- UNEVICTABLE LRU INFRASTRUCTURE
- ==============================
-
-========
-CONTENTS
-========
-
- (*) The Unevictable LRU
-
- - The unevictable page list.
- - Memory control group interaction.
- - Marking address spaces unevictable.
- - Detecting Unevictable Pages.
- - vmscan's handling of unevictable pages.
-
- (*) mlock()'d pages.
-
- - History.
- - Basic management.
- - mlock()/mlockall() system call handling.
- - Filtering special vmas.
- - munlock()/munlockall() system call handling.
- - Migrating mlocked pages.
- - Compacting mlocked pages.
- - mmap(MAP_LOCKED) system call handling.
- - munmap()/exit()/exec() system call handling.
- - try_to_unmap().
- - try_to_munlock() reverse map scan.
- - Page reclaim in shrink_*_list().
+.. _unevictable_lru:
+
+==============================
+Unevictable LRU Infrastructure
+==============================
-============
-INTRODUCTION
+.. contents:: :local:
+
+
+Introduction
============
This document describes the Linux memory manager's "Unevictable LRU"
@@ -46,8 +22,8 @@ details - the "what does it do?" - by reading the code. One hopes that the
descriptions below add value by providing the answer to "why does it do that?".
-===================
-THE UNEVICTABLE LRU
+
+The Unevictable LRU
===================
The Unevictable LRU facility adds an additional LRU list to track unevictable
@@ -66,17 +42,17 @@ completely unresponsive.
The unevictable list addresses the following classes of unevictable pages:
- (*) Those owned by ramfs.
+ * Those owned by ramfs.
- (*) Those mapped into SHM_LOCK'd shared memory regions.
+ * Those mapped into SHM_LOCK'd shared memory regions.
- (*) Those mapped into VM_LOCKED [mlock()ed] VMAs.
+ * Those mapped into VM_LOCKED [mlock()ed] VMAs.
The infrastructure may also be able to handle other conditions that make pages
unevictable, either by definition or by circumstance, in the future.
-THE UNEVICTABLE PAGE LIST
+The Unevictable Page List
-------------------------
The Unevictable LRU infrastructure consists of an additional, per-zone, LRU list
@@ -118,7 +94,7 @@ the unevictable list when one task has the page isolated from the LRU and other
tasks are changing the "evictability" state of the page.
-MEMORY CONTROL GROUP INTERACTION
+Memory Control Group Interaction
--------------------------------
The unevictable LRU facility interacts with the memory control group [aka
@@ -144,7 +120,9 @@ effects:
the control group to thrash or to OOM-kill tasks.
-MARKING ADDRESS SPACES UNEVICTABLE
+.. _mark_addr_space_unevict:
+
+Marking Address Spaces Unevictable
----------------------------------
For facilities such as ramfs none of the pages attached to the address space
@@ -152,15 +130,15 @@ may be evicted. To prevent eviction of any such pages, the AS_UNEVICTABLE
address space flag is provided, and this can be manipulated by a filesystem
using a number of wrapper functions:
- (*) void mapping_set_unevictable(struct address_space *mapping);
+ * ``void mapping_set_unevictable(struct address_space *mapping);``
Mark the address space as being completely unevictable.
- (*) void mapping_clear_unevictable(struct address_space *mapping);
+ * ``void mapping_clear_unevictable(struct address_space *mapping);``
Mark the address space as being evictable.
- (*) int mapping_unevictable(struct address_space *mapping);
+ * ``int mapping_unevictable(struct address_space *mapping);``
Query the address space, and return true if it is completely
unevictable.
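+
+For example, ramfs marks its inodes' mappings unevictable when the
+inode is created; a minimal sketch of such a filesystem::
+
+	/* in the filesystem's inode-creation path */
+	mapping_set_unevictable(inode->i_mapping);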
@@ -177,12 +155,13 @@ These are currently used in two places in the kernel:
ensure they're in memory.
-DETECTING UNEVICTABLE PAGES
+Detecting Unevictable Pages
---------------------------
The function page_evictable() in vmscan.c determines whether a page is
-evictable or not using the query function outlined above [see section "Marking
-address spaces unevictable"] to check the AS_UNEVICTABLE flag.
+evictable or not using the query function outlined above [see section
+:ref:`Marking address spaces unevictable <mark_addr_space_unevict>`]
+to check the AS_UNEVICTABLE flag.
For address spaces that are so marked after being populated (as SHM regions
might be), the lock action (eg: SHM_LOCK) can be lazy, and need not populate
@@ -202,7 +181,7 @@ flag, PG_mlocked (as wrapped by PageMlocked()), which is set when a page is
faulted into a VM_LOCKED vma, or found in a vma being VM_LOCKED.
-VMSCAN'S HANDLING OF UNEVICTABLE PAGES
+Vmscan's Handling of Unevictable Pages
--------------------------------------
If unevictable pages are culled in the fault path, or moved to the unevictable
@@ -233,8 +212,7 @@ extra evictabilty checks should not occur in the majority of calls to
putback_lru_page().
-=============
-MLOCKED PAGES
+MLOCKED Pages
=============
The unevictable page list is also useful for mlock(), in addition to ramfs and
@@ -242,7 +220,7 @@ SYSV SHM. Note that mlock() is only available in CONFIG_MMU=y situations; in
NOMMU situations, all mappings are effectively mlocked.
-HISTORY
+History
-------
The "Unevictable mlocked Pages" infrastructure is based on work originally
@@ -263,7 +241,7 @@ replaced by walking the reverse map to determine whether any VM_LOCKED VMAs
mapped the page. More on this below.
-BASIC MANAGEMENT
+Basic Management
----------------
mlocked pages - pages mapped into a VM_LOCKED VMA - are a class of unevictable
@@ -304,10 +282,10 @@ mlocked pages become unlocked and rescued from the unevictable list when:
(4) before a page is COW'd in a VM_LOCKED VMA.
-mlock()/mlockall() SYSTEM CALL HANDLING
+mlock()/mlockall() System Call Handling
---------------------------------------
-Both [do_]mlock() and [do_]mlockall() system call handlers call mlock_fixup()
+Both [do\_]mlock() and [do\_]mlockall() system call handlers call mlock_fixup()
for each VMA in the range specified by the call. In the case of mlockall(),
this is the entire active address space of the task. Note that mlock_fixup()
is used for both mlocking and munlocking a range of memory. A call to mlock()
@@ -351,7 +329,7 @@ mlock_vma_page() is unable to isolate the page from the LRU, vmscan will handle
it later if and when it attempts to reclaim the page.
-FILTERING SPECIAL VMAS
+Filtering Special VMAs
----------------------
mlock_fixup() filters several classes of "special" VMAs:
@@ -379,8 +357,9 @@ VM_LOCKED flag. Therefore, we won't have to deal with them later during
munlock(), munmap() or task exit. Neither does mlock_fixup() account these
VMAs against the task's "locked_vm".
+.. _munlock_munlockall_handling:
-munlock()/munlockall() SYSTEM CALL HANDLING
+munlock()/munlockall() System Call Handling
-------------------------------------------
The munlock() and munlockall() system calls are handled by the same functions -
This is fine, because we'll catch it later if and when vmscan tries to reclaim
the page. This should be relatively rare.
-MIGRATING MLOCKED PAGES
+Migrating MLOCKED Pages
-----------------------
A page that is being migrated has been isolated from the LRU lists and is held
@@ -451,7 +430,7 @@ list because of a race between munlock and migration, page migration uses the
putback_lru_page() function to add migrated pages back to the LRU.
-COMPACTING MLOCKED PAGES
+Compacting MLOCKED Pages
------------------------
The unevictable LRU can be scanned for compactable regions and the default
@@ -461,7 +440,7 @@ unevictable LRU is enabled, the work of compaction is mostly handled by
the page migration code and the same work flow as described in MIGRATING
MLOCKED PAGES will apply.
-MLOCKING TRANSPARENT HUGE PAGES
+MLOCKING Transparent Huge Pages
-------------------------------
A transparent huge page is represented by a single entry on an LRU list.
@@ -483,7 +462,7 @@ to unevictable LRU and the rest can be reclaimed.
See also comment in follow_trans_huge_pmd().
-mmap(MAP_LOCKED) SYSTEM CALL HANDLING
+mmap(MAP_LOCKED) System Call Handling
-------------------------------------
In addition to the mlock()/mlockall() system calls, an application can request
@@ -514,7 +493,7 @@ memory range accounted as locked_vm, as the protections could be changed later
and pages allocated into that region.
-munmap()/exit()/exec() SYSTEM CALL HANDLING
+munmap()/exit()/exec() System Call Handling
-------------------------------------------
When unmapping an mlocked region of memory, whether by an explicit call to
@@ -568,16 +547,18 @@ munlock or munmap system calls, mm teardown (munlock_vma_pages_all), reclaim,
holepunching, and truncation of file pages and their anonymous COWed pages.
-try_to_munlock() REVERSE MAP SCAN
+try_to_munlock() Reverse Map Scan
---------------------------------
- [!] TODO/FIXME: a better name might be page_mlocked() - analogous to the
- page_referenced() reverse map walker.
+.. warning::
+ [!] TODO/FIXME: a better name might be page_mlocked() - analogous to the
+ page_referenced() reverse map walker.
-When munlock_vma_page() [see section "munlock()/munlockall() System Call
-Handling" above] tries to munlock a page, it needs to determine whether or not
-the page is mapped by any VM_LOCKED VMA without actually attempting to unmap
-all PTEs from the page. For this purpose, the unevictable/mlock infrastructure
+When munlock_vma_page() [see section :ref:`munlock()/munlockall() System Call
+Handling <munlock_munlockall_handling>` above] tries to munlock a
+page, it needs to determine whether or not the page is mapped by any
+VM_LOCKED VMA without actually attempting to unmap all PTEs from the
+page. For this purpose, the unevictable/mlock infrastructure
introduced a variant of try_to_unmap() called try_to_munlock().
try_to_munlock() calls the same functions as try_to_unmap() for anonymous and
@@ -595,7 +576,7 @@ large region or tearing down a large address space that has been mlocked via
mlockall(), overall this is a fairly rare event.
-PAGE RECLAIM IN shrink_*_list()
+Page Reclaim in shrink_*_list()
-------------------------------
shrink_active_list() culls any obviously unevictable pages - i.e.
diff --git a/Documentation/vm/z3fold.txt b/Documentation/vm/z3fold.rst
index 38e4dac810b6..224e3c61d686 100644
--- a/Documentation/vm/z3fold.txt
+++ b/Documentation/vm/z3fold.rst
@@ -1,5 +1,8 @@
+.. _z3fold:
+
+======
z3fold
-------
+======
z3fold is a special purpose allocator for storing compressed pages.
It is designed to store up to three compressed pages per physical page.
@@ -7,6 +10,7 @@ It is a zbud derivative which allows for higher compression
ratio keeping the simplicity and determinism of its predecessor.
The main differences between z3fold and zbud are:
+
* unlike zbud, z3fold allows for up to PAGE_SIZE allocations
* z3fold can hold up to 3 compressed pages in its page
* z3fold doesn't export any API itself and is thus intended to be used
diff --git a/Documentation/vm/zsmalloc.txt b/Documentation/vm/zsmalloc.rst
index 64ed63c4f69d..6e79893d6132 100644
--- a/Documentation/vm/zsmalloc.txt
+++ b/Documentation/vm/zsmalloc.rst
@@ -1,5 +1,8 @@
+.. _zsmalloc:
+
+========
zsmalloc
---------
+========
This allocator is designed for use with zram. Thus, the allocator is
supposed to work well under low memory conditions. In particular, it
@@ -31,40 +34,49 @@ be mapped using zs_map_object() to get a usable pointer and subsequently
unmapped using zs_unmap_object().
stat
-----
+====
With CONFIG_ZSMALLOC_STAT, we could see zsmalloc internal information via
-/sys/kernel/debug/zsmalloc/<user name>. Here is a sample of stat output:
+``/sys/kernel/debug/zsmalloc/<user name>``. Here is a sample of stat output::
-# cat /sys/kernel/debug/zsmalloc/zram0/classes
+ # cat /sys/kernel/debug/zsmalloc/zram0/classes
class size almost_full almost_empty obj_allocated obj_used pages_used pages_per_zspage
- ..
- ..
+ ...
+ ...
9 176 0 1 186 129 8 4
10 192 1 0 2880 2872 135 3
11 208 0 1 819 795 42 2
12 224 0 1 219 159 12 4
- ..
- ..
+ ...
+ ...
+
+class
+ index
+size
+ object size zspage stores
+almost_empty
+ the number of ZS_ALMOST_EMPTY zspages (see below)
+almost_full
+ the number of ZS_ALMOST_FULL zspages (see below)
+obj_allocated
+ the number of objects allocated
+obj_used
+ the number of objects allocated to the user
+pages_used
+ the number of pages allocated for the class
+pages_per_zspage
+ the number of 0-order pages to make a zspage
-class: index
-size: object size zspage stores
-almost_empty: the number of ZS_ALMOST_EMPTY zspages(see below)
-almost_full: the number of ZS_ALMOST_FULL zspages(see below)
-obj_allocated: the number of objects allocated
-obj_used: the number of objects allocated to the user
-pages_used: the number of pages allocated for the class
-pages_per_zspage: the number of 0-order pages to make a zspage
+We assign a zspage to ZS_ALMOST_EMPTY fullness group when n <= N / f, where
-We assign a zspage to ZS_ALMOST_EMPTY fullness group when:
- n <= N / f, where
-n = number of allocated objects
-N = total number of objects zspage can store
-f = fullness_threshold_frac(ie, 4 at the moment)
+* n = number of allocated objects
+* N = total number of objects zspage can store
+* f = fullness_threshold_frac(ie, 4 at the moment)
Similarly, we assign zspage to:
- ZS_ALMOST_FULL when n > N / f
- ZS_EMPTY when n == 0
- ZS_FULL when n == N
+
+* ZS_ALMOST_FULL when n > N / f
+* ZS_EMPTY when n == 0
+* ZS_FULL when n == N
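
The classification rules above can be sketched in C. This is only an
illustration of the documented thresholds with f fixed at 4; the actual
in-kernel helper in mm/zsmalloc.c is more involved::

    enum fullness_group { ZS_EMPTY, ZS_ALMOST_EMPTY, ZS_ALMOST_FULL, ZS_FULL };

    /* Classify a zspage by fill level; f = fullness_threshold_frac. */
    static enum fullness_group classify(unsigned int n, unsigned int N)
    {
            const unsigned int f = 4;

            if (n == 0)
                    return ZS_EMPTY;
            if (n == N)
                    return ZS_FULL;
            if (n <= N / f)
                    return ZS_ALMOST_EMPTY;
            return ZS_ALMOST_FULL;
    }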
diff --git a/Documentation/vm/zswap.txt b/Documentation/vm/zswap.rst
index 0b3a1148f9f0..1444ecd40911 100644
--- a/Documentation/vm/zswap.txt
+++ b/Documentation/vm/zswap.rst
@@ -1,4 +1,11 @@
-Overview:
+.. _zswap:
+
+=====
+zswap
+=====
+
+Overview
+========
Zswap is a lightweight compressed cache for swap pages. It takes pages that are
in the process of being swapped out and attempts to compress them into a
@@ -7,32 +14,34 @@ for potentially reduced swap I/O.  This trade-off can also result in a
significant performance improvement if reads from the compressed cache are
faster than reads from a swap device.
-NOTE: Zswap is a new feature as of v3.11 and interacts heavily with memory
-reclaim. This interaction has not been fully explored on the large set of
-potential configurations and workloads that exist. For this reason, zswap
-is a work in progress and should be considered experimental.
+.. note::
+ Zswap is a new feature as of v3.11 and interacts heavily with memory
+ reclaim. This interaction has not been fully explored on the large set of
+ potential configurations and workloads that exist. For this reason, zswap
+ is a work in progress and should be considered experimental.
+
+ Some potential benefits:
-Some potential benefits:
* Desktop/laptop users with limited RAM capacities can mitigate the
-    performance impact of swapping.
+ performance impact of swapping.
* Overcommitted guests that share a common I/O resource can
-    dramatically reduce their swap I/O pressure, avoiding heavy handed I/O
- throttling by the hypervisor. This allows more work to get done with less
- impact to the guest workload and guests sharing the I/O subsystem
+ dramatically reduce their swap I/O pressure, avoiding heavy handed I/O
+ throttling by the hypervisor. This allows more work to get done with less
+ impact to the guest workload and guests sharing the I/O subsystem
* Users with SSDs as swap devices can extend the life of the device by
-    drastically reducing life-shortening writes.
+ drastically reducing life-shortening writes.
Zswap evicts pages from compressed cache on an LRU basis to the backing swap
device when the compressed pool reaches its size limit. This requirement had
been identified in prior community discussions.
Zswap is disabled by default but can be enabled at boot time by setting
-the "enabled" attribute to 1 at boot time. ie: zswap.enabled=1. Zswap
+the ``enabled`` attribute to 1 at boot time. ie: ``zswap.enabled=1``. Zswap
can also be enabled and disabled at runtime using the sysfs interface.
An example command to enable zswap at runtime, assuming sysfs is mounted
-at /sys, is:
+at ``/sys``, is::
-echo 1 > /sys/module/zswap/parameters/enabled
+ echo 1 > /sys/module/zswap/parameters/enabled
When zswap is disabled at runtime it will stop storing pages that are
being swapped out. However, it will _not_ immediately write out or fault
@@ -43,7 +52,8 @@ pages out of the compressed pool, a swapoff on the swap device(s) will
fault back into memory all swapped out pages, including those in the
compressed pool.
-Design:
+Design
+======
Zswap receives pages for compression through the Frontswap API and is able to
evict pages from its own compressed pool on an LRU basis and write them back to
@@ -53,12 +63,12 @@ Zswap makes use of zpool for the managing the compressed memory pool. Each
allocation in zpool is not directly accessible by address. Rather, a handle is
returned by the allocation routine and that handle must be mapped before being
accessed. The compressed memory pool grows on demand and shrinks as compressed
-pages are freed. The pool is not preallocated. By default, a zpool of type
-zbud is created, but it can be selected at boot time by setting the "zpool"
-attribute, e.g. zswap.zpool=zbud. It can also be changed at runtime using the
-sysfs "zpool" attribute, e.g.
+pages are freed. The pool is not preallocated. By default, a zpool
+of type zbud is created, but it can be selected at boot time by
+setting the ``zpool`` attribute, e.g. ``zswap.zpool=zbud``. It can
+also be changed at runtime using the sysfs ``zpool`` attribute, e.g.::
-echo zbud > /sys/module/zswap/parameters/zpool
+ echo zbud > /sys/module/zswap/parameters/zpool
The zbud type zpool allocates exactly 1 page to store 2 compressed pages, which
means the compression ratio will always be 2:1 or worse (because of half-full
@@ -83,14 +93,16 @@ via frontswap, to free the compressed entry.
Zswap seeks to be simple in its policies. Sysfs attributes allow for one user
controlled policy:
+
* max_pool_percent - The maximum percentage of memory that the compressed
- pool can occupy.
+ pool can occupy.
-The default compressor is lzo, but it can be selected at boot time by setting
-the “compressor” attribute, e.g. zswap.compressor=lzo. It can also be changed
-at runtime using the sysfs "compressor" attribute, e.g.
+The default compressor is lzo, but it can be selected at boot time by
+setting the ``compressor`` attribute, e.g. ``zswap.compressor=lzo``.
+It can also be changed at runtime using the sysfs "compressor"
+attribute, e.g.::
-echo lzo > /sys/module/zswap/parameters/compressor
+ echo lzo > /sys/module/zswap/parameters/compressor
When the zpool and/or compressor parameter is changed at runtime, any existing
compressed pages are not modified; they are left in their own zpool. When a
@@ -106,11 +118,12 @@ compressed length of the page is set to zero and the pattern or same-filled
value is stored.
Same-value filled pages identification feature is enabled by default and can be
-disabled at boot time by setting the "same_filled_pages_enabled" attribute to 0,
-e.g. zswap.same_filled_pages_enabled=0. It can also be enabled and disabled at
-runtime using the sysfs "same_filled_pages_enabled" attribute, e.g.
+disabled at boot time by setting the ``same_filled_pages_enabled`` attribute
+to 0, e.g. ``zswap.same_filled_pages_enabled=0``. It can also be enabled and
+disabled at runtime using the sysfs ``same_filled_pages_enabled``
+attribute, e.g.::
-echo 1 > /sys/module/zswap/parameters/same_filled_pages_enabled
+ echo 1 > /sys/module/zswap/parameters/same_filled_pages_enabled
When zswap same-filled page identification is disabled at runtime, it will stop
checking for the same-value filled pages during store operation. However, the
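
All of the zswap knobs shown in this file live under
/sys/module/zswap/parameters/ and can also be driven programmatically. A
minimal userspace sketch (the zswap_set() helper is hypothetical, and
writing these files requires root)::

    #include <stdio.h>

    /* Write a value to a zswap module parameter, equivalent to e.g.
     * echo 1 > /sys/module/zswap/parameters/enabled
     */
    static int zswap_set(const char *param, const char *val)
    {
            char path[128];
            FILE *fp;

            snprintf(path, sizeof(path), "/sys/module/zswap/parameters/%s", param);
            fp = fopen(path, "w");
            if (!fp)
                    return -1;
            fprintf(fp, "%s\n", val);
            return fclose(fp);
    }

    int main(void)
    {
            /* Enable zswap and cap the compressed pool at 20% of RAM. */
            return zswap_set("enabled", "1") ||
                   zswap_set("max_pool_percent", "20");
    }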
diff --git a/Documentation/x86/x86_64/boot-options.txt b/Documentation/x86/x86_64/boot-options.txt
index b297c48389b9..8d109ef67ab6 100644
--- a/Documentation/x86/x86_64/boot-options.txt
+++ b/Documentation/x86/x86_64/boot-options.txt
@@ -187,9 +187,9 @@ PCI
IOMMU (input/output memory management unit)
- Currently four x86-64 PCI-DMA mapping implementations exist:
+ Multiple x86-64 PCI-DMA mapping implementations exist, for example:
- 1. <arch/x86_64/kernel/pci-nommu.c>: use no hardware/software IOMMU at all
+ 1. <lib/dma-direct.c>: use no hardware/software IOMMU at all
(e.g. because you have < 3 GB memory).
Kernel boot message: "PCI-DMA: Disabling IOMMU"
@@ -208,7 +208,7 @@ IOMMU (input/output memory management unit)
Kernel boot message: "PCI-DMA: Using Calgary IOMMU"
iommu=[<size>][,noagp][,off][,force][,noforce][,leak[=<nr_of_leak_pages>]
- [,memaper[=<order>]][,merge][,forcesac][,fullflush][,nomerge]
+ [,memaper[=<order>]][,merge][,fullflush][,nomerge]
[,noaperture][,calgary]
General iommu options:
@@ -235,14 +235,7 @@ IOMMU (input/output memory management unit)
(experimental).
nomerge Don't do scatter-gather (SG) merging.
noaperture Ask the IOMMU not to touch the aperture for AGP.
- forcesac Force single-address cycle (SAC) mode for masks <40bits
- (experimental).
noagp Don't initialize the AGP driver and use full aperture.
- allowdac Allow double-address cycle (DAC) mode, i.e. DMA >4GB.
- DAC is used with 32-bit PCI to push a 64-bit address in
- two cycles. When off all DMA over >4GB is forced through
- an IOMMU or software bounce buffering.
- nodac Forbid DAC mode, i.e. DMA >4GB.
panic Always panic when IOMMU overflows.
calgary Use the Calgary IOMMU if it is available
diff --git a/LICENSES/exceptions/Linux-syscall-note b/LICENSES/exceptions/Linux-syscall-note
index 6b60b61be4e9..9abdad71fafd 100644
--- a/LICENSES/exceptions/Linux-syscall-note
+++ b/LICENSES/exceptions/Linux-syscall-note
@@ -1,6 +1,6 @@
SPDX-Exception-Identifier: Linux-syscall-note
SPDX-URL: https://spdx.org/licenses/Linux-syscall-note.html
-SPDX-Licenses: GPL-2.0, GPL-2.0+, GPL-1.0+, LGPL-2.0, LGPL-2.0+, LGPL-2.1, LGPL-2.1+
+SPDX-Licenses: GPL-2.0, GPL-2.0+, GPL-1.0+, LGPL-2.0, LGPL-2.0+, LGPL-2.1, LGPL-2.1+, GPL-2.0-only, GPL-2.0-or-later
Usage-Guide:
This exception is used together with one of the above SPDX-Licenses
to mark user space API (uapi) header files so they can be included
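
In a uapi header the exception is combined with one of the listed licenses
on the SPDX line via the WITH operator; with the identifiers added above,
both the classic and the newer spellings are valid, e.g.::

    /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
    /* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */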
diff --git a/LICENSES/other/Apache-2.0 b/LICENSES/other/Apache-2.0
new file mode 100644
index 000000000000..7cd903f573e5
--- /dev/null
+++ b/LICENSES/other/Apache-2.0
@@ -0,0 +1,183 @@
+Valid-License-Identifier: Apache-2.0
+SPDX-URL: https://spdx.org/licenses/Apache-2.0.html
+Usage-Guide:
+ To use the Apache License version 2.0 put the following SPDX tag/value
+ pair into a comment according to the placement guidelines in the
+ licensing rules documentation:
+ SPDX-License-Identifier: Apache-2.0
+License-Text:
+
+Apache License
+
+Version 2.0, January 2004
+
+http://www.apache.org/licenses/
+
+TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+1. Definitions.
+
+"License" shall mean the terms and conditions for use, reproduction, and
+distribution as defined by Sections 1 through 9 of this document.
+
+"Licensor" shall mean the copyright owner or entity authorized by the
+copyright owner that is granting the License.
+
+"Legal Entity" shall mean the union of the acting entity and all other
+entities that control, are controlled by, or are under common control with
+that entity. For the purposes of this definition, "control" means (i) the
+power, direct or indirect, to cause the direction or management of such
+entity, whether by contract or otherwise, or (ii) ownership of fifty
+percent (50%) or more of the outstanding shares, or (iii) beneficial
+ownership of such entity.
+
+"You" (or "Your") shall mean an individual or Legal Entity exercising
+permissions granted by this License.
+
+"Source" form shall mean the preferred form for making modifications,
+including but not limited to software source code, documentation source,
+and configuration files.
+
+"Object" form shall mean any form resulting from mechanical transformation
+or translation of a Source form, including but not limited to compiled
+object code, generated documentation, and conversions to other media types.
+
+"Work" shall mean the work of authorship, whether in Source or Object form,
+made available under the License, as indicated by a copyright notice that
+is included in or attached to the work (an example is provided in the
+Appendix below).
+
+"Derivative Works" shall mean any work, whether in Source or Object form,
+that is based on (or derived from) the Work and for which the editorial
+revisions, annotations, elaborations, or other modifications represent, as
+a whole, an original work of authorship. For the purposes of this License,
+Derivative Works shall not include works that remain separable from, or
+merely link (or bind by name) to the interfaces of, the Work and Derivative
+Works thereof.
+
+"Contribution" shall mean any work of authorship, including the original
+version of the Work and any modifications or additions to that Work or
+Derivative Works thereof, that is intentionally submitted to Licensor for
+inclusion in the Work by the copyright owner or by an individual or Legal
+Entity authorized to submit on behalf of the copyright owner. For the
+purposes of this definition, "submitted" means any form of electronic,
+verbal, or written communication sent to the Licensor or its
+representatives, including but not limited to communication on electronic
+mailing lists, source code control systems, and issue tracking systems that
+are managed by, or on behalf of, the Licensor for the purpose of discussing
+and improving the Work, but excluding communication that is conspicuously
+marked or otherwise designated in writing by the copyright owner as "Not a
+Contribution."
+
+"Contributor" shall mean Licensor and any individual or Legal Entity on
+behalf of whom a Contribution has been received by Licensor and
+subsequently incorporated within the Work.
+
+2. Grant of Copyright License. Subject to the terms and conditions of this
+ License, each Contributor hereby grants to You a perpetual, worldwide,
+ non-exclusive, no-charge, royalty-free, irrevocable copyright license to
+ reproduce, prepare Derivative Works of, publicly display, publicly
+ perform, sublicense, and distribute the Work and such Derivative Works
+ in Source or Object form.
+
+3. Grant of Patent License. Subject to the terms and conditions of this
+ License, each Contributor hereby grants to You a perpetual, worldwide,
+ non-exclusive, no-charge, royalty-free, irrevocable (except as stated in
+ this section) patent license to make, have made, use, offer to sell,
+ sell, import, and otherwise transfer the Work, where such license
+ applies only to those patent claims licensable by such Contributor that
+ are necessarily infringed by their Contribution(s) alone or by
+ combination of their Contribution(s) with the Work to which such
+ Contribution(s) was submitted. If You institute patent litigation
+ against any entity (including a cross-claim or counterclaim in a
+ lawsuit) alleging that the Work or a Contribution incorporated within
+ the Work constitutes direct or contributory patent infringement, then
+ any patent licenses granted to You under this License for that Work
+ shall terminate as of the date such litigation is filed.
+
+4. Redistribution. You may reproduce and distribute copies of the Work or
+ Derivative Works thereof in any medium, with or without modifications,
+ and in Source or Object form, provided that You meet the following
+ conditions:
+
+ a. You must give any other recipients of the Work or Derivative Works a
+ copy of this License; and
+
+ b. You must cause any modified files to carry prominent notices stating
+ that You changed the files; and
+
+ c. You must retain, in the Source form of any Derivative Works that You
+ distribute, all copyright, patent, trademark, and attribution notices
+ from the Source form of the Work, excluding those notices that do not
+ pertain to any part of the Derivative Works; and
+
+ d. If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained within
+ such NOTICE file, excluding those notices that do not pertain to any
+ part of the Derivative Works, in at least one of the following
+ places: within a NOTICE text file distributed as part of the
+ Derivative Works; within the Source form or documentation, if
+ provided along with the Derivative Works; or, within a display
+ generated by the Derivative Works, if and wherever such third-party
+ notices normally appear. The contents of the NOTICE file are for
+ informational purposes only and do not modify the License. You may
+ add Your own attribution notices within Derivative Works that You
+ distribute, alongside or as an addendum to the NOTICE text from the
+ Work, provided that such additional attribution notices cannot be
+ construed as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and may
+ provide additional or different license terms and conditions for use,
+ reproduction, or distribution of Your modifications, or for any such
+ Derivative Works as a whole, provided Your use, reproduction, and
+ distribution of the Work otherwise complies with the conditions stated
+ in this License.
+
+5. Submission of Contributions. Unless You explicitly state otherwise, any
+ Contribution intentionally submitted for inclusion in the Work by You to
+ the Licensor shall be under the terms and conditions of this License,
+ without any additional terms or conditions. Notwithstanding the above,
+ nothing herein shall supersede or modify the terms of any separate
+ license agreement you may have executed with Licensor regarding such
+ Contributions.
+
+6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+7. Disclaimer of Warranty. Unless required by applicable law or agreed to
+ in writing, Licensor provides the Work (and each Contributor provides
+ its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+ OF ANY KIND, either express or implied, including, without limitation,
+ any warranties or conditions of TITLE, NON-INFRINGEMENT,
+ MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely
+ responsible for determining the appropriateness of using or
+ redistributing the Work and assume any risks associated with Your
+ exercise of permissions under this License.
+
+8. Limitation of Liability. In no event and under no legal theory, whether
+ in tort (including negligence), contract, or otherwise, unless required
+ by applicable law (such as deliberate and grossly negligent acts) or
+ agreed to in writing, shall any Contributor be liable to You for
+ damages, including any direct, indirect, special, incidental, or
+ consequential damages of any character arising as a result of this
+ License or out of the use or inability to use the Work (including but
+ not limited to damages for loss of goodwill, work stoppage, computer
+ failure or malfunction, or any and all other commercial damages or
+ losses), even if such Contributor has been advised of the possibility of
+ such damages.
+
+9. Accepting Warranty or Additional Liability. While redistributing the
+ Work or Derivative Works thereof, You may choose to offer, and charge a
+ fee for, acceptance of support, warranty, indemnity, or other liability
+ obligations and/or rights consistent with this License. However, in
+ accepting such obligations, You may act only on Your own behalf and on
+ Your sole responsibility, not on behalf of any other Contributor, and
+ only if You agree to indemnify, defend, and hold each Contributor
+ harmless for any liability incurred by, or claims asserted against, such
+ Contributor by reason of your accepting any such warranty or additional
+ liability.
+
+END OF TERMS AND CONDITIONS
diff --git a/LICENSES/other/CC-BY-SA-4.0 b/LICENSES/other/CC-BY-SA-4.0
new file mode 100644
index 000000000000..f9158e831e79
--- /dev/null
+++ b/LICENSES/other/CC-BY-SA-4.0
@@ -0,0 +1,397 @@
+Valid-License-Identifier: CC-BY-SA-4.0
+SPDX-URL: https://spdx.org/licenses/CC-BY-SA-4.0
+Usage-Guide:
+ To use the Creative Commons Attribution Share Alike 4.0 International
+ license put the following SPDX tag/value pair into a comment according to
+ the placement guidelines in the licensing rules documentation:
+ SPDX-License-Identifier: CC-BY-SA-4.0
+License-Text:
+
+Creative Commons Attribution-ShareAlike 4.0 International
+
+Creative Commons Corporation ("Creative Commons") is not a law firm and
+does not provide legal services or legal advice. Distribution of Creative
+Commons public licenses does not create a lawyer-client or other
+relationship. Creative Commons makes its licenses and related information
+available on an "as-is" basis. Creative Commons gives no warranties
+regarding its licenses, any material licensed under their terms and
+conditions, or any related information. Creative Commons disclaims all
+liability for damages resulting from their use to the fullest extent
+possible.
+
+Using Creative Commons Public Licenses
+
+Creative Commons public licenses provide a standard set of terms and
+conditions that creators and other rights holders may use to share original
+works of authorship and other material subject to copyright and certain
+other rights specified in the public license below. The following
+considerations are for informational purposes only, are not exhaustive, and
+do not form part of our licenses.
+
+Considerations for licensors: Our public licenses are intended for use by
+those authorized to give the public permission to use material in ways
+otherwise restricted by copyright and certain other rights. Our licenses
+are irrevocable. Licensors should read and understand the terms and
+conditions of the license they choose before applying it. Licensors should
+also secure all rights necessary before applying our licenses so that the
+public can reuse the material as expected. Licensors should clearly mark
+any material not subject to the license. This includes other CC-licensed
+material, or material used under an exception or limitation to
+copyright. More considerations for licensors :
+wiki.creativecommons.org/Considerations_for_licensors
+
+Considerations for the public: By using one of our public licenses, a
+licensor grants the public permission to use the licensed material under
+specified terms and conditions. If the licensor's permission is not
+necessary for any reason - for example, because of any applicable exception
+or limitation to copyright - then that use is not regulated by the
+license. Our licenses grant only permissions under copyright and certain
+other rights that a licensor has authority to grant. Use of the licensed
+material may still be restricted for other reasons, including because
+others have copyright or other rights in the material. A licensor may make
+special requests, such as asking that all changes be marked or described.
+
+Although not required by our licenses, you are encouraged to respect those
+requests where reasonable. More considerations for the public :
+wiki.creativecommons.org/Considerations_for_licensees
+
+Creative Commons Attribution-ShareAlike 4.0 International Public License
+
+By exercising the Licensed Rights (defined below), You accept and agree to
+be bound by the terms and conditions of this Creative Commons
+Attribution-ShareAlike 4.0 International Public License ("Public
+License"). To the extent this Public License may be interpreted as a
+contract, You are granted the Licensed Rights in consideration of Your
+acceptance of these terms and conditions, and the Licensor grants You such
+rights in consideration of benefits the Licensor receives from making the
+Licensed Material available under these terms and conditions.
+
+Section 1 - Definitions.
+
+ a. Adapted Material means material subject to Copyright and Similar
+ Rights that is derived from or based upon the Licensed Material and
+ in which the Licensed Material is translated, altered, arranged,
+ transformed, or otherwise modified in a manner requiring permission
+ under the Copyright and Similar Rights held by the Licensor. For
+ purposes of this Public License, where the Licensed Material is a
+ musical work, performance, or sound recording, Adapted Material is
+ always produced where the Licensed Material is synched in timed
+ relation with a moving image.
+
+ b. Adapter's License means the license You apply to Your Copyright and
+ Similar Rights in Your contributions to Adapted Material in
+ accordance with the terms and conditions of this Public License.
+
+ c. BY-SA Compatible License means a license listed at
+ creativecommons.org/compatiblelicenses, approved by Creative Commons
+ as essentially the equivalent of this Public License.
+
+ d. Copyright and Similar Rights means copyright and/or similar rights
+ closely related to copyright including, without limitation,
+ performance, broadcast, sound recording, and Sui Generis Database
+ Rights, without regard to how the rights are labeled or
+ categorized. For purposes of this Public License, the rights
+ specified in Section 2(b)(1)-(2) are not Copyright and Similar
+ Rights.
+
+ e. Effective Technological Measures means those measures that, in the
+ absence of proper authority, may not be circumvented under laws
+ fulfilling obligations under Article 11 of the WIPO Copyright Treaty
+ adopted on December 20, 1996, and/or similar international
+ agreements.
+
+ f. Exceptions and Limitations means fair use, fair dealing, and/or any
+ other exception or limitation to Copyright and Similar Rights that
+ applies to Your use of the Licensed Material.
+
+ g. License Elements means the license attributes listed in the name of
+ a Creative Commons Public License. The License Elements of this
+ Public License are Attribution and ShareAlike.
+
+ h. Licensed Material means the artistic or literary work, database, or
+ other material to which the Licensor applied this Public License.
+
+ i. Licensed Rights means the rights granted to You subject to the terms
+ and conditions of this Public License, which are limited to all
+ Copyright and Similar Rights that apply to Your use of the Licensed
+ Material and that the Licensor has authority to license.
+
+ j. Licensor means the individual(s) or entity(ies) granting rights
+ under this Public License.
+
+ k. Share means to provide material to the public by any means or
+ process that requires permission under the Licensed Rights, such as
+ reproduction, public display, public performance, distribution,
+ dissemination, communication, or importation, and to make material
+ available to the public including in ways that members of the public
+ may access the material from a place and at a time individually
+ chosen by them.
+
+ l. Sui Generis Database Rights means rights other than copyright
+ resulting from Directive 96/9/EC of the European Parliament and of
+ the Council of 11 March 1996 on the legal protection of databases,
+ as amended and/or succeeded, as well as other essentially equivalent
+ rights anywhere in the world. m. You means the individual or entity
+ exercising the Licensed Rights under this Public License. Your has a
+ corresponding meaning.
+
+Section 2 - Scope.
+
+ a. License grant.
+
+ 1. Subject to the terms and conditions of this Public License, the
+ Licensor hereby grants You a worldwide, royalty-free,
+ non-sublicensable, non-exclusive, irrevocable license to
+ exercise the Licensed Rights in the Licensed Material to:
+
+ A. reproduce and Share the Licensed Material, in whole or in part; and
+
+ B. produce, reproduce, and Share Adapted Material.
+
+ 2. Exceptions and Limitations. For the avoidance of doubt, where
+ Exceptions and Limitations apply to Your use, this Public
+ License does not apply, and You do not need to comply with its
+ terms and conditions.
+
+ 3. Term. The term of this Public License is specified in Section 6(a).
+
+ 4. Media and formats; technical modifications allowed. The Licensor
+ authorizes You to exercise the Licensed Rights in all media and
+ formats whether now known or hereafter created, and to make
+ technical modifications necessary to do so. The Licensor waives
+ and/or agrees not to assert any right or authority to forbid You
+ from making technical modifications necessary to exercise the
+ Licensed Rights, including technical modifications necessary to
+ circumvent Effective Technological Measures. For purposes of
+ this Public License, simply making modifications authorized by
+ this Section 2(a)(4) never produces Adapted Material.
+
+ 5. Downstream recipients.
+
+ A. Offer from the Licensor - Licensed Material. Every recipient
+ of the Licensed Material automatically receives an offer
+ from the Licensor to exercise the Licensed Rights under the
+ terms and conditions of this Public License.
+
+ B. Additional offer from the Licensor - Adapted Material. Every
+ recipient of Adapted Material from You automatically
+ receives an offer from the Licensor to exercise the Licensed
+ Rights in the Adapted Material under the conditions of the
+ Adapter's License You apply.
+
+ C. No downstream restrictions. You may not offer or impose any
+ additional or different terms or conditions on, or apply any
+ Effective Technological Measures to, the Licensed Material
+ if doing so restricts exercise of the Licensed Rights by any
+ recipient of the Licensed Material.
+
+ 6. No endorsement. Nothing in this Public License constitutes or
+ may be construed as permission to assert or imply that You are,
+ or that Your use of the Licensed Material is, connected with, or
+ sponsored, endorsed, or granted official status by, the Licensor
+ or others designated to receive attribution as provided in
+ Section 3(a)(1)(A)(i).
+
+ b. Other rights.
+
+ 1. Moral rights, such as the right of integrity, are not licensed
+ under this Public License, nor are publicity, privacy, and/or
+ other similar personality rights; however, to the extent
+ possible, the Licensor waives and/or agrees not to assert any
+ such rights held by the Licensor to the limited extent necessary
+ to allow You to exercise the Licensed Rights, but not otherwise.
+
+ 2. Patent and trademark rights are not licensed under this Public
+ License.
+
+ 3. To the extent possible, the Licensor waives any right to collect
+ royalties from You for the exercise of the Licensed Rights,
+ whether directly or through a collecting society under any
+ voluntary or waivable statutory or compulsory licensing
+ scheme. In all other cases the Licensor expressly reserves any
+ right to collect such royalties.
+
+Section 3 - License Conditions.
+
+Your exercise of the Licensed Rights is expressly made subject to the
+following conditions.
+
+ a. Attribution.
+
+ 1. If You Share the Licensed Material (including in modified form),
+ You must:
+
+ A. retain the following if it is supplied by the Licensor with
+ the Licensed Material:
+
+ i. identification of the creator(s) of the Licensed
+ Material and any others designated to receive
+ attribution, in any reasonable manner requested by the
+ Licensor (including by pseudonym if designated);
+
+ ii. a copyright notice;
+
+ iii. a notice that refers to this Public License;
+
+ iv. a notice that refers to the disclaimer of warranties;
+
+ v. a URI or hyperlink to the Licensed Material to the extent reasonably practicable;
+
+ B. indicate if You modified the Licensed Material and retain an
+ indication of any previous modifications; and
+
+ C. indicate the Licensed Material is licensed under this Public
+ License, and include the text of, or the URI or hyperlink to,
+ this Public License.
+
+ 2. You may satisfy the conditions in Section 3(a)(1) in any
+ reasonable manner based on the medium, means, and context in
+ which You Share the Licensed Material. For example, it may be
+ reasonable to satisfy the conditions by providing a URI or
+ hyperlink to a resource that includes the required information.
+
+ 3. If requested by the Licensor, You must remove any of the
+ information required by Section 3(a)(1)(A) to the extent
+ reasonably practicable. b. ShareAlike.In addition to the
+ conditions in Section 3(a), if You Share Adapted Material You
+ produce, the following conditions also apply.
+
+ 1. The Adapter's License You apply must be a Creative Commons
+ license with the same License Elements, this version or
+ later, or a BY-SA Compatible License.
+
+ 2. You must include the text of, or the URI or hyperlink to, the
+ Adapter's License You apply. You may satisfy this condition
+ in any reasonable manner based on the medium, means, and
+ context in which You Share Adapted Material.
+
+ 3. You may not offer or impose any additional or different terms
+ or conditions on, or apply any Effective Technological
+ Measures to, Adapted Material that restrict exercise of the
+ rights granted under the Adapter's License You apply.
+
+Section 4 - Sui Generis Database Rights.
+
+Where the Licensed Rights include Sui Generis Database Rights that apply to
+Your use of the Licensed Material:
+
+ a. for the avoidance of doubt, Section 2(a)(1) grants You the right to
+ extract, reuse, reproduce, and Share all or a substantial portion of
+ the contents of the database;
+
+ b. if You include all or a substantial portion of the database contents
+ in a database in which You have Sui Generis Database Rights, then
+ the database in which You have Sui Generis Database Rights (but not
+ its individual contents) is Adapted Material, including for purposes
+ of Section 3(b); and
+
+ c. You must comply with the conditions in Section 3(a) if You Share all
+ or a substantial portion of the contents of the database.
+
+ For the avoidance of doubt, this Section 4 supplements and does not
+ replace Your obligations under this Public License where the Licensed
+ Rights include other Copyright and Similar Rights.
+
+Section 5 - Disclaimer of Warranties and Limitation of Liability.
+
+ a. Unless otherwise separately undertaken by the Licensor, to the
+ extent possible, the Licensor offers the Licensed Material as-is and
+ as-available, and makes no representations or warranties of any kind
+ concerning the Licensed Material, whether express, implied,
+ statutory, or other. This includes, without limitation, warranties
+ of title, merchantability, fitness for a particular purpose,
+ non-infringement, absence of latent or other defects, accuracy, or
+ the presence or absence of errors, whether or not known or
+ discoverable. Where disclaimers of warranties are not allowed in
+ full or in part, this disclaimer may not apply to You.
+
+ b. To the extent possible, in no event will the Licensor be liable to
+ You on any legal theory (including, without limitation, negligence)
+ or otherwise for any direct, special, indirect, incidental,
+ consequential, punitive, exemplary, or other losses, costs,
+ expenses, or damages arising out of this Public License or use of
+ the Licensed Material, even if the Licensor has been advised of the
+ possibility of such losses, costs, expenses, or damages. Where a
+ limitation of liability is not allowed in full or in part, this
+ limitation may not apply to You.
+
+ c. The disclaimer of warranties and limitation of liability provided
+ above shall be interpreted in a manner that, to the extent possible,
+ most closely approximates an absolute disclaimer and waiver of all
+ liability.
+
+Section 6 - Term and Termination.
+
+ a. This Public License applies for the term of the Copyright and
+ Similar Rights licensed here. However, if You fail to comply with
+ this Public License, then Your rights under this Public License
+ terminate automatically.
+
+ b. Where Your right to use the Licensed Material has terminated under
+ Section 6(a), it reinstates:
+
+ 1. automatically as of the date the violation is cured, provided it
+ is cured within 30 days of Your discovery of the violation; or
+
+ 2. upon express reinstatement by the Licensor.
+
+ c. For the avoidance of doubt, this Section 6(b) does not affect any
+ right the Licensor may have to seek remedies for Your violations of
+ this Public License.
+
+ d. For the avoidance of doubt, the Licensor may also offer the Licensed
+ Material under separate terms or conditions or stop distributing the
+ Licensed Material at any time; however, doing so will not terminate
+ this Public License.
+
+ e. Sections 1, 5, 6, 7, and 8 survive termination of this Public License.
+
+Section 7 - Other Terms and Conditions.
+
+ a. The Licensor shall not be bound by any additional or different terms
+ or conditions communicated by You unless expressly agreed.
+
+ b. Any arrangements, understandings, or agreements regarding the
+ Licensed Material not stated herein are separate from and
+ independent of the terms and conditions of this Public License.
+
+Section 8 - Interpretation.
+
+ a. For the avoidance of doubt, this Public License does not, and shall
+ not be interpreted to, reduce, limit, restrict, or impose conditions
+ on any use of the Licensed Material that could lawfully be made
+ without permission under this Public License.
+
+ b. To the extent possible, if any provision of this Public License is
+ deemed unenforceable, it shall be automatically reformed to the
+ minimum extent necessary to make it enforceable. If the provision
+ cannot be reformed, it shall be severed from this Public License
+ without affecting the enforceability of the remaining terms and
+ conditions.
+
+ c. No term or condition of this Public License will be waived and no
+ failure to comply consented to unless expressly agreed to by the
+ Licensor.
+
+ d. Nothing in this Public License constitutes or may be interpreted as
+ a limitation upon, or waiver of, any privileges and immunities that
+ apply to the Licensor or You, including from the legal processes of
+ any jurisdiction or authority.
+
+Creative Commons is not a party to its public licenses. Notwithstanding,
+Creative Commons may elect to apply one of its public licenses to material
+it publishes and in those instances will be considered the "Licensor." The
+text of the Creative Commons public licenses is dedicated to the public
+domain under the CC0 Public Domain Dedication. Except for the limited
+purpose of indicating that material is shared under a Creative Commons
+public license or as otherwise permitted by the Creative Commons policies
+published at creativecommons.org/policies, Creative Commons does not
+authorize the use of the trademark "Creative Commons" or any other
+trademark or logo of Creative Commons without its prior written consent
+including, without limitation, in connection with any unauthorized
+modifications to any of its public licenses or any other arrangements,
+understandings, or agreements concerning use of licensed material. For the
+avoidance of doubt, this paragraph does not form part of the public
+licenses.
+
+Creative Commons may be contacted at creativecommons.org.
diff --git a/LICENSES/other/CDDL-1.0 b/LICENSES/other/CDDL-1.0
new file mode 100644
index 000000000000..195a1687930a
--- /dev/null
+++ b/LICENSES/other/CDDL-1.0
@@ -0,0 +1,364 @@
+Valid-License-Identifier: CDDL-1.0
+SPDX-URL: https://spdx.org/licenses/CDDL-1.0.html
+Usage-Guide:
+ To use the Common Development and Distribution License 1.0 put the
+ following SPDX tag/value pair into a comment according to the placement
+ guidelines in the licensing rules documentation:
+ SPDX-License-Identifier: CDDL-1.0
+
+License-Text:
+
+COMMON DEVELOPMENT AND DISTRIBUTION LICENSE (CDDL)
+Version 1.0
+
+ 1. Definitions.
+
+ 1.1. "Contributor" means each individual or entity that creates or
+ contributes to the creation of Modifications.
+
+ 1.2. "Contributor Version" means the combination of the Original
+ Software, prior Modifications used by a Contributor (if any),
+ and the Modifications made by that particular Contributor.
+
+ 1.3. "Covered Software" means (a) the Original Software, or (b)
+ Modifications, or (c) the combination of files containing
+ Original Software with files containing Modifications, in each
+ case including portions thereof.
+
+ 1.4. "Executable" means the Covered Software in any form other than
+ Source Code.
+
+ 1.5. "Initial Developer" means the individual or entity that first
+ makes Original Software available under this License.
+
+ 1.6. "Larger Work" means a work which combines Covered Software or
+ portions thereof with code not governed by the terms of this
+ License.
+
+ 1.7. "License" means this document.
+
+ 1.8. "Licensable" means having the right to grant, to the maximum
+ extent possible, whether at the time of the initial grant or
+ subsequently acquired, any and all of the rights conveyed herein.
+
+ 1.9. "Modifications" means the Source Code and Executable form of
+ any of the following:
+
+ A. Any file that results from an addition to, deletion from or
+ modification of the contents of a file containing Original
+ Software or previous Modifications;
+
+ B. Any new file that contains any part of the Original Software
+ or previous Modification; or
+
+ C. Any new file that is contributed or otherwise made available
+ under the terms of this License.
+
+ 1.10. "Original Software" means the Source Code and Executable form
+ of computer software code that is originally released under
+ this License.
+
+ 1.11. "Patent Claims" means any patent claim(s), now owned or
+ hereafter acquired, including without limitation, method,
+ process, and apparatus claims, in any patent Licensable by
+ grantor.
+
+ 1.12. "Source Code" means (a) the common form of computer software
+ code in which modifications are made and (b) associated
+ documentation included in or with such code.
+
+ 1.13. "You" (or "Your") means an individual or a legal entity
+ exercising rights under, and complying with all of the terms
+ of, this License. For legal entities, "You" includes any
+ entity which controls, is controlled by, or is under common
+ control with You. For purposes of this definition, "control"
+ means (a) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract
+ or otherwise, or (b) ownership of more than fifty percent
+ (50%) of the outstanding shares or beneficial ownership of
+ such entity.
+
+ 2. License Grants.
+ 2.1. The Initial Developer Grant.
+
+ Conditioned upon Your compliance with Section 3.1 below and subject
+ to third party intellectual property claims, the Initial Developer
+ hereby grants You a world-wide, royalty-free, non-exclusive
+ license:
+
+ (a) under intellectual property rights (other than patent or
+ trademark) Licensable by Initial Developer, to use,
+ reproduce, modify, display, perform, sublicense and
+ distribute the Original Software (or portions thereof),
+ with or without Modifications, and/or as part of a Larger
+ Work; and
+
+ (b) under Patent Claims infringed by the making, using or
+ selling of Original Software, to make, have made, use,
+ practice, sell, and offer for sale, and/or otherwise
+ dispose of the Original Software (or portions thereof).
+
+ (c) The licenses granted in Sections 2.1(a) and (b) are
+ effective on the date Initial Developer first distributes
+ or otherwise makes the Original Software available to a
+ third party under the terms of this License.
+
+ (d) Notwithstanding Section 2.1(b) above, no patent license is
+ granted: (1) for code that You delete from the Original
+ Software, or (2) for infringements caused by: (i) the
+ modification of the Original Software, or (ii) the
+ combination of the Original Software with other software or
+ devices.
+
+ 2.2. Contributor Grant.
+
+ Conditioned upon Your compliance with Section 3.1 below and subject
+ to third party intellectual property claims, each Contributor
+ hereby grants You a world-wide, royalty-free, non-exclusive
+ license:
+
+ (a) under intellectual property rights (other than patent or
+ trademark) Licensable by Contributor to use, reproduce,
+ modify, display, perform, sublicense and distribute the
+ Modifications created by such Contributor (or portions
+ thereof), either on an unmodified basis, with other
+ Modifications, as Covered Software and/or as part of a
+ Larger Work; and
+
+ (b) under Patent Claims infringed by the making, using, or
+ selling of Modifications made by that Contributor either
+ alone and/or in combination with its Contributor Version
+ (or portions of such combination), to make, use, sell,
+ offer for sale, have made, and/or otherwise dispose of: (1)
+ Modifications made by that Contributor (or portions
+ thereof); and (2) the combination of Modifications made by
+ that Contributor with its Contributor Version (or portions
+ of such combination).
+
+ (c) The licenses granted in Sections 2.2(a) and 2.2(b) are
+ effective on the date Contributor first distributes or
+ otherwise makes the Modifications available to a third
+ party.
+
+ (d) Notwithstanding Section 2.2(b) above, no patent license is
+ granted: (1) for any code that Contributor has deleted from
+ the Contributor Version; (2) for infringements caused by:
+ (i) third party modifications of Contributor Version, or
+ (ii) the combination of Modifications made by that
+ Contributor with other software (except as part of the
+ Contributor Version) or other devices; or (3) under Patent
+ Claims infringed by Covered Software in the absence of
+ Modifications made by that Contributor.
+
+ 3. Distribution Obligations.
+ 3.1. Availability of Source Code.
+
+ Any Covered Software that You distribute or otherwise make
+ available in Executable form must also be made available in Source
+ Code form and that Source Code form must be distributed only under
+ the terms of this License. You must include a copy of this License
+ with every copy of the Source Code form of the Covered Software You
+ distribute or otherwise make available. You must inform recipients
+ of any such Covered Software in Executable form as to how they can
+ obtain such Covered Software in Source Code form in a reasonable
+ manner on or through a medium customarily used for software
+ exchange.
+
+ 3.2. Modifications.
+
+ The Modifications that You create or to which You contribute are
+ governed by the terms of this License. You represent that You
+ believe Your Modifications are Your original creation(s) and/or You
+ have sufficient rights to grant the rights conveyed by this
+ License.
+
+ 3.3. Required Notices.
+
+ You must include a notice in each of Your Modifications that
+ identifies You as the Contributor of the Modification. You may not
+ remove or alter any copyright, patent or trademark notices
+ contained within the Covered Software, or any notices of licensing
+ or any descriptive text giving attribution to any Contributor or
+ the Initial Developer.
+
+ 3.4. Application of Additional Terms.
+
+ You may not offer or impose any terms on any Covered Software in
+ Source Code form that alters or restricts the applicable version of
+ this License or the recipients' rights hereunder. You may choose to
+ offer, and to charge a fee for, warranty, support, indemnity or
+ liability obligations to one or more recipients of Covered
+ Software. However, you may do so only on Your own behalf, and not
+ on behalf of the Initial Developer or any Contributor. You must
+ make it absolutely clear that any such warranty, support, indemnity
+ or liability obligation is offered by You alone, and You hereby
+ agree to indemnify the Initial Developer and every Contributor for
+ any liability incurred by the Initial Developer or such Contributor
+ as a result of warranty, support, indemnity or liability terms You
+ offer.
+
+ 3.5. Distribution of Executable Versions.
+
+ You may distribute the Executable form of the Covered Software
+ under the terms of this License or under the terms of a license of
+ Your choice, which may contain terms different from this License,
+ provided that You are in compliance with the terms of this License
+ and that the license for the Executable form does not attempt to
+ limit or alter the recipient's rights in the Source Code form from
+ the rights set forth in this License. If You distribute the Covered
+ Software in Executable form under a different license, You must
+ make it absolutely clear that any terms which differ from this
+ License are offered by You alone, not by the Initial Developer or
+ Contributor. You hereby agree to indemnify the Initial Developer
+ and every Contributor for any liability incurred by the Initial
+ Developer or such Contributor as a result of any such terms You
+ offer.
+
+ 3.6. Larger Works.
+
+ You may create a Larger Work by combining Covered Software with
+ other code not governed by the terms of this License and distribute
+ the Larger Work as a single product. In such a case, You must make
+ sure the requirements of this License are fulfilled for the Covered
+ Software.
+
+ 4. Versions of the License.
+ 4.1. New Versions.
+
+ Sun Microsystems, Inc. is the initial license steward and may
+ publish revised and/or new versions of this License from time to
+ time. Each version will be given a distinguishing version
+ number. Except as provided in Section 4.3, no one other than the
+ license steward has the right to modify this License.
+
+ 4.2. Effect of New Versions.
+
+ You may always continue to use, distribute or otherwise make the
+ Covered Software available under the terms of the version of the
+ License under which You originally received the Covered
+ Software. If the Initial Developer includes a notice in the
+ Original Software prohibiting it from being distributed or
+ otherwise made available under any subsequent version of the
+ License, You must distribute and make the Covered Software
+ available under the terms of the version of the License under which
+ You originally received the Covered Software. Otherwise, You may
+ also choose to use, distribute or otherwise make the Covered
+ Software available under the terms of any subsequent version of the
+ License published by the license steward.
+
+ 4.3. Modified Versions.
+
+ When You are an Initial Developer and You want to create a new
+ license for Your Original Software, You may create and use a
+ modified version of this License if You: (a) rename the license and
+ remove any references to the name of the license steward (except to
+ note that the license differs from this License); and (b) otherwise
+ make it clear that the license contains terms which differ from
+ this License.
+
+ 5. DISCLAIMER OF WARRANTY.
+
+ COVERED SOFTWARE IS PROVIDED UNDER THIS LICENSE ON AN "AS IS" BASIS,
+ WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING,
+ WITHOUT LIMITATION, WARRANTIES THAT THE COVERED SOFTWARE IS FREE OF
+ DEFECTS, MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE OR
+ NON-INFRINGING. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF
+ THE COVERED SOFTWARE IS WITH YOU. SHOULD ANY COVERED SOFTWARE PROVE
+ DEFECTIVE IN ANY RESPECT, YOU (NOT THE INITIAL DEVELOPER OR ANY OTHER
+ CONTRIBUTOR) ASSUME THE COST OF ANY NECESSARY SERVICING, REPAIR OR
+ CORRECTION. THIS DISCLAIMER OF WARRANTY CONSTITUTES AN ESSENTIAL PART
+ OF THIS LICENSE. NO USE OF ANY COVERED SOFTWARE IS AUTHORIZED HEREUNDER
+ EXCEPT UNDER THIS DISCLAIMER.
+
+ 6. TERMINATION.
+
+ 6.1. This License and the rights granted hereunder will terminate
+ automatically if You fail to comply with terms herein and fail to
+ cure such breach within 30 days of becoming aware of the
+ breach. Provisions which, by their nature, must remain in effect
+ beyond the termination of this License shall survive.
+
+ 6.2. If You assert a patent infringement claim (excluding
+ declaratory judgment actions) against Initial Developer or a
+ Contributor (the Initial Developer or Contributor against whom You
+ assert such claim is referred to as "Participant") alleging that
+ the Participant Software (meaning the Contributor Version where the
+ Participant is a Contributor or the Original Software where the
+ Participant is the Initial Developer) directly or indirectly
+ infringes any patent, then any and all rights granted directly or
+ indirectly to You by such Participant, the Initial Developer (if
+ the Initial Developer is not the Participant) and all Contributors
+ under Sections 2.1 and/or 2.2 of this License shall, upon 60 days
+ notice from Participant terminate prospectively and automatically
+ at the expiration of such 60 day notice period, unless if within
+ such 60 day period You withdraw Your claim with respect to the
+ Participant Software against such Participant either unilaterally
+ or pursuant to a written agreement with Participant.
+
+ 6.3. In the event of termination under Sections 6.1 or 6.2 above,
+ all end user licenses that have been validly granted by You or any
+ distributor hereunder prior to termination (excluding licenses
+ granted to You by any distributor) shall survive termination.
+
+ 7. LIMITATION OF LIABILITY.
+
+ UNDER NO CIRCUMSTANCES AND UNDER NO LEGAL THEORY, WHETHER TORT
+ (INCLUDING NEGLIGENCE), CONTRACT, OR OTHERWISE, SHALL YOU, THE INITIAL
+ DEVELOPER, ANY OTHER CONTRIBUTOR, OR ANY DISTRIBUTOR OF COVERED
+ SOFTWARE, OR ANY SUPPLIER OF ANY OF SUCH PARTIES, BE LIABLE TO ANY
+ PERSON FOR ANY INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ OF ANY CHARACTER INCLUDING, WITHOUT LIMITATION, DAMAGES FOR LOST
+ PROFITS, LOSS OF GOODWILL, WORK STOPPAGE, COMPUTER FAILURE OR
+ MALFUNCTION, OR ANY AND ALL OTHER COMMERCIAL DAMAGES OR LOSSES, EVEN IF
+ SUCH PARTY SHALL HAVE BEEN INFORMED OF THE POSSIBILITY OF SUCH
+ DAMAGES. THIS LIMITATION OF LIABILITY SHALL NOT APPLY TO LIABILITY FOR
+ DEATH OR PERSONAL INJURY RESULTING FROM SUCH PARTY'S NEGLIGENCE TO THE
+ EXTENT APPLICABLE LAW PROHIBITS SUCH LIMITATION. SOME JURISDICTIONS DO
+ NOT ALLOW THE EXCLUSION OR LIMITATION OF INCIDENTAL OR CONSEQUENTIAL
+ DAMAGES, SO THIS EXCLUSION AND LIMITATION MAY NOT APPLY TO YOU.
+
+ 8. U.S. GOVERNMENT END USERS.
+
+ The Covered Software is a "commercial item," as that term is defined in
+ 48 C.F.R. 2.101 (Oct. 1995), consisting of "commercial computer
+ software" (as that term is defined at 48 C.F.R. $ 252.227-7014(a)(1))
+ and "commercial computer software documentation" as such terms are used
+ in 48 C.F.R. 12.212 (Sept. 1995). Consistent with 48 C.F.R. 12.212 and
+ 48 C.F.R. 227.7202-1 through 227.7202-4 (June 1995), all
+ U.S. Government End Users acquire Covered Software with only those
+ rights set forth herein. This U.S. Government Rights clause is in lieu
+ of, and supersedes, any other FAR, DFAR, or other clause or provision
+ that addresses Government rights in computer software under this
+ License.
+
+ 9. MISCELLANEOUS.
+
+ This License represents the complete agreement concerning subject
+ matter hereof. If any provision of this License is held to be
+ unenforceable, such provision shall be reformed only to the extent
+ necessary to make it enforceable. This License shall be governed by the
+ law of the jurisdiction specified in a notice contained within the
+ Original Software (except to the extent applicable law, if any,
+ provides otherwise), excluding such jurisdiction's conflict-of-law
+ provisions. Any litigation relating to this License shall be subject to
+ the jurisdiction of the courts located in the jurisdiction and venue
+ specified in a notice contained within the Original Software, with the
+ losing party responsible for costs, including, without limitation,
+ court costs and reasonable attorneys' fees and expenses. The
+ application of the United Nations Convention on Contracts for the
+ International Sale of Goods is expressly excluded. Any law or
+ regulation which provides that the language of a contract shall be
+ construed against the drafter shall not apply to this License. You
+ agree that You alone are responsible for compliance with the United
+ States export administration regulations (and the export control laws
+ and regulation of any other countries) when You use, distribute or
+ otherwise make available any Covered Software.
+
+ 10. RESPONSIBILITY FOR CLAIMS.
+
+ As between Initial Developer and the Contributors, each party is
+ responsible for claims and damages arising, directly or indirectly, out
+ of its utilization of rights under this License and You agree to work
+ with Initial Developer and Contributors to distribute such
+ responsibility on an equitable basis. Nothing herein is intended or
+ shall be deemed to constitute any admission of liability.
diff --git a/LICENSES/other/Linux-OpenIB b/LICENSES/other/Linux-OpenIB
new file mode 100644
index 000000000000..1ad85f6b3a89
--- /dev/null
+++ b/LICENSES/other/Linux-OpenIB
@@ -0,0 +1,26 @@
+Valid-License-Identifier: Linux-OpenIB
+SPDX-URL: https://spdx.org/licenses/Linux-OpenIB.html
+Usage-Guide:
+ To use the Linux Kernel Variant of OpenIB.org license put the following
+ SPDX tag/value pair into a comment according to the placement guidelines
+ in the licensing rules documentation:
+ SPDX-License-Identifier: Linux-OpenIB
+License-Text:
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.
diff --git a/LICENSES/other/X11 b/LICENSES/other/X11
new file mode 100644
index 000000000000..fe4353fd0000
--- /dev/null
+++ b/LICENSES/other/X11
@@ -0,0 +1,37 @@
+Valid-License-Identifier: X11
+SPDX-URL: https://spdx.org/licenses/X11.html
+Usage-Guide:
+ To use the X11 License put the following SPDX tag/value pair into a comment
+ according to the placement guidelines in the licensing rules
+ documentation:
+ SPDX-License-Identifier: X11
+License-Text:
+
+
+X11 License
+
+Copyright (C) 1996 X Consortium
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+the rights to use, copy, modify, merge, publish, distribute, sublicense,
+and/or sell copies of the Software, and to permit persons to whom the
+Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+X CONSORTIUM BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+Except as contained in this notice, the name of the X Consortium shall not
+be used in advertising or otherwise to promote the sale, use or other
+dealings in this Software without prior written authorization from the X
+Consortium.
+
+X Window System is a trademark of X Consortium, Inc.
diff --git a/LICENSES/preferred/GPL-2.0 b/LICENSES/preferred/GPL-2.0
index b8db91d3a1cb..ff0812fd89cc 100644
--- a/LICENSES/preferred/GPL-2.0
+++ b/LICENSES/preferred/GPL-2.0
@@ -1,5 +1,7 @@
Valid-License-Identifier: GPL-2.0
+Valid-License-Identifier: GPL-2.0-only
Valid-License-Identifier: GPL-2.0+
+Valid-License-Identifier: GPL-2.0-or-later
SPDX-URL: https://spdx.org/licenses/GPL-2.0.html
Usage-Guide:
To use this license in source code, put one of the following SPDX
@@ -7,8 +9,12 @@ Usage-Guide:
guidelines in the licensing rules documentation.
For 'GNU General Public License (GPL) version 2 only' use:
SPDX-License-Identifier: GPL-2.0
+ or
+ SPDX-License-Identifier: GPL-2.0-only
For 'GNU General Public License (GPL) version 2 or any later version' use:
SPDX-License-Identifier: GPL-2.0+
+ or
+ SPDX-License-Identifier: GPL-2.0-or-later
License-Text:
GNU GENERAL PUBLIC LICENSE
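
The two new identifiers are pure aliases: GPL-2.0-only means the same as
GPL-2.0, and GPL-2.0-or-later the same as GPL-2.0+. Per the placement
guidelines, the tag goes on the first line of a .c file in C99 comment
style; a minimal hypothetical example:

    // SPDX-License-Identifier: GPL-2.0-only
    /*
     * example.c - hypothetical driver source; the single tag line above
     * declares the license without the full boilerplate comment.
     */
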
diff --git a/MAINTAINERS b/MAINTAINERS
index 79bb02ff812f..aa635837a6af 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -137,9 +137,9 @@ Maintainers List (try to look for most precise areas first)
-----------------------------------
3C59X NETWORK DRIVER
-M: Steffen Klassert <klassert@mathematik.tu-chemnitz.de>
+M: Steffen Klassert <klassert@kernel.org>
L: netdev@vger.kernel.org
-S: Maintained
+S: Odd Fixes
F: Documentation/networking/vortex.txt
F: drivers/net/ethernet/3com/3c59x.c
@@ -2332,7 +2332,7 @@ F: drivers/gpio/gpio-ath79.c
F: Documentation/devicetree/bindings/gpio/gpio-ath79.txt
ATHEROS ATH GENERIC UTILITIES
-M: "Luis R. Rodriguez" <mcgrof@do-not-panic.com>
+M: Kalle Valo <kvalo@codeaurora.org>
L: linux-wireless@vger.kernel.org
S: Supported
F: drivers/net/wireless/ath/*
@@ -2347,7 +2347,7 @@ S: Maintained
F: drivers/net/wireless/ath/ath5k/
ATHEROS ATH6KL WIRELESS DRIVER
-M: Kalle Valo <kvalo@qca.qualcomm.com>
+M: Kalle Valo <kvalo@codeaurora.org>
L: linux-wireless@vger.kernel.org
W: http://wireless.kernel.org/en/users/Drivers/ath6kl
T: git git://git.kernel.org/pub/scm/linux/kernel/git/kvalo/ath.git
@@ -2554,7 +2554,6 @@ F: Documentation/devicetree/bindings/sound/axentia,*
F: sound/soc/atmel/tse850-pcm5142.c
AZ6007 DVB DRIVER
-M: Mauro Carvalho Chehab <mchehab@s-opensource.com>
M: Mauro Carvalho Chehab <mchehab@kernel.org>
L: linux-media@vger.kernel.org
W: https://linuxtv.org
@@ -3083,7 +3082,6 @@ F: include/linux/btrfs*
F: include/uapi/linux/btrfs*
BTTV VIDEO4LINUX DRIVER
-M: Mauro Carvalho Chehab <mchehab@s-opensource.com>
M: Mauro Carvalho Chehab <mchehab@kernel.org>
L: linux-media@vger.kernel.org
W: https://linuxtv.org
@@ -3693,7 +3691,6 @@ F: drivers/cpufreq/arm_big_little_dt.c
CPU POWER MONITORING SUBSYSTEM
M: Thomas Renninger <trenn@suse.com>
-M: Shuah Khan <shuahkh@osg.samsung.com>
M: Shuah Khan <shuah@kernel.org>
L: linux-pm@vger.kernel.org
S: Maintained
@@ -3812,7 +3809,6 @@ S: Maintained
F: drivers/media/dvb-frontends/cx24120*
CX88 VIDEO4LINUX DRIVER
-M: Mauro Carvalho Chehab <mchehab@s-opensource.com>
M: Mauro Carvalho Chehab <mchehab@kernel.org>
L: linux-media@vger.kernel.org
W: https://linuxtv.org
@@ -4313,7 +4309,7 @@ F: Documentation/driver-api/dma-buf.rst
T: git git://anongit.freedesktop.org/drm/drm-misc
DMA GENERIC OFFLOAD ENGINE SUBSYSTEM
-M: Vinod Koul <vinod.koul@intel.com>
+M: Vinod Koul <vkoul@kernel.org>
L: dmaengine@vger.kernel.org
Q: https://patchwork.kernel.org/project/linux-dmaengine/list/
S: Maintained
@@ -4334,12 +4330,14 @@ W: http://git.infradead.org/users/hch/dma-mapping.git
S: Supported
F: lib/dma-debug.c
F: lib/dma-direct.c
+F: lib/dma-noncoherent.c
F: lib/dma-virt.c
F: drivers/base/dma-mapping.c
F: drivers/base/dma-coherent.c
F: include/asm-generic/dma-mapping.h
F: include/linux/dma-direct.h
F: include/linux/dma-mapping.h
+F: include/linux/dma-noncoherent.h
DME1737 HARDWARE MONITOR DRIVER
M: Juerg Haefliger <juergh@gmail.com>
@@ -5053,7 +5051,6 @@ F: drivers/edac/thunderx_edac*
EDAC-CORE
M: Borislav Petkov <bp@alien8.de>
-M: Mauro Carvalho Chehab <mchehab@s-opensource.com>
M: Mauro Carvalho Chehab <mchehab@kernel.org>
L: linux-edac@vger.kernel.org
T: git git://git.kernel.org/pub/scm/linux/kernel/git/bp/bp.git for-next
@@ -5082,7 +5079,6 @@ S: Maintained
F: drivers/edac/fsl_ddr_edac.*
EDAC-GHES
-M: Mauro Carvalho Chehab <mchehab@s-opensource.com>
M: Mauro Carvalho Chehab <mchehab@kernel.org>
L: linux-edac@vger.kernel.org
S: Maintained
@@ -5099,21 +5095,18 @@ S: Maintained
F: drivers/edac/i5000_edac.c
EDAC-I5400
-M: Mauro Carvalho Chehab <mchehab@s-opensource.com>
M: Mauro Carvalho Chehab <mchehab@kernel.org>
L: linux-edac@vger.kernel.org
S: Maintained
F: drivers/edac/i5400_edac.c
EDAC-I7300
-M: Mauro Carvalho Chehab <mchehab@s-opensource.com>
M: Mauro Carvalho Chehab <mchehab@kernel.org>
L: linux-edac@vger.kernel.org
S: Maintained
F: drivers/edac/i7300_edac.c
EDAC-I7CORE
-M: Mauro Carvalho Chehab <mchehab@s-opensource.com>
M: Mauro Carvalho Chehab <mchehab@kernel.org>
L: linux-edac@vger.kernel.org
S: Maintained
@@ -5163,7 +5156,6 @@ S: Maintained
F: drivers/edac/r82600_edac.c
EDAC-SBRIDGE
-M: Mauro Carvalho Chehab <mchehab@s-opensource.com>
M: Mauro Carvalho Chehab <mchehab@kernel.org>
L: linux-edac@vger.kernel.org
S: Maintained
@@ -5222,7 +5214,6 @@ S: Maintained
F: drivers/net/ethernet/ibm/ehea/
EM28XX VIDEO4LINUX DRIVER
-M: Mauro Carvalho Chehab <mchehab@s-opensource.com>
M: Mauro Carvalho Chehab <mchehab@kernel.org>
L: linux-media@vger.kernel.org
W: https://linuxtv.org
@@ -5399,7 +5390,6 @@ S: Maintained
F: drivers/iommu/exynos-iommu.c
EZchip NPS platform support
-M: Elad Kanfi <eladkan@mellanox.com>
M: Vineet Gupta <vgupta@synopsys.com>
S: Supported
F: arch/arc/plat-eznps
@@ -5960,8 +5950,8 @@ S: Maintained
F: scripts/get_maintainer.pl
GFS2 FILE SYSTEM
-M: Steven Whitehouse <swhiteho@redhat.com>
M: Bob Peterson <rpeterso@redhat.com>
+M: Andreas Gruenbacher <agruenba@redhat.com>
L: cluster-devel@redhat.com
W: http://sources.redhat.com/cluster/
T: git git://git.kernel.org/pub/scm/linux/kernel/git/gfs2/linux-gfs2.git
@@ -6229,6 +6219,7 @@ L: linux-hwmon@vger.kernel.org
W: http://hwmon.wiki.kernel.org/
T: git git://git.kernel.org/pub/scm/linux/kernel/git/groeck/linux-staging.git
S: Maintained
+F: Documentation/devicetree/bindings/hwmon/
F: Documentation/hwmon/
F: drivers/hwmon/
F: include/linux/hwmon*.h
@@ -6515,9 +6506,15 @@ F: Documentation/networking/hinic.txt
F: drivers/net/ethernet/huawei/hinic/
HUGETLB FILESYSTEM
-M: Nadia Yvette Chambers <nyc@holomorphy.com>
+M: Mike Kravetz <mike.kravetz@oracle.com>
+L: linux-mm@kvack.org
S: Maintained
F: fs/hugetlbfs/
+F: mm/hugetlb.c
+F: include/linux/hugetlb.h
+F: Documentation/admin-guide/mm/hugetlbpage.rst
+F: Documentation/vm/hugetlbfs_reserv.rst
+F: Documentation/ABI/testing/sysfs-kernel-mm-hugepages
HVA ST MEDIA DRIVER
M: Jean-Christophe Trotin <jean-christophe.trotin@st.com>
@@ -7677,9 +7674,11 @@ L: linux-kbuild@vger.kernel.org
S: Maintained
F: Documentation/kbuild/
F: Makefile
-F: scripts/Makefile.*
+F: scripts/Kbuild*
+F: scripts/Makefile*
F: scripts/basic/
F: scripts/mk*
+F: scripts/mod/
F: scripts/package/
KERNEL JANITORS
@@ -7704,10 +7703,10 @@ F: include/linux/sunrpc/
F: include/uapi/linux/sunrpc/
KERNEL SELFTEST FRAMEWORK
-M: Shuah Khan <shuahkh@osg.samsung.com>
M: Shuah Khan <shuah@kernel.org>
L: linux-kselftest@vger.kernel.org
T: git git://git.kernel.org/pub/scm/linux/kernel/git/shuah/linux-kselftest.git
+Q: https://patchwork.kernel.org/project/linux-kselftest/list/
S: Maintained
F: tools/testing/selftests/
F: Documentation/dev-tools/kselftest*
@@ -8213,7 +8212,7 @@ F: drivers/misc/lkdtm/*
LINUX KERNEL MEMORY CONSISTENCY MODEL (LKMM)
M: Alan Stern <stern@rowland.harvard.edu>
-M: Andrea Parri <parri.andrea@gmail.com>
+M: Andrea Parri <andrea.parri@amarulasolutions.com>
M: Will Deacon <will.deacon@arm.com>
M: Peter Zijlstra <peterz@infradead.org>
M: Boqun Feng <boqun.feng@gmail.com>
@@ -8320,6 +8319,7 @@ F: Documentation/admin-guide/LSM/LoadPin.rst
LOCKING PRIMITIVES
M: Peter Zijlstra <peterz@infradead.org>
M: Ingo Molnar <mingo@redhat.com>
+M: Will Deacon <will.deacon@arm.com>
L: linux-kernel@vger.kernel.org
T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git locking/core
S: Maintained
@@ -8871,7 +8871,6 @@ F: Documentation/devicetree/bindings/media/nvidia,tegra-vde.txt
F: drivers/staging/media/tegra-vde/
MEDIA INPUT INFRASTRUCTURE (V4L/DVB)
-M: Mauro Carvalho Chehab <mchehab@s-opensource.com>
M: Mauro Carvalho Chehab <mchehab@kernel.org>
P: LinuxTV.org Project
L: linux-media@vger.kernel.org
@@ -9031,7 +9030,6 @@ Q: http://patchwork.ozlabs.org/project/netdev/list/
F: drivers/net/ethernet/mellanox/mlx5/core/en_*
MELLANOX ETHERNET INNOVA DRIVER
-M: Ilan Tayari <ilant@mellanox.com>
R: Boris Pismenny <borisp@mellanox.com>
L: netdev@vger.kernel.org
S: Supported
@@ -9041,7 +9039,6 @@ F: drivers/net/ethernet/mellanox/mlx5/core/fpga/*
F: include/linux/mlx5/mlx5_ifc_fpga.h
MELLANOX ETHERNET INNOVA IPSEC DRIVER
-M: Ilan Tayari <ilant@mellanox.com>
R: Boris Pismenny <borisp@mellanox.com>
L: netdev@vger.kernel.org
S: Supported
@@ -9097,7 +9094,6 @@ F: include/uapi/rdma/mlx4-abi.h
MELLANOX MLX5 core VPI driver
M: Saeed Mahameed <saeedm@mellanox.com>
-M: Matan Barak <matanb@mellanox.com>
M: Leon Romanovsky <leonro@mellanox.com>
L: netdev@vger.kernel.org
L: linux-rdma@vger.kernel.org
@@ -9108,7 +9104,6 @@ F: drivers/net/ethernet/mellanox/mlx5/core/
F: include/linux/mlx5/
MELLANOX MLX5 IB driver
-M: Matan Barak <matanb@mellanox.com>
M: Leon Romanovsky <leonro@mellanox.com>
L: linux-rdma@vger.kernel.org
W: http://www.mellanox.com
@@ -9709,7 +9704,7 @@ S: Maintained
F: drivers/net/ethernet/netronome/
NETWORK BLOCK DEVICE (NBD)
-M: Josef Bacik <jbacik@fb.com>
+M: Josef Bacik <josef@toxicpanda.com>
S: Maintained
L: linux-block@vger.kernel.org
L: nbd@other.debian.org
@@ -9725,6 +9720,7 @@ W: https://fedorahosted.org/dropwatch/
F: net/core/drop_monitor.c
NETWORKING DRIVERS
+M: "David S. Miller" <davem@davemloft.net>
L: netdev@vger.kernel.org
W: http://www.linuxfoundation.org/en/Net
Q: http://patchwork.ozlabs.org/project/netdev/list/
@@ -9841,7 +9837,6 @@ F: net/netfilter/xt_CONNSECMARK.c
F: net/netfilter/xt_SECMARK.c
NETWORKING [TLS]
-M: Ilya Lesokhin <ilyal@mellanox.com>
M: Aviad Yehezkel <aviadye@mellanox.com>
M: Dave Watson <davejwatson@fb.com>
L: netdev@vger.kernel.org
@@ -9881,7 +9876,7 @@ F: include/linux/platform_data/nxp-nci.h
F: Documentation/devicetree/bindings/net/nfc/
NFS, SUNRPC, AND LOCKD CLIENTS
-M: Trond Myklebust <trond.myklebust@primarydata.com>
+M: Trond Myklebust <trond.myklebust@hammerspace.com>
M: Anna Schumaker <anna.schumaker@netapp.com>
L: linux-nfs@vger.kernel.org
W: http://client.linux-nfs.org
@@ -11641,7 +11636,7 @@ S: Maintained
F: drivers/media/tuners/qt1010*
QUALCOMM ATHEROS ATH10K WIRELESS DRIVER
-M: Kalle Valo <kvalo@qca.qualcomm.com>
+M: Kalle Valo <kvalo@codeaurora.org>
L: ath10k@lists.infradead.org
W: http://wireless.kernel.org/en/users/Drivers/ath10k
T: git git://git.kernel.org/pub/scm/linux/kernel/git/kvalo/ath.git
@@ -11692,7 +11687,7 @@ S: Maintained
F: drivers/media/platform/qcom/venus/
QUALCOMM WCN36XX WIRELESS DRIVER
-M: Eugene Krasnikov <k.eugene.e@gmail.com>
+M: Kalle Valo <kvalo@codeaurora.org>
L: wcn36xx@lists.infradead.org
W: http://wireless.kernel.org/en/users/Drivers/wcn36xx
T: git git://github.com/KrasnikovEugene/wcn36xx.git
@@ -12230,7 +12225,7 @@ F: Documentation/s390/vfio-ccw.txt
F: include/uapi/linux/vfio_ccw.h
S390 ZCRYPT DRIVER
-M: Harald Freudenberger <freude@de.ibm.com>
+M: Harald Freudenberger <freude@linux.ibm.com>
L: linux-s390@vger.kernel.org
W: http://www.ibm.com/developerworks/linux/linux390/
S: Supported
@@ -12259,7 +12254,6 @@ S: Odd Fixes
F: drivers/media/i2c/saa6588*
SAA7134 VIDEO4LINUX DRIVER
-M: Mauro Carvalho Chehab <mchehab@s-opensource.com>
M: Mauro Carvalho Chehab <mchehab@kernel.org>
L: linux-media@vger.kernel.org
W: https://linuxtv.org
@@ -12498,6 +12492,7 @@ F: drivers/scsi/st_*.h
SCTP PROTOCOL
M: Vlad Yasevich <vyasevich@gmail.com>
M: Neil Horman <nhorman@tuxdriver.com>
+M: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
L: linux-sctp@vger.kernel.org
W: http://lksctp.sourceforge.net
S: Maintained
@@ -12763,7 +12758,6 @@ S: Maintained
F: drivers/media/radio/si4713/radio-usb-si4713.c
SIANO DVB DRIVER
-M: Mauro Carvalho Chehab <mchehab@s-opensource.com>
M: Mauro Carvalho Chehab <mchehab@kernel.org>
L: linux-media@vger.kernel.org
W: https://linuxtv.org
@@ -13275,6 +13269,12 @@ M: Jan-Benedict Glaw <jbglaw@lug-owl.de>
S: Maintained
F: arch/alpha/kernel/srm_env.c
+ST STM32 I2C/SMBUS DRIVER
+M: Pierre-Yves MORDRET <pierre-yves.mordret@st.com>
+L: linux-i2c@vger.kernel.org
+S: Maintained
+F: drivers/i2c/busses/i2c-stm32*
+
STABLE BRANCH
M: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
L: stable@vger.kernel.org
@@ -13754,7 +13754,6 @@ S: Maintained
F: drivers/media/i2c/tda9840*
TEA5761 TUNER DRIVER
-M: Mauro Carvalho Chehab <mchehab@s-opensource.com>
M: Mauro Carvalho Chehab <mchehab@kernel.org>
L: linux-media@vger.kernel.org
W: https://linuxtv.org
@@ -13763,7 +13762,6 @@ S: Odd fixes
F: drivers/media/tuners/tea5761.*
TEA5767 TUNER DRIVER
-M: Mauro Carvalho Chehab <mchehab@s-opensource.com>
M: Mauro Carvalho Chehab <mchehab@kernel.org>
L: linux-media@vger.kernel.org
W: https://linuxtv.org
@@ -13853,7 +13851,6 @@ S: Supported
F: drivers/iommu/tegra*
TEGRA KBC DRIVER
-M: Rakesh Iyer <riyer@nvidia.com>
M: Laxman Dewangan <ldewangan@nvidia.com>
S: Supported
F: drivers/input/keyboard/tegra-kbc.c
@@ -14180,7 +14177,6 @@ F: Documentation/networking/tlan.txt
F: drivers/net/ethernet/ti/tlan.*
TM6000 VIDEO4LINUX DRIVER
-M: Mauro Carvalho Chehab <mchehab@s-opensource.com>
M: Mauro Carvalho Chehab <mchehab@kernel.org>
L: linux-media@vger.kernel.org
W: https://linuxtv.org
@@ -14663,7 +14659,6 @@ F: drivers/usb/common/usb-otg-fsm.c
USB OVER IP DRIVER
M: Valentina Manea <valentina.manea.m@gmail.com>
-M: Shuah Khan <shuahkh@osg.samsung.com>
M: Shuah Khan <shuah@kernel.org>
L: linux-usb@vger.kernel.org
S: Maintained
@@ -15407,7 +15402,6 @@ S: Maintained
F: arch/x86/entry/vdso/
XC2028/3028 TUNER DRIVER
-M: Mauro Carvalho Chehab <mchehab@s-opensource.com>
M: Mauro Carvalho Chehab <mchehab@kernel.org>
L: linux-media@vger.kernel.org
W: https://linuxtv.org
@@ -15523,6 +15517,14 @@ L: linux-kernel@vger.kernel.org
S: Supported
F: drivers/char/xillybus/
+XLP9XX I2C DRIVER
+M: George Cherian <george.cherian@cavium.com>
+M: Jan Glauber <jglauber@cavium.com>
+L: linux-i2c@vger.kernel.org
+W: http://www.cavium.com
+S: Supported
+F: drivers/i2c/busses/i2c-xlp9xx.c
+
XRA1403 GPIO EXPANDER
M: Nandor Han <nandor.han@ge.com>
M: Semi Malinen <semi.malinen@ge.com>
@@ -15641,7 +15643,7 @@ L: linux-mm@kvack.org
S: Maintained
F: mm/zsmalloc.c
F: include/linux/zsmalloc.h
-F: Documentation/vm/zsmalloc.txt
+F: Documentation/vm/zsmalloc.rst
ZSWAP COMPRESSED SWAP CACHING
M: Seth Jennings <sjenning@redhat.com>
diff --git a/Makefile b/Makefile
index 619a85ad716b..554dcaddbce4 100644
--- a/Makefile
+++ b/Makefile
@@ -2,8 +2,8 @@
VERSION = 4
PATCHLEVEL = 17
SUBLEVEL = 0
-EXTRAVERSION = -rc3
-NAME = Fearless Coyote
+EXTRAVERSION =
+NAME = Merciless Moray
# *DOCUMENTATION*
# To see a list of typical targets execute "make help"
@@ -500,6 +500,9 @@ RETPOLINE_CFLAGS_CLANG := -mretpoline-external-thunk
RETPOLINE_CFLAGS := $(call cc-option,$(RETPOLINE_CFLAGS_GCC),$(call cc-option,$(RETPOLINE_CFLAGS_CLANG)))
export RETPOLINE_CFLAGS
+KBUILD_CFLAGS += $(call cc-option,-fno-PIE)
+KBUILD_AFLAGS += $(call cc-option,-fno-PIE)
+
# check for 'asm goto'
ifeq ($(call shell-cached,$(CONFIG_SHELL) $(srctree)/scripts/gcc-goto.sh $(CC) $(KBUILD_CFLAGS)), y)
CC_HAVE_ASM_GOTO := 1
@@ -621,9 +624,9 @@ endif # $(dot-config)
# Defaults to vmlinux, but the arch makefile usually adds further targets
all: vmlinux
-KBUILD_CFLAGS += $(call cc-option,-fno-PIE)
-KBUILD_AFLAGS += $(call cc-option,-fno-PIE)
-CFLAGS_GCOV := -fprofile-arcs -ftest-coverage -fno-tree-loop-im $(call cc-disable-warning,maybe-uninitialized,)
+CFLAGS_GCOV := -fprofile-arcs -ftest-coverage \
+ $(call cc-option,-fno-tree-loop-im) \
+ $(call cc-disable-warning,maybe-uninitialized,)
export CFLAGS_GCOV CFLAGS_KCOV
# The arch Makefile can set ARCH_{CPP,A,C}FLAGS to override the default
diff --git a/arch/Kconfig b/arch/Kconfig
index 4377b0cec976..b695a3e3e922 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -278,9 +278,6 @@ config HAVE_CLK
The <linux/clk.h> calls support software clock gating and
thus are a key power management tool on many systems.
-config HAVE_DMA_API_DEBUG
- bool
-
config HAVE_HW_BREAKPOINT
bool
depends on PERF_EVENTS
@@ -464,6 +461,10 @@ config GCC_PLUGIN_LATENT_ENTROPY
config GCC_PLUGIN_STRUCTLEAK
bool "Force initialization of variables containing userspace addresses"
depends on GCC_PLUGINS
+ # Currently STRUCTLEAK inserts initialization out of live scope of
+ # variables from KASAN point of view. This leads to KASAN false
+ # positive reports. Prohibit this combination for now.
+ depends on !KASAN_EXTRA
help
This plugin zero-initializes any structures containing a
__user attribute. This can prevent some classes of information
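
The new dependency exists because STRUCTLEAK's forced zero-initialization
lands outside the live scope KASAN tracks for the variable, so combining
the two produces false positives. The plugin itself zeroes any on-stack
structure with a __user member so stale stack contents cannot reach
userspace; a hedged sketch of the pattern it targets (all names
hypothetical):

    struct example_req {
            void __user *buf;       /* __user member makes this a target */
            int flags;              /* might never be written explicitly */
    };

    static long example_ioctl(void __user *arg)
    {
            struct example_req req; /* plugin emits an implicit zeroing here */

            req.buf = arg;          /* flags is now 0, not stale stack data,
                                       even if later copied back to userspace */
            return 0;
    }
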
diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig
index b2022885ced8..0c4805a572c8 100644
--- a/arch/alpha/Kconfig
+++ b/arch/alpha/Kconfig
@@ -10,6 +10,8 @@ config ALPHA
select HAVE_OPROFILE
select HAVE_PCSPKR_PLATFORM
select HAVE_PERF_EVENTS
+ select NEED_DMA_MAP_STATE
+ select NEED_SG_DMA_LENGTH
select VIRT_TO_BUS
select GENERIC_IRQ_PROBE
select AUTO_IRQ_AFFINITY if SMP
@@ -64,15 +66,6 @@ config ZONE_DMA
bool
default y
-config ARCH_DMA_ADDR_T_64BIT
- def_bool y
-
-config NEED_DMA_MAP_STATE
- def_bool y
-
-config NEED_SG_DMA_LENGTH
- def_bool y
-
config GENERIC_ISA_DMA
bool
default y
@@ -211,6 +204,7 @@ config ALPHA_EIGER
config ALPHA_JENSEN
bool "Jensen"
depends on BROKEN
+ select DMA_DIRECT_OPS
help
DEC PC 150 AXP (aka Jensen): This is a very old Digital system - one
of the first-generation Alpha systems. A number of these systems
@@ -345,9 +339,6 @@ config PCI_DOMAINS
config PCI_SYSCALL
def_bool PCI
-config IOMMU_HELPER
- def_bool PCI
-
config ALPHA_NONAME
bool
depends on ALPHA_BOOK1 || ALPHA_NONAME_CH
@@ -585,7 +576,7 @@ config ARCH_DISCONTIGMEM_ENABLE
Say Y to support efficient handling of discontiguous physical memory,
for architectures which are either NUMA (Non-Uniform Memory Access)
or have huge holes in the physical address space for other reasons.
- See <file:Documentation/vm/numa> for more.
+ See <file:Documentation/vm/numa.rst> for more.
source "mm/Kconfig"
diff --git a/arch/alpha/include/asm/dma-mapping.h b/arch/alpha/include/asm/dma-mapping.h
index b78f61f20796..8beeafd4f68e 100644
--- a/arch/alpha/include/asm/dma-mapping.h
+++ b/arch/alpha/include/asm/dma-mapping.h
@@ -2,11 +2,15 @@
#ifndef _ALPHA_DMA_MAPPING_H
#define _ALPHA_DMA_MAPPING_H
-extern const struct dma_map_ops *dma_ops;
+extern const struct dma_map_ops alpha_pci_ops;
static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus)
{
- return dma_ops;
+#ifdef CONFIG_ALPHA_JENSEN
+ return &dma_direct_ops;
+#else
+ return &alpha_pci_ops;
+#endif
}
#endif /* _ALPHA_DMA_MAPPING_H */
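
get_arch_dma_ops() is only the per-bus fallback; the DMA core consults it
when a device carries no specific ops of its own. A simplified sketch of
the dispatch helper in <linux/dma-mapping.h> shows why the compile-time
choice above suffices for Jensen:

    static inline const struct dma_map_ops *get_dma_ops(struct device *dev)
    {
            if (dev && dev->dma_ops)        /* per-device override wins */
                    return dev->dma_ops;
            return get_arch_dma_ops(dev ? dev->bus : NULL);
    }
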
diff --git a/arch/alpha/include/asm/pci.h b/arch/alpha/include/asm/pci.h
index b9ec55351924..cf6bc1e64d66 100644
--- a/arch/alpha/include/asm/pci.h
+++ b/arch/alpha/include/asm/pci.h
@@ -56,11 +56,6 @@ struct pci_controller {
/* IOMMU controls. */
-/* The PCI address space does not equal the physical memory address space.
- The networking and block device layers use this boolean for bounce buffer
- decisions. */
-#define PCI_DMA_BUS_IS_PHYS 0
-
/* TODO: integrate with include/asm-generic/pci.h ? */
static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel)
{
diff --git a/arch/alpha/include/uapi/asm/siginfo.h b/arch/alpha/include/uapi/asm/siginfo.h
index 0cf3b527b274..db3f0138536f 100644
--- a/arch/alpha/include/uapi/asm/siginfo.h
+++ b/arch/alpha/include/uapi/asm/siginfo.h
@@ -7,18 +7,4 @@
#include <asm-generic/siginfo.h>
-/*
- * SIGFPE si_codes
- */
-#ifdef __KERNEL__
-#define FPE_FIXME 0 /* Broken dup of SI_USER */
-#endif /* __KERNEL__ */
-
-/*
- * SIGTRAP si_codes
- */
-#ifdef __KERNEL__
-#define TRAP_FIXME 0 /* Broken dup of SI_USER */
-#endif /* __KERNEL__ */
-
#endif
diff --git a/arch/alpha/kernel/io.c b/arch/alpha/kernel/io.c
index 3e3d49c254c5..c025a3e5e357 100644
--- a/arch/alpha/kernel/io.c
+++ b/arch/alpha/kernel/io.c
@@ -37,20 +37,20 @@ unsigned int ioread32(void __iomem *addr)
void iowrite8(u8 b, void __iomem *addr)
{
- IO_CONCAT(__IO_PREFIX,iowrite8)(b, addr);
mb();
+ IO_CONCAT(__IO_PREFIX,iowrite8)(b, addr);
}
void iowrite16(u16 b, void __iomem *addr)
{
- IO_CONCAT(__IO_PREFIX,iowrite16)(b, addr);
mb();
+ IO_CONCAT(__IO_PREFIX,iowrite16)(b, addr);
}
void iowrite32(u32 b, void __iomem *addr)
{
- IO_CONCAT(__IO_PREFIX,iowrite32)(b, addr);
mb();
+ IO_CONCAT(__IO_PREFIX,iowrite32)(b, addr);
}
EXPORT_SYMBOL(ioread8);
@@ -176,26 +176,26 @@ u64 readq(const volatile void __iomem *addr)
void writeb(u8 b, volatile void __iomem *addr)
{
- __raw_writeb(b, addr);
mb();
+ __raw_writeb(b, addr);
}
void writew(u16 b, volatile void __iomem *addr)
{
- __raw_writew(b, addr);
mb();
+ __raw_writew(b, addr);
}
void writel(u32 b, volatile void __iomem *addr)
{
- __raw_writel(b, addr);
mb();
+ __raw_writel(b, addr);
}
void writeq(u64 b, volatile void __iomem *addr)
{
- __raw_writeq(b, addr);
mb();
+ __raw_writeq(b, addr);
}
EXPORT_SYMBOL(readb);
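
Issuing mb() before the MMIO store, rather than after it, gives
writeX()/iowriteX() the ordering drivers actually depend on: CPU stores to
DMA memory become visible to the device before the register write that
kicks it off. A hypothetical doorbell sequence illustrates the point:

    /* hypothetical driver; desc sits in DMA-visible memory */
    desc->addr = cpu_to_le64(buf_dma);   /* (1) fill the descriptor */
    desc->len  = cpu_to_le32(len);
    writel(RING_GO, ring->doorbell);     /* (2) the barrier now runs before
                                          * the store, so the device cannot
                                          * observe a half-written descriptor */
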
diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c
index 89faa6f4de47..6e921754c8fc 100644
--- a/arch/alpha/kernel/osf_sys.c
+++ b/arch/alpha/kernel/osf_sys.c
@@ -871,8 +871,7 @@ SYSCALL_DEFINE5(osf_setsysinfo, unsigned long, op, void __user *, buffer,
send a signal. Old exceptions are not signaled. */
fex = (exc >> IEEE_STATUS_TO_EXCSUM_SHIFT) & swcr;
if (fex) {
- siginfo_t info;
- int si_code = 0;
+ int si_code = FPE_FLTUNK;
if (fex & IEEE_TRAP_ENABLE_DNO) si_code = FPE_FLTUND;
if (fex & IEEE_TRAP_ENABLE_INE) si_code = FPE_FLTRES;
@@ -881,11 +880,9 @@ SYSCALL_DEFINE5(osf_setsysinfo, unsigned long, op, void __user *, buffer,
if (fex & IEEE_TRAP_ENABLE_DZE) si_code = FPE_FLTDIV;
if (fex & IEEE_TRAP_ENABLE_INV) si_code = FPE_FLTINV;
- info.si_signo = SIGFPE;
- info.si_errno = 0;
- info.si_code = si_code;
- info.si_addr = NULL; /* FIXME */
- send_sig_info(SIGFPE, &info, current);
+ send_sig_fault(SIGFPE, si_code,
+ (void __user *)NULL, /* FIXME */
+ 0, current);
}
return 0;
}
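
send_sig_fault() replaces the open-coded siginfo setup at these call
sites, and it clears the whole structure before filling it, so padding and
unused union members no longer leak kernel stack to userspace. Roughly
what the helper does (a sketch, not the verbatim kernel code; the trapno
argument is arch-conditional in reality):

    int send_sig_fault(int sig, int code, void __user *addr, int trapno,
                       struct task_struct *t)
    {
            struct siginfo info;

            clear_siginfo(&info);           /* zero padding and union tail */
            info.si_signo  = sig;
            info.si_errno  = 0;
            info.si_code   = code;
            info.si_addr   = addr;
            info.si_trapno = trapno;        /* only on arches that have it */
            return send_sig_info(sig, &info, t);
    }
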
diff --git a/arch/alpha/kernel/pci-noop.c b/arch/alpha/kernel/pci-noop.c
index b6ebb65127a8..c7c5879869d3 100644
--- a/arch/alpha/kernel/pci-noop.c
+++ b/arch/alpha/kernel/pci-noop.c
@@ -102,36 +102,3 @@ SYSCALL_DEFINE5(pciconfig_write, unsigned long, bus, unsigned long, dfn,
else
return -ENODEV;
}
-
-static void *alpha_noop_alloc_coherent(struct device *dev, size_t size,
- dma_addr_t *dma_handle, gfp_t gfp,
- unsigned long attrs)
-{
- void *ret;
-
- if (!dev || *dev->dma_mask >= 0xffffffffUL)
- gfp &= ~GFP_DMA;
- ret = (void *)__get_free_pages(gfp, get_order(size));
- if (ret) {
- memset(ret, 0, size);
- *dma_handle = virt_to_phys(ret);
- }
- return ret;
-}
-
-static int alpha_noop_supported(struct device *dev, u64 mask)
-{
- return mask < 0x00ffffffUL ? 0 : 1;
-}
-
-const struct dma_map_ops alpha_noop_ops = {
- .alloc = alpha_noop_alloc_coherent,
- .free = dma_noop_free_coherent,
- .map_page = dma_noop_map_page,
- .map_sg = dma_noop_map_sg,
- .mapping_error = dma_noop_mapping_error,
- .dma_supported = alpha_noop_supported,
-};
-
-const struct dma_map_ops *dma_ops = &alpha_noop_ops;
-EXPORT_SYMBOL(dma_ops);
diff --git a/arch/alpha/kernel/pci_iommu.c b/arch/alpha/kernel/pci_iommu.c
index 83b34b9188ea..6923b0d9c1e1 100644
--- a/arch/alpha/kernel/pci_iommu.c
+++ b/arch/alpha/kernel/pci_iommu.c
@@ -950,6 +950,4 @@ const struct dma_map_ops alpha_pci_ops = {
.mapping_error = alpha_pci_mapping_error,
.dma_supported = alpha_pci_supported,
};
-
-const struct dma_map_ops *dma_ops = &alpha_pci_ops;
-EXPORT_SYMBOL(dma_ops);
+EXPORT_SYMBOL(alpha_pci_ops);
diff --git a/arch/alpha/kernel/signal.c b/arch/alpha/kernel/signal.c
index 9ebb3bcbc626..8c0c4ee0be6e 100644
--- a/arch/alpha/kernel/signal.c
+++ b/arch/alpha/kernel/signal.c
@@ -219,14 +219,8 @@ do_sigreturn(struct sigcontext __user *sc)
/* Send SIGTRAP if we're single-stepping: */
if (ptrace_cancel_bpt (current)) {
- siginfo_t info;
-
- info.si_signo = SIGTRAP;
- info.si_errno = 0;
- info.si_code = TRAP_BRKPT;
- info.si_addr = (void __user *) regs->pc;
- info.si_trapno = 0;
- send_sig_info(SIGTRAP, &info, current);
+ send_sig_fault(SIGTRAP, TRAP_BRKPT, (void __user *) regs->pc, 0,
+ current);
}
return;
@@ -253,14 +247,8 @@ do_rt_sigreturn(struct rt_sigframe __user *frame)
/* Send SIGTRAP if we're single-stepping: */
if (ptrace_cancel_bpt (current)) {
- siginfo_t info;
-
- info.si_signo = SIGTRAP;
- info.si_errno = 0;
- info.si_code = TRAP_BRKPT;
- info.si_addr = (void __user *) regs->pc;
- info.si_trapno = 0;
- send_sig_info(SIGTRAP, &info, current);
+ send_sig_fault(SIGTRAP, TRAP_BRKPT, (void __user *) regs->pc, 0,
+ current);
}
return;
diff --git a/arch/alpha/kernel/traps.c b/arch/alpha/kernel/traps.c
index f43bd05dede2..bc9627698796 100644
--- a/arch/alpha/kernel/traps.c
+++ b/arch/alpha/kernel/traps.c
@@ -213,7 +213,6 @@ do_entArith(unsigned long summary, unsigned long write_mask,
struct pt_regs *regs)
{
long si_code = FPE_FLTINV;
- siginfo_t info;
if (summary & 1) {
/* Software-completion summary bit is set, so try to
@@ -228,17 +227,12 @@ do_entArith(unsigned long summary, unsigned long write_mask,
}
die_if_kernel("Arithmetic fault", regs, 0, NULL);
- info.si_signo = SIGFPE;
- info.si_errno = 0;
- info.si_code = si_code;
- info.si_addr = (void __user *) regs->pc;
- send_sig_info(SIGFPE, &info, current);
+ send_sig_fault(SIGFPE, si_code, (void __user *) regs->pc, 0, current);
}
asmlinkage void
do_entIF(unsigned long type, struct pt_regs *regs)
{
- siginfo_t info;
int signo, code;
if ((regs->ps & ~IPL_MAX) == 0) {
@@ -270,31 +264,20 @@ do_entIF(unsigned long type, struct pt_regs *regs)
switch (type) {
case 0: /* breakpoint */
- info.si_signo = SIGTRAP;
- info.si_errno = 0;
- info.si_code = TRAP_BRKPT;
- info.si_trapno = 0;
- info.si_addr = (void __user *) regs->pc;
-
if (ptrace_cancel_bpt(current)) {
regs->pc -= 4; /* make pc point to former bpt */
}
- send_sig_info(SIGTRAP, &info, current);
+ send_sig_fault(SIGTRAP, TRAP_BRKPT, (void __user *)regs->pc, 0,
+ current);
return;
case 1: /* bugcheck */
- info.si_signo = SIGTRAP;
- info.si_errno = 0;
- info.si_code = TRAP_FIXME;
- info.si_addr = (void __user *) regs->pc;
- info.si_trapno = 0;
- send_sig_info(SIGTRAP, &info, current);
+ send_sig_fault(SIGTRAP, TRAP_UNK, (void __user *) regs->pc, 0,
+ current);
return;
case 2: /* gentrap */
- info.si_addr = (void __user *) regs->pc;
- info.si_trapno = regs->r16;
switch ((long) regs->r16) {
case GEN_INTOVF:
signo = SIGFPE;
@@ -326,7 +309,7 @@ do_entIF(unsigned long type, struct pt_regs *regs)
break;
case GEN_ROPRAND:
signo = SIGFPE;
- code = FPE_FIXME;
+ code = FPE_FLTUNK;
break;
case GEN_DECOVF:
@@ -348,15 +331,12 @@ do_entIF(unsigned long type, struct pt_regs *regs)
case GEN_SUBRNG7:
default:
signo = SIGTRAP;
- code = TRAP_FIXME;
+ code = TRAP_UNK;
break;
}
- info.si_signo = signo;
- info.si_errno = 0;
- info.si_code = code;
- info.si_addr = (void __user *) regs->pc;
- send_sig_info(signo, &info, current);
+ send_sig_fault(signo, code, (void __user *) regs->pc, regs->r16,
+ current);
return;
case 4: /* opDEC */
@@ -380,11 +360,9 @@ do_entIF(unsigned long type, struct pt_regs *regs)
if (si_code == 0)
return;
if (si_code > 0) {
- info.si_signo = SIGFPE;
- info.si_errno = 0;
- info.si_code = si_code;
- info.si_addr = (void __user *) regs->pc;
- send_sig_info(SIGFPE, &info, current);
+ send_sig_fault(SIGFPE, si_code,
+ (void __user *) regs->pc, 0,
+ current);
return;
}
}
@@ -409,11 +387,7 @@ do_entIF(unsigned long type, struct pt_regs *regs)
;
}
- info.si_signo = SIGILL;
- info.si_errno = 0;
- info.si_code = ILL_ILLOPC;
- info.si_addr = (void __user *) regs->pc;
- send_sig_info(SIGILL, &info, current);
+ send_sig_fault(SIGILL, ILL_ILLOPC, (void __user *)regs->pc, 0, current);
}
/* There is an ifdef in the PALcode in MILO that enables a
@@ -426,15 +400,9 @@ do_entIF(unsigned long type, struct pt_regs *regs)
asmlinkage void
do_entDbg(struct pt_regs *regs)
{
- siginfo_t info;
-
die_if_kernel("Instruction fault", regs, 0, NULL);
- info.si_signo = SIGILL;
- info.si_errno = 0;
- info.si_code = ILL_ILLOPC;
- info.si_addr = (void __user *) regs->pc;
- force_sig_info(SIGILL, &info, current);
+ force_sig_fault(SIGILL, ILL_ILLOPC, (void __user *)regs->pc, 0, current);
}
@@ -758,7 +726,7 @@ do_entUnaUser(void __user * va, unsigned long opcode,
unsigned long tmp1, tmp2, tmp3, tmp4;
unsigned long fake_reg, *reg_addr = &fake_reg;
- siginfo_t info;
+ int si_code;
long error;
/* Check the UAC bits to decide what the user wants us to do
@@ -981,34 +949,27 @@ do_entUnaUser(void __user * va, unsigned long opcode,
give_sigsegv:
regs->pc -= 4; /* make pc point to faulting insn */
- info.si_signo = SIGSEGV;
- info.si_errno = 0;
/* We need to replicate some of the logic in mm/fault.c,
since we don't have access to the fault code in the
exception handling return path. */
if ((unsigned long)va >= TASK_SIZE)
- info.si_code = SEGV_ACCERR;
+ si_code = SEGV_ACCERR;
else {
struct mm_struct *mm = current->mm;
down_read(&mm->mmap_sem);
if (find_vma(mm, (unsigned long)va))
- info.si_code = SEGV_ACCERR;
+ si_code = SEGV_ACCERR;
else
- info.si_code = SEGV_MAPERR;
+ si_code = SEGV_MAPERR;
up_read(&mm->mmap_sem);
}
- info.si_addr = va;
- send_sig_info(SIGSEGV, &info, current);
+ send_sig_fault(SIGSEGV, si_code, va, 0, current);
return;
give_sigbus:
regs->pc -= 4;
- info.si_signo = SIGBUS;
- info.si_errno = 0;
- info.si_code = BUS_ADRALN;
- info.si_addr = va;
- send_sig_info(SIGBUS, &info, current);
+ send_sig_fault(SIGBUS, BUS_ADRALN, va, 0, current);
return;
}
diff --git a/arch/alpha/mm/fault.c b/arch/alpha/mm/fault.c
index cd3c572ee912..de2bd217adad 100644
--- a/arch/alpha/mm/fault.c
+++ b/arch/alpha/mm/fault.c
@@ -88,7 +88,6 @@ do_page_fault(unsigned long address, unsigned long mmcsr,
struct mm_struct *mm = current->mm;
const struct exception_table_entry *fixup;
int fault, si_code = SEGV_MAPERR;
- siginfo_t info;
unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
/* As of EV6, a load into $31/$f31 is a prefetch, and never faults
@@ -221,21 +220,13 @@ retry:
up_read(&mm->mmap_sem);
/* Send a sigbus, regardless of whether we were in kernel
or user mode. */
- info.si_signo = SIGBUS;
- info.si_errno = 0;
- info.si_code = BUS_ADRERR;
- info.si_addr = (void __user *) address;
- force_sig_info(SIGBUS, &info, current);
+ force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *) address, 0, current);
if (!user_mode(regs))
goto no_context;
return;
do_sigsegv:
- info.si_signo = SIGSEGV;
- info.si_errno = 0;
- info.si_code = si_code;
- info.si_addr = (void __user *) address;
- force_sig_info(SIGSEGV, &info, current);
+ force_sig_fault(SIGSEGV, si_code, (void __user *) address, 0, current);
return;
#ifdef CONFIG_ALPHA_LARGE_VMALLOC
diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig
index d76bf4a83740..89d47eac18b2 100644
--- a/arch/arc/Kconfig
+++ b/arch/arc/Kconfig
@@ -9,11 +9,15 @@
config ARC
def_bool y
select ARC_TIMERS
+ select ARCH_HAS_SYNC_DMA_FOR_CPU
+ select ARCH_HAS_SYNC_DMA_FOR_DEVICE
select ARCH_HAS_SG_CHAIN
select ARCH_SUPPORTS_ATOMIC_RMW if ARC_HAS_LLSC
select BUILDTIME_EXTABLE_SORT
select CLONE_BACKWARDS
select COMMON_CLK
+ select DMA_NONCOHERENT_OPS
+ select DMA_NONCOHERENT_MMAP
select GENERIC_ATOMIC64 if !ISA_ARCV2 || !(ARC_HAS_LL64 && ARC_HAS_LLSC)
select GENERIC_CLOCKEVENTS
select GENERIC_FIND_FIRST_BIT
@@ -453,16 +457,11 @@ config ARC_HAS_PAE40
default n
depends on ISA_ARCV2
select HIGHMEM
+ select PHYS_ADDR_T_64BIT
help
Enable access to physical memory beyond 4G, only supported on
ARC cores with 40 bit Physical Addressing support
-config ARCH_PHYS_ADDR_T_64BIT
- def_bool ARC_HAS_PAE40
-
-config ARCH_DMA_ADDR_T_64BIT
- bool
-
config ARC_KVADDR_SIZE
int "Kernel Virtual Address Space size (MB)"
range 0 512
diff --git a/arch/arc/include/asm/Kbuild b/arch/arc/include/asm/Kbuild
index d51bc22e8795..feed50ce89fa 100644
--- a/arch/arc/include/asm/Kbuild
+++ b/arch/arc/include/asm/Kbuild
@@ -3,6 +3,7 @@ generic-y += bugs.h
generic-y += compat.h
generic-y += device.h
generic-y += div64.h
+generic-y += dma-mapping.h
generic-y += emergency-restart.h
generic-y += extable.h
generic-y += fb.h
diff --git a/arch/arc/include/asm/dma-mapping.h b/arch/arc/include/asm/dma-mapping.h
deleted file mode 100644
index 7a16824bfe98..000000000000
--- a/arch/arc/include/asm/dma-mapping.h
+++ /dev/null
@@ -1,21 +0,0 @@
-/*
- * DMA Mapping glue for ARC
- *
- * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#ifndef ASM_ARC_DMA_MAPPING_H
-#define ASM_ARC_DMA_MAPPING_H
-
-extern const struct dma_map_ops arc_dma_ops;
-
-static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus)
-{
- return &arc_dma_ops;
-}
-
-#endif
diff --git a/arch/arc/include/asm/pci.h b/arch/arc/include/asm/pci.h
index ba56c23c1b20..4ff53c041c64 100644
--- a/arch/arc/include/asm/pci.h
+++ b/arch/arc/include/asm/pci.h
@@ -16,12 +16,6 @@
#define PCIBIOS_MIN_MEM 0x100000
#define pcibios_assign_all_busses() 1
-/*
- * The PCI address space does equal the physical memory address space.
- * The networking and block device layers use this boolean for bounce
- * buffer decisions.
- */
-#define PCI_DMA_BUS_IS_PHYS 1
#endif /* __KERNEL__ */
diff --git a/arch/arc/mm/dma.c b/arch/arc/mm/dma.c
index 1dcc404b5aec..8c1071840979 100644
--- a/arch/arc/mm/dma.c
+++ b/arch/arc/mm/dma.c
@@ -16,13 +16,12 @@
* The default DMA address == Phy address which is 0x8000_0000 based.
*/
-#include <linux/dma-mapping.h>
+#include <linux/dma-noncoherent.h>
#include <asm/cache.h>
#include <asm/cacheflush.h>
-
-static void *arc_dma_alloc(struct device *dev, size_t size,
- dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs)
+void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
+ gfp_t gfp, unsigned long attrs)
{
unsigned long order = get_order(size);
struct page *page;
@@ -89,7 +88,7 @@ static void *arc_dma_alloc(struct device *dev, size_t size,
return kvaddr;
}
-static void arc_dma_free(struct device *dev, size_t size, void *vaddr,
+void arch_dma_free(struct device *dev, size_t size, void *vaddr,
dma_addr_t dma_handle, unsigned long attrs)
{
phys_addr_t paddr = dma_handle;
@@ -105,9 +104,9 @@ static void arc_dma_free(struct device *dev, size_t size, void *vaddr,
__free_pages(page, get_order(size));
}
-static int arc_dma_mmap(struct device *dev, struct vm_area_struct *vma,
- void *cpu_addr, dma_addr_t dma_addr, size_t size,
- unsigned long attrs)
+int arch_dma_mmap(struct device *dev, struct vm_area_struct *vma,
+ void *cpu_addr, dma_addr_t dma_addr, size_t size,
+ unsigned long attrs)
{
unsigned long user_count = vma_pages(vma);
unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT;
@@ -130,149 +129,14 @@ static int arc_dma_mmap(struct device *dev, struct vm_area_struct *vma,
return ret;
}
-/*
- * streaming DMA Mapping API...
- * CPU accesses page via normal paddr, thus needs to explicitly made
- * consistent before each use
- */
-static void _dma_cache_sync(phys_addr_t paddr, size_t size,
- enum dma_data_direction dir)
+void arch_sync_dma_for_device(struct device *dev, phys_addr_t paddr,
+ size_t size, enum dma_data_direction dir)
{
- switch (dir) {
- case DMA_FROM_DEVICE:
- dma_cache_inv(paddr, size);
- break;
- case DMA_TO_DEVICE:
- dma_cache_wback(paddr, size);
- break;
- case DMA_BIDIRECTIONAL:
- dma_cache_wback_inv(paddr, size);
- break;
- default:
- pr_err("Invalid DMA dir [%d] for OP @ %pa[p]\n", dir, &paddr);
- }
+ dma_cache_wback(paddr, size);
}
-/*
- * arc_dma_map_page - map a portion of a page for streaming DMA
- *
- * Ensure that any data held in the cache is appropriately discarded
- * or written back.
- *
- * The device owns this memory once this call has completed. The CPU
- * can regain ownership by calling dma_unmap_page().
- *
- * Note: while it takes struct page as arg, caller can "abuse" it to pass
- * a region larger than PAGE_SIZE, provided it is physically contiguous
- * and this still works correctly
- */
-static dma_addr_t arc_dma_map_page(struct device *dev, struct page *page,
- unsigned long offset, size_t size, enum dma_data_direction dir,
- unsigned long attrs)
+void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr,
+ size_t size, enum dma_data_direction dir)
{
- phys_addr_t paddr = page_to_phys(page) + offset;
-
- if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
- _dma_cache_sync(paddr, size, dir);
-
- return paddr;
-}
-
-/*
- * arc_dma_unmap_page - unmap a buffer previously mapped through dma_map_page()
- *
- * After this call, reads by the CPU to the buffer are guaranteed to see
- * whatever the device wrote there.
- *
- * Note: historically this routine was not implemented for ARC
- */
-static void arc_dma_unmap_page(struct device *dev, dma_addr_t handle,
- size_t size, enum dma_data_direction dir,
- unsigned long attrs)
-{
- phys_addr_t paddr = handle;
-
- if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
- _dma_cache_sync(paddr, size, dir);
+ dma_cache_inv(paddr, size);
}
-
-static int arc_dma_map_sg(struct device *dev, struct scatterlist *sg,
- int nents, enum dma_data_direction dir, unsigned long attrs)
-{
- struct scatterlist *s;
- int i;
-
- for_each_sg(sg, s, nents, i)
- s->dma_address = dma_map_page(dev, sg_page(s), s->offset,
- s->length, dir);
-
- return nents;
-}
-
-static void arc_dma_unmap_sg(struct device *dev, struct scatterlist *sg,
- int nents, enum dma_data_direction dir,
- unsigned long attrs)
-{
- struct scatterlist *s;
- int i;
-
- for_each_sg(sg, s, nents, i)
- arc_dma_unmap_page(dev, sg_dma_address(s), sg_dma_len(s), dir,
- attrs);
-}
-
-static void arc_dma_sync_single_for_cpu(struct device *dev,
- dma_addr_t dma_handle, size_t size, enum dma_data_direction dir)
-{
- _dma_cache_sync(dma_handle, size, DMA_FROM_DEVICE);
-}
-
-static void arc_dma_sync_single_for_device(struct device *dev,
- dma_addr_t dma_handle, size_t size, enum dma_data_direction dir)
-{
- _dma_cache_sync(dma_handle, size, DMA_TO_DEVICE);
-}
-
-static void arc_dma_sync_sg_for_cpu(struct device *dev,
- struct scatterlist *sglist, int nelems,
- enum dma_data_direction dir)
-{
- int i;
- struct scatterlist *sg;
-
- for_each_sg(sglist, sg, nelems, i)
- _dma_cache_sync(sg_phys(sg), sg->length, dir);
-}
-
-static void arc_dma_sync_sg_for_device(struct device *dev,
- struct scatterlist *sglist, int nelems,
- enum dma_data_direction dir)
-{
- int i;
- struct scatterlist *sg;
-
- for_each_sg(sglist, sg, nelems, i)
- _dma_cache_sync(sg_phys(sg), sg->length, dir);
-}
-
-static int arc_dma_supported(struct device *dev, u64 dma_mask)
-{
- /* Support 32 bit DMA mask exclusively */
- return dma_mask == DMA_BIT_MASK(32);
-}
-
-const struct dma_map_ops arc_dma_ops = {
- .alloc = arc_dma_alloc,
- .free = arc_dma_free,
- .mmap = arc_dma_mmap,
- .map_page = arc_dma_map_page,
- .unmap_page = arc_dma_unmap_page,
- .map_sg = arc_dma_map_sg,
- .unmap_sg = arc_dma_unmap_sg,
- .sync_single_for_device = arc_dma_sync_single_for_device,
- .sync_single_for_cpu = arc_dma_sync_single_for_cpu,
- .sync_sg_for_cpu = arc_dma_sync_sg_for_cpu,
- .sync_sg_for_device = arc_dma_sync_sg_for_device,
- .dma_supported = arc_dma_supported,
-};
-EXPORT_SYMBOL(arc_dma_ops);
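
After this conversion the architecture supplies only the two
cache-maintenance hooks above; the generic DMA_NONCOHERENT_OPS code builds
map/unmap, the scatterlist variants, and the sync calls on top of them
(note the simplification: for_device always writes back, for_cpu always
invalidates, instead of switching on the direction). From a driver's point
of view nothing changes; a hypothetical streaming mapping:

    dma_addr_t handle = dma_map_single(dev, buf, len, DMA_TO_DEVICE);
    if (dma_mapping_error(dev, handle))
            return -ENOMEM;
    /* generic code invoked arch_sync_dma_for_device(), i.e. a writeback,
     * before handing the buffer to the device */
    dma_unmap_single(dev, handle, len, DMA_TO_DEVICE);
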
diff --git a/arch/arc/mm/fault.c b/arch/arc/mm/fault.c
index a0b7bd6d030d..b884bbd6f354 100644
--- a/arch/arc/mm/fault.c
+++ b/arch/arc/mm/fault.c
@@ -70,6 +70,8 @@ void do_page_fault(unsigned long address, struct pt_regs *regs)
int write = regs->ecr_cause & ECR_C_PROTV_STORE; /* ST/EX */
unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
+ clear_siginfo(&info);
+
/*
* We fault-in kernel-space virtual memory on-demand. The
* 'reference' page table is init_mm.pgd.
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index a7f8e7f4b88f..c43f5bb55ac8 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -60,7 +60,6 @@ config ARM
select HAVE_CONTEXT_TRACKING
select HAVE_C_RECORDMCOUNT
select HAVE_DEBUG_KMEMLEAK
- select HAVE_DMA_API_DEBUG
select HAVE_DMA_CONTIGUOUS if MMU
select HAVE_DYNAMIC_FTRACE if (!XIP_KERNEL) && !CPU_ENDIAN_BE32 && MMU
select HAVE_DYNAMIC_FTRACE_WITH_REGS if HAVE_DYNAMIC_FTRACE
@@ -96,6 +95,7 @@ config ARM
select HAVE_VIRT_CPU_ACCOUNTING_GEN
select IRQ_FORCED_THREADING
select MODULES_USE_ELF_REL
+ select NEED_DMA_MAP_STATE
select NO_BOOTMEM
select OF_EARLY_FLATTREE if OF
select OF_RESERVED_MEM if OF
@@ -119,9 +119,6 @@ config ARM_HAS_SG_CHAIN
select ARCH_HAS_SG_CHAIN
bool
-config NEED_SG_DMA_LENGTH
- bool
-
config ARM_DMA_USE_IOMMU
bool
select ARM_HAS_SG_CHAIN
@@ -224,9 +221,6 @@ config ARCH_MAY_HAVE_PC_FDC
config ZONE_DMA
bool
-config NEED_DMA_MAP_STATE
- def_bool y
-
config ARCH_SUPPORTS_UPROBES
def_bool y
@@ -1778,12 +1772,6 @@ config SECCOMP
and the task is only allowed to execute a few safe syscalls
defined by each seccomp mode.
-config SWIOTLB
- def_bool y
-
-config IOMMU_HELPER
- def_bool SWIOTLB
-
config PARAVIRT
bool "Enable paravirtualization code"
help
@@ -1815,6 +1803,7 @@ config XEN
depends on MMU
select ARCH_DMA_ADDR_T_64BIT
select ARM_PSCI
+ select SWIOTLB
select SWIOTLB_XEN
select PARAVIRT
help
diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile
index 45a6b9b7af2a..6a4e7341ecd3 100644
--- a/arch/arm/boot/compressed/Makefile
+++ b/arch/arm/boot/compressed/Makefile
@@ -117,11 +117,9 @@ ccflags-y := -fpic -mno-single-pic-base -fno-builtin -I$(obj)
asflags-y := -DZIMAGE
# Supply kernel BSS size to the decompressor via a linker symbol.
-KBSS_SZ = $(shell $(CROSS_COMPILE)nm $(obj)/../../../../vmlinux | \
- perl -e 'while (<>) { \
- $$bss_start=hex($$1) if /^([[:xdigit:]]+) B __bss_start$$/; \
- $$bss_end=hex($$1) if /^([[:xdigit:]]+) B __bss_stop$$/; \
- }; printf "%d\n", $$bss_end - $$bss_start;')
+KBSS_SZ = $(shell echo $$(($$($(CROSS_COMPILE)nm $(obj)/../../../../vmlinux | \
+ sed -n -e 's/^\([^ ]*\) [AB] __bss_start$$/-0x\1/p' \
+ -e 's/^\([^ ]*\) [AB] __bss_stop$$/+0x\1/p') )) )
LDFLAGS_vmlinux = --defsym _kernel_bss_size=$(KBSS_SZ)
# Supply ZRELADDR to the decompressor via a linker symbol.
ifneq ($(CONFIG_AUTO_ZRELADDR),y)
diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S
index 45c8823c3750..517e0e18f0b8 100644
--- a/arch/arm/boot/compressed/head.S
+++ b/arch/arm/boot/compressed/head.S
@@ -29,19 +29,19 @@
#if defined(CONFIG_DEBUG_ICEDCC)
#if defined(CONFIG_CPU_V6) || defined(CONFIG_CPU_V6K) || defined(CONFIG_CPU_V7)
- .macro loadsp, rb, tmp
+ .macro loadsp, rb, tmp1, tmp2
.endm
.macro writeb, ch, rb
mcr p14, 0, \ch, c0, c5, 0
.endm
#elif defined(CONFIG_CPU_XSCALE)
- .macro loadsp, rb, tmp
+ .macro loadsp, rb, tmp1, tmp2
.endm
.macro writeb, ch, rb
mcr p14, 0, \ch, c8, c0, 0
.endm
#else
- .macro loadsp, rb, tmp
+ .macro loadsp, rb, tmp1, tmp2
.endm
.macro writeb, ch, rb
mcr p14, 0, \ch, c1, c0, 0
@@ -57,7 +57,7 @@
.endm
#if defined(CONFIG_ARCH_SA1100)
- .macro loadsp, rb, tmp
+ .macro loadsp, rb, tmp1, tmp2
mov \rb, #0x80000000 @ physical base address
#ifdef CONFIG_DEBUG_LL_SER3
add \rb, \rb, #0x00050000 @ Ser3
@@ -66,8 +66,8 @@
#endif
.endm
#else
- .macro loadsp, rb, tmp
- addruart \rb, \tmp
+ .macro loadsp, rb, tmp1, tmp2
+ addruart \rb, \tmp1, \tmp2
.endm
#endif
#endif
@@ -561,8 +561,6 @@ not_relocated: mov r0, #0
bl decompress_kernel
bl cache_clean_flush
bl cache_off
- mov r1, r7 @ restore architecture number
- mov r2, r8 @ restore atags pointer
#ifdef CONFIG_ARM_VIRT_EXT
mrs r0, spsr @ Get saved CPU boot mode
@@ -1297,7 +1295,7 @@ phex: adr r3, phexbuf
b 1b
@ puts corrupts {r0, r1, r2, r3}
-puts: loadsp r3, r1
+puts: loadsp r3, r2, r1
1: ldrb r2, [r0], #1
teq r2, #0
moveq pc, lr
@@ -1314,8 +1312,8 @@ puts: loadsp r3, r1
@ putc corrupts {r0, r1, r2, r3}
putc:
mov r2, r0
+ loadsp r3, r1, r0
mov r0, #0
- loadsp r3, r1
b 2b
@ memdump corrupts {r0, r1, r2, r3, r10, r11, r12, lr}
@@ -1365,6 +1363,8 @@ __hyp_reentry_vectors:
__enter_kernel:
mov r0, #0 @ must be 0
+ mov r1, r7 @ restore architecture number
+ mov r2, r8 @ restore atags pointer
ARM( mov pc, r4 ) @ call kernel
M_CLASS( add r4, r4, #1 ) @ enter in Thumb mode for M class
THUMB( bx r4 ) @ entry point is always ARM for A/R classes
diff --git a/arch/arm/boot/dts/bcm-cygnus.dtsi b/arch/arm/boot/dts/bcm-cygnus.dtsi
index 699fdf94d139..9fe4f5a6379e 100644
--- a/arch/arm/boot/dts/bcm-cygnus.dtsi
+++ b/arch/arm/boot/dts/bcm-cygnus.dtsi
@@ -69,7 +69,7 @@
timer@20200 {
compatible = "arm,cortex-a9-global-timer";
reg = <0x20200 0x100>;
- interrupts = <GIC_PPI 11 IRQ_TYPE_LEVEL_HIGH>;
+ interrupts = <GIC_PPI 11 IRQ_TYPE_EDGE_RISING>;
clocks = <&periph_clk>;
};
diff --git a/arch/arm/boot/dts/da850-lcdk.dts b/arch/arm/boot/dts/da850-lcdk.dts
index a1f4d6d5a569..0edf769ea95c 100644
--- a/arch/arm/boot/dts/da850-lcdk.dts
+++ b/arch/arm/boot/dts/da850-lcdk.dts
@@ -21,8 +21,8 @@
stdout-path = "serial2:115200n8";
};
- memory {
- device_type = "memory";
+ memory@c0000000 {
+ /* 128 MB DDR2 SDRAM @ 0xc0000000 */
reg = <0xc0000000 0x08000000>;
};
diff --git a/arch/arm/boot/dts/da850.dtsi b/arch/arm/boot/dts/da850.dtsi
index c66cf7895363..12010002dbdb 100644
--- a/arch/arm/boot/dts/da850.dtsi
+++ b/arch/arm/boot/dts/da850.dtsi
@@ -7,10 +7,19 @@
* Free Software Foundation; either version 2 of the License, or (at your
* option) any later version.
*/
-#include "skeleton.dtsi"
#include <dt-bindings/interrupt-controller/irq.h>
/ {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ chosen { };
+ aliases { };
+
+ memory@c0000000 {
+ device_type = "memory";
+ reg = <0xc0000000 0x0>;
+ };
+
arm {
#address-cells = <1>;
#size-cells = <1>;
@@ -46,8 +55,6 @@
pmx_core: pinmux@14120 {
compatible = "pinctrl-single";
reg = <0x14120 0x50>;
- #address-cells = <1>;
- #size-cells = <0>;
#pinctrl-cells = <2>;
pinctrl-single,bit-per-mux;
pinctrl-single,register-width = <32>;
diff --git a/arch/arm/boot/dts/dm8148-evm.dts b/arch/arm/boot/dts/dm8148-evm.dts
index d6657b3bae84..85d7b5148b0a 100644
--- a/arch/arm/boot/dts/dm8148-evm.dts
+++ b/arch/arm/boot/dts/dm8148-evm.dts
@@ -10,7 +10,7 @@
/ {
model = "DM8148 EVM";
- compatible = "ti,dm8148-evm", "ti,dm8148";
+ compatible = "ti,dm8148-evm", "ti,dm8148", "ti,dm814";
memory@80000000 {
device_type = "memory";
diff --git a/arch/arm/boot/dts/dm8148-t410.dts b/arch/arm/boot/dts/dm8148-t410.dts
index 63883b3479f9..6418f9cdbe83 100644
--- a/arch/arm/boot/dts/dm8148-t410.dts
+++ b/arch/arm/boot/dts/dm8148-t410.dts
@@ -9,7 +9,7 @@
/ {
model = "HP t410 Smart Zero Client";
- compatible = "hp,t410", "ti,dm8148";
+ compatible = "hp,t410", "ti,dm8148", "ti,dm814";
memory@80000000 {
device_type = "memory";
diff --git a/arch/arm/boot/dts/dm8168-evm.dts b/arch/arm/boot/dts/dm8168-evm.dts
index c72a2132aa82..1d030d567307 100644
--- a/arch/arm/boot/dts/dm8168-evm.dts
+++ b/arch/arm/boot/dts/dm8168-evm.dts
@@ -10,7 +10,7 @@
/ {
model = "DM8168 EVM";
- compatible = "ti,dm8168-evm", "ti,dm8168";
+ compatible = "ti,dm8168-evm", "ti,dm8168", "ti,dm816";
memory@80000000 {
device_type = "memory";
diff --git a/arch/arm/boot/dts/dra62x-j5eco-evm.dts b/arch/arm/boot/dts/dra62x-j5eco-evm.dts
index fee0547f7302..31b824ad5d29 100644
--- a/arch/arm/boot/dts/dra62x-j5eco-evm.dts
+++ b/arch/arm/boot/dts/dra62x-j5eco-evm.dts
@@ -10,7 +10,7 @@
/ {
model = "DRA62x J5 Eco EVM";
- compatible = "ti,dra62x-j5eco-evm", "ti,dra62x", "ti,dm8148";
+ compatible = "ti,dra62x-j5eco-evm", "ti,dra62x", "ti,dm8148", "ti,dm814";
memory@80000000 {
device_type = "memory";
diff --git a/arch/arm/boot/dts/imx35.dtsi b/arch/arm/boot/dts/imx35.dtsi
index bf343195697e..54111ed218b1 100644
--- a/arch/arm/boot/dts/imx35.dtsi
+++ b/arch/arm/boot/dts/imx35.dtsi
@@ -303,7 +303,7 @@
};
can1: can@53fe4000 {
- compatible = "fsl,imx35-flexcan";
+ compatible = "fsl,imx35-flexcan", "fsl,imx25-flexcan";
reg = <0x53fe4000 0x1000>;
clocks = <&clks 33>, <&clks 33>;
clock-names = "ipg", "per";
@@ -312,7 +312,7 @@
};
can2: can@53fe8000 {
- compatible = "fsl,imx35-flexcan";
+ compatible = "fsl,imx35-flexcan", "fsl,imx25-flexcan";
reg = <0x53fe8000 0x1000>;
clocks = <&clks 34>, <&clks 34>;
clock-names = "ipg", "per";
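
Adding "fsl,imx25-flexcan" as a fallback compatible lets these nodes bind
against the oldest core the driver supports, so the device tree keeps
working even where the driver has no SoC-specific entry. The driver side
matches along these lines (an abridged sketch of the usual flexcan match
table, not the verbatim source):

    static const struct of_device_id flexcan_of_match[] = {
            { .compatible = "fsl,imx25-flexcan" },  /* common fallback */
            /* more specific entries carrying quirk data go here */
            { /* sentinel */ },
    };
    MODULE_DEVICE_TABLE(of, flexcan_of_match);
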
diff --git a/arch/arm/boot/dts/imx51-zii-rdu1.dts b/arch/arm/boot/dts/imx51-zii-rdu1.dts
index 0c99ac04ad08..6464f2560e06 100644
--- a/arch/arm/boot/dts/imx51-zii-rdu1.dts
+++ b/arch/arm/boot/dts/imx51-zii-rdu1.dts
@@ -523,7 +523,7 @@
};
touchscreen@20 {
- compatible = "syna,rmi4_i2c";
+ compatible = "syna,rmi4-i2c";
reg = <0x20>;
pinctrl-names = "default";
pinctrl-0 = <&pinctrl_ts>;
@@ -541,8 +541,8 @@
rmi4-f11@11 {
reg = <0x11>;
- touch-inverted-y;
- touch-swapped-x-y;
+ touchscreen-inverted-y;
+ touchscreen-swapped-x-y;
syna,sensor-type = <1>;
};
};
diff --git a/arch/arm/boot/dts/imx53.dtsi b/arch/arm/boot/dts/imx53.dtsi
index 7d647d043f52..3d65c0192f69 100644
--- a/arch/arm/boot/dts/imx53.dtsi
+++ b/arch/arm/boot/dts/imx53.dtsi
@@ -551,7 +551,7 @@
};
can1: can@53fc8000 {
- compatible = "fsl,imx53-flexcan";
+ compatible = "fsl,imx53-flexcan", "fsl,imx25-flexcan";
reg = <0x53fc8000 0x4000>;
interrupts = <82>;
clocks = <&clks IMX5_CLK_CAN1_IPG_GATE>,
@@ -561,7 +561,7 @@
};
can2: can@53fcc000 {
- compatible = "fsl,imx53-flexcan";
+ compatible = "fsl,imx53-flexcan", "fsl,imx25-flexcan";
reg = <0x53fcc000 0x4000>;
interrupts = <83>;
clocks = <&clks IMX5_CLK_CAN2_IPG_GATE>,
diff --git a/arch/arm/boot/dts/imx7s.dtsi b/arch/arm/boot/dts/imx7s.dtsi
index 4d42335c0dee..ce85b3ca1a55 100644
--- a/arch/arm/boot/dts/imx7s.dtsi
+++ b/arch/arm/boot/dts/imx7s.dtsi
@@ -868,6 +868,7 @@
crypto: caam@30900000 {
compatible = "fsl,sec-v4.0";
+ fsl,sec-era = <8>;
#address-cells = <1>;
#size-cells = <1>;
reg = <0x30900000 0x40000>;
diff --git a/arch/arm/boot/dts/logicpd-som-lv.dtsi b/arch/arm/boot/dts/logicpd-som-lv.dtsi
index b47cac23a04b..6fa7bba3e801 100644
--- a/arch/arm/boot/dts/logicpd-som-lv.dtsi
+++ b/arch/arm/boot/dts/logicpd-som-lv.dtsi
@@ -26,7 +26,7 @@
gpio = <&gpio1 3 0>; /* gpio_3 */
startup-delay-us = <70000>;
enable-active-high;
- vin-supply = <&vmmc2>;
+ vin-supply = <&vaux3>;
};
/* HS USB Host PHY on PORT 1 */
@@ -82,6 +82,7 @@
twl_audio: audio {
compatible = "ti,twl4030-audio";
codec {
+ ti,hs_extmute_gpio = <&gpio2 25 GPIO_ACTIVE_HIGH>;
};
};
};
@@ -199,6 +200,7 @@
pinctrl-single,pins = <
OMAP3_CORE1_IOPAD(0x21ba, PIN_INPUT | MUX_MODE0) /* i2c1_scl.i2c1_scl */
OMAP3_CORE1_IOPAD(0x21bc, PIN_INPUT | MUX_MODE0) /* i2c1_sda.i2c1_sda */
+ OMAP3_CORE1_IOPAD(0x20ba, PIN_OUTPUT | MUX_MODE4) /* gpmc_ncs6.gpio_57 */
>;
};
};
@@ -213,7 +215,7 @@
};
wl127x_gpio: pinmux_wl127x_gpio_pin {
pinctrl-single,pins = <
- OMAP3_WKUP_IOPAD(0x2a0c, PIN_INPUT | MUX_MODE4) /* sys_boot0.gpio_2 */
+ OMAP3_WKUP_IOPAD(0x2a0a, PIN_INPUT | MUX_MODE4) /* sys_boot0.gpio_2 */
OMAP3_WKUP_IOPAD(0x2a0c, PIN_OUTPUT | MUX_MODE4) /* sys_boot1.gpio_3 */
>;
};
@@ -260,6 +262,11 @@
#include "twl4030.dtsi"
#include "twl4030_omap3.dtsi"
+&vaux3 {
+ regulator-min-microvolt = <2800000>;
+ regulator-max-microvolt = <2800000>;
+};
+
&twl {
twl_power: power {
compatible = "ti,twl4030-power-idle-osc-off", "ti,twl4030-power-idle";
diff --git a/arch/arm/boot/dts/r8a7790-lager.dts b/arch/arm/boot/dts/r8a7790-lager.dts
index 063fdb65dc60..f07f9018c3e7 100644
--- a/arch/arm/boot/dts/r8a7790-lager.dts
+++ b/arch/arm/boot/dts/r8a7790-lager.dts
@@ -379,7 +379,7 @@
port@0 {
reg = <0>;
adv7511_in: endpoint {
- remote-endpoint = <&du_out_lvds0>;
+ remote-endpoint = <&lvds0_out>;
};
};
@@ -467,10 +467,8 @@
status = "okay";
clocks = <&cpg CPG_MOD 724>, <&cpg CPG_MOD 723>, <&cpg CPG_MOD 722>,
- <&cpg CPG_MOD 726>, <&cpg CPG_MOD 725>,
<&x13_clk>, <&x2_clk>;
- clock-names = "du.0", "du.1", "du.2", "lvds.0", "lvds.1",
- "dclkin.0", "dclkin.1";
+ clock-names = "du.0", "du.1", "du.2", "dclkin.0", "dclkin.1";
ports {
port@0 {
@@ -478,12 +476,26 @@
remote-endpoint = <&adv7123_in>;
};
};
+ };
+};
+
+&lvds0 {
+ status = "okay";
+
+ ports {
port@1 {
endpoint {
remote-endpoint = <&adv7511_in>;
};
};
- port@2 {
+ };
+};
+
+&lvds1 {
+ status = "okay";
+
+ ports {
+ port@1 {
lvds_connector: endpoint {
};
};
diff --git a/arch/arm/boot/dts/r8a7790.dtsi b/arch/arm/boot/dts/r8a7790.dtsi
index e4367cecad18..05a0fc23ac88 100644
--- a/arch/arm/boot/dts/r8a7790.dtsi
+++ b/arch/arm/boot/dts/r8a7790.dtsi
@@ -1627,18 +1627,13 @@
du: display@feb00000 {
compatible = "renesas,du-r8a7790";
- reg = <0 0xfeb00000 0 0x70000>,
- <0 0xfeb90000 0 0x1c>,
- <0 0xfeb94000 0 0x1c>;
- reg-names = "du", "lvds.0", "lvds.1";
+ reg = <0 0xfeb00000 0 0x70000>;
interrupts = <GIC_SPI 256 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 268 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 269 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&cpg CPG_MOD 724>, <&cpg CPG_MOD 723>,
- <&cpg CPG_MOD 722>, <&cpg CPG_MOD 726>,
- <&cpg CPG_MOD 725>;
- clock-names = "du.0", "du.1", "du.2", "lvds.0",
- "lvds.1";
+ <&cpg CPG_MOD 722>;
+ clock-names = "du.0", "du.1", "du.2";
status = "disabled";
ports {
@@ -1653,11 +1648,65 @@
port@1 {
reg = <1>;
du_out_lvds0: endpoint {
+ remote-endpoint = <&lvds0_in>;
};
};
port@2 {
reg = <2>;
du_out_lvds1: endpoint {
+ remote-endpoint = <&lvds1_in>;
+ };
+ };
+ };
+ };
+
+ lvds0: lvds@feb90000 {
+ compatible = "renesas,r8a7790-lvds";
+ reg = <0 0xfeb90000 0 0x1c>;
+ clocks = <&cpg CPG_MOD 726>;
+ power-domains = <&sysc R8A7790_PD_ALWAYS_ON>;
+ resets = <&cpg 726>;
+ status = "disabled";
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+ lvds0_in: endpoint {
+ remote-endpoint = <&du_out_lvds0>;
+ };
+ };
+ port@1 {
+ reg = <1>;
+ lvds0_out: endpoint {
+ };
+ };
+ };
+ };
+
+ lvds1: lvds@feb94000 {
+ compatible = "renesas,r8a7790-lvds";
+ reg = <0 0xfeb94000 0 0x1c>;
+ clocks = <&cpg CPG_MOD 725>;
+ power-domains = <&sysc R8A7790_PD_ALWAYS_ON>;
+ resets = <&cpg 725>;
+ status = "disabled";
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+ lvds1_in: endpoint {
+ remote-endpoint = <&du_out_lvds1>;
+ };
+ };
+ port@1 {
+ reg = <1>;
+ lvds1_out: endpoint {
};
};
};
diff --git a/arch/arm/boot/dts/r8a7791-koelsch.dts b/arch/arm/boot/dts/r8a7791-koelsch.dts
index f40321a1c917..9d7213a0b8b8 100644
--- a/arch/arm/boot/dts/r8a7791-koelsch.dts
+++ b/arch/arm/boot/dts/r8a7791-koelsch.dts
@@ -468,10 +468,9 @@
pinctrl-names = "default";
status = "okay";
- clocks = <&cpg CPG_MOD 724>, <&cpg CPG_MOD 723>, <&cpg CPG_MOD 726>,
+ clocks = <&cpg CPG_MOD 724>, <&cpg CPG_MOD 723>,
<&x13_clk>, <&x2_clk>;
- clock-names = "du.0", "du.1", "lvds.0",
- "dclkin.0", "dclkin.1";
+ clock-names = "du.0", "du.1", "dclkin.0", "dclkin.1";
ports {
port@0 {
@@ -479,6 +478,13 @@
remote-endpoint = <&adv7511_in>;
};
};
+ };
+};
+
+&lvds0 {
+ status = "okay";
+
+ ports {
port@1 {
lvds_connector: endpoint {
};
diff --git a/arch/arm/boot/dts/r8a7791-porter.dts b/arch/arm/boot/dts/r8a7791-porter.dts
index c14e6fe9e4f6..ae9ed9ff53ef 100644
--- a/arch/arm/boot/dts/r8a7791-porter.dts
+++ b/arch/arm/boot/dts/r8a7791-porter.dts
@@ -441,10 +441,9 @@
pinctrl-names = "default";
status = "okay";
- clocks = <&cpg CPG_MOD 724>, <&cpg CPG_MOD 723>, <&cpg CPG_MOD 726>,
+ clocks = <&cpg CPG_MOD 724>, <&cpg CPG_MOD 723>,
<&x3_clk>, <&x16_clk>;
- clock-names = "du.0", "du.1", "lvds.0",
- "dclkin.0", "dclkin.1";
+ clock-names = "du.0", "du.1", "dclkin.0", "dclkin.1";
ports {
port@0 {
@@ -455,6 +454,17 @@
};
};
+&lvds0 {
+ status = "okay";
+
+ ports {
+ port@1 {
+ lvds_connector: endpoint {
+ };
+ };
+ };
+};
+
&rcar_sound {
pinctrl-0 = <&ssi_pins &audio_clk_pins>;
pinctrl-names = "default";
diff --git a/arch/arm/boot/dts/r8a7791.dtsi b/arch/arm/boot/dts/r8a7791.dtsi
index f11dab71b03a..506b20885413 100644
--- a/arch/arm/boot/dts/r8a7791.dtsi
+++ b/arch/arm/boot/dts/r8a7791.dtsi
@@ -1633,15 +1633,12 @@
du: display@feb00000 {
compatible = "renesas,du-r8a7791";
- reg = <0 0xfeb00000 0 0x40000>,
- <0 0xfeb90000 0 0x1c>;
- reg-names = "du", "lvds.0";
+ reg = <0 0xfeb00000 0 0x40000>;
interrupts = <GIC_SPI 256 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 268 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&cpg CPG_MOD 724>,
- <&cpg CPG_MOD 723>,
- <&cpg CPG_MOD 726>;
- clock-names = "du.0", "du.1", "lvds.0";
+ <&cpg CPG_MOD 723>;
+ clock-names = "du.0", "du.1";
status = "disabled";
ports {
@@ -1656,6 +1653,33 @@
port@1 {
reg = <1>;
du_out_lvds0: endpoint {
+ remote-endpoint = <&lvds0_in>;
+ };
+ };
+ };
+ };
+
+ lvds0: lvds@feb90000 {
+ compatible = "renesas,r8a7791-lvds";
+ reg = <0 0xfeb90000 0 0x1c>;
+ clocks = <&cpg CPG_MOD 726>;
+ power-domains = <&sysc R8A7791_PD_ALWAYS_ON>;
+ resets = <&cpg 726>;
+ status = "disabled";
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+ lvds0_in: endpoint {
+ remote-endpoint = <&du_out_lvds0>;
+ };
+ };
+ port@1 {
+ reg = <1>;
+ lvds0_out: endpoint {
};
};
};
diff --git a/arch/arm/boot/dts/r8a7793-gose.dts b/arch/arm/boot/dts/r8a7793-gose.dts
index 9ed6961f2d9a..96e117d8b2cc 100644
--- a/arch/arm/boot/dts/r8a7793-gose.dts
+++ b/arch/arm/boot/dts/r8a7793-gose.dts
@@ -447,10 +447,9 @@
pinctrl-names = "default";
status = "okay";
- clocks = <&cpg CPG_MOD 724>, <&cpg CPG_MOD 723>, <&cpg CPG_MOD 726>,
+ clocks = <&cpg CPG_MOD 724>, <&cpg CPG_MOD 723>,
<&x13_clk>, <&x2_clk>;
- clock-names = "du.0", "du.1", "lvds.0",
- "dclkin.0", "dclkin.1";
+ clock-names = "du.0", "du.1", "dclkin.0", "dclkin.1";
ports {
port@0 {
@@ -458,6 +457,11 @@
remote-endpoint = <&adv7511_in>;
};
};
+ };
+};
+
+&lvds0 {
+ ports {
port@1 {
lvds_connector: endpoint {
};
diff --git a/arch/arm/boot/dts/r8a7793.dtsi b/arch/arm/boot/dts/r8a7793.dtsi
index f9c5a557107d..4f526030dc7c 100644
--- a/arch/arm/boot/dts/r8a7793.dtsi
+++ b/arch/arm/boot/dts/r8a7793.dtsi
@@ -1292,15 +1292,12 @@
du: display@feb00000 {
compatible = "renesas,du-r8a7793";
- reg = <0 0xfeb00000 0 0x40000>,
- <0 0xfeb90000 0 0x1c>;
- reg-names = "du", "lvds.0";
+ reg = <0 0xfeb00000 0 0x40000>;
interrupts = <GIC_SPI 256 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 268 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&cpg CPG_MOD 724>,
- <&cpg CPG_MOD 723>,
- <&cpg CPG_MOD 726>;
- clock-names = "du.0", "du.1", "lvds.0";
+ <&cpg CPG_MOD 723>;
+ clock-names = "du.0", "du.1";
status = "disabled";
ports {
@@ -1315,6 +1312,34 @@
port@1 {
reg = <1>;
du_out_lvds0: endpoint {
+ remote-endpoint = <&lvds0_in>;
+ };
+ };
+ };
+ };
+
+ lvds0: lvds@feb90000 {
+ compatible = "renesas,r8a7793-lvds";
+ reg = <0 0xfeb90000 0 0x1c>;
+ clocks = <&cpg CPG_MOD 726>;
+ power-domains = <&sysc R8A7793_PD_ALWAYS_ON>;
+ resets = <&cpg 726>;
+
+ status = "disabled";
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+ lvds0_in: endpoint {
+ remote-endpoint = <&du_out_lvds0>;
+ };
+ };
+ port@1 {
+ reg = <1>;
+ lvds0_out: endpoint {
};
};
};
diff --git a/arch/arm/boot/dts/stm32mp157-pinctrl.dtsi b/arch/arm/boot/dts/stm32mp157-pinctrl.dtsi
index c0743305f31b..eb96ac3e6c1d 100644
--- a/arch/arm/boot/dts/stm32mp157-pinctrl.dtsi
+++ b/arch/arm/boot/dts/stm32mp157-pinctrl.dtsi
@@ -12,6 +12,8 @@
#size-cells = <1>;
compatible = "st,stm32mp157-pinctrl";
ranges = <0 0x50002000 0xa400>;
+ interrupt-parent = <&exti>;
+ st,syscfg = <&exti 0x60 0xff>;
pins-are-numbered;
gpioa: gpio@50002000 {
@@ -166,6 +168,8 @@
compatible = "st,stm32mp157-z-pinctrl";
ranges = <0 0x54004000 0x400>;
pins-are-numbered;
+ interrupt-parent = <&exti>;
+ st,syscfg = <&exti 0x60 0xff>;
status = "disabled";
gpioz: gpio@54004000 {
diff --git a/arch/arm/boot/dts/stm32mp157c.dtsi b/arch/arm/boot/dts/stm32mp157c.dtsi
index 9e17e42b02b2..4fa0df853c8a 100644
--- a/arch/arm/boot/dts/stm32mp157c.dtsi
+++ b/arch/arm/boot/dts/stm32mp157c.dtsi
@@ -183,6 +183,13 @@
status = "disabled";
};
+ exti: interrupt-controller@5000d000 {
+ compatible = "st,stm32mp1-exti", "syscon";
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ reg = <0x5000d000 0x400>;
+ };
+
usart1: serial@5c000000 {
compatible = "st,stm32h7-uart";
reg = <0x5c000000 0x400>;
diff --git a/arch/arm/boot/dts/sun4i-a10.dtsi b/arch/arm/boot/dts/sun4i-a10.dtsi
index 77e8436beed4..3a1c6b45c9a1 100644
--- a/arch/arm/boot/dts/sun4i-a10.dtsi
+++ b/arch/arm/boot/dts/sun4i-a10.dtsi
@@ -76,7 +76,7 @@
allwinner,pipeline = "de_fe0-de_be0-lcd0-hdmi";
clocks = <&ccu CLK_AHB_LCD0>, <&ccu CLK_AHB_HDMI0>,
<&ccu CLK_AHB_DE_BE0>, <&ccu CLK_AHB_DE_FE0>,
- <&ccu CLK_DE_BE0>, <&ccu CLK_AHB_DE_FE0>,
+ <&ccu CLK_DE_BE0>, <&ccu CLK_DE_FE0>,
<&ccu CLK_TCON0_CH1>, <&ccu CLK_HDMI>,
<&ccu CLK_DRAM_DE_FE0>, <&ccu CLK_DRAM_DE_BE0>;
status = "disabled";
@@ -88,7 +88,7 @@
allwinner,pipeline = "de_fe0-de_be0-lcd0";
clocks = <&ccu CLK_AHB_LCD0>, <&ccu CLK_AHB_DE_BE0>,
<&ccu CLK_AHB_DE_FE0>, <&ccu CLK_DE_BE0>,
- <&ccu CLK_AHB_DE_FE0>, <&ccu CLK_TCON0_CH0>,
+ <&ccu CLK_DE_FE0>, <&ccu CLK_TCON0_CH0>,
<&ccu CLK_DRAM_DE_FE0>, <&ccu CLK_DRAM_DE_BE0>;
status = "disabled";
};
@@ -99,7 +99,7 @@
allwinner,pipeline = "de_fe0-de_be0-lcd0-tve0";
clocks = <&ccu CLK_AHB_TVE0>, <&ccu CLK_AHB_LCD0>,
<&ccu CLK_AHB_DE_BE0>, <&ccu CLK_AHB_DE_FE0>,
- <&ccu CLK_DE_BE0>, <&ccu CLK_AHB_DE_FE0>,
+ <&ccu CLK_DE_BE0>, <&ccu CLK_DE_FE0>,
<&ccu CLK_TCON0_CH1>, <&ccu CLK_DRAM_TVE0>,
<&ccu CLK_DRAM_DE_FE0>, <&ccu CLK_DRAM_DE_BE0>;
status = "disabled";
diff --git a/arch/arm/boot/dts/sun8i-h3-orangepi-one.dts b/arch/arm/boot/dts/sun8i-h3-orangepi-one.dts
index 3328fe583c9b..232f124ce62c 100644
--- a/arch/arm/boot/dts/sun8i-h3-orangepi-one.dts
+++ b/arch/arm/boot/dts/sun8i-h3-orangepi-one.dts
@@ -117,6 +117,7 @@
phy-handle = <&int_mii_phy>;
phy-mode = "mii";
allwinner,leds-active-low;
+ status = "okay";
};
&hdmi {
diff --git a/arch/arm/boot/dts/sun8i-v3s-licheepi-zero-dock.dts b/arch/arm/boot/dts/sun8i-v3s-licheepi-zero-dock.dts
index d1311098ea45..ad173605b1b8 100644
--- a/arch/arm/boot/dts/sun8i-v3s-licheepi-zero-dock.dts
+++ b/arch/arm/boot/dts/sun8i-v3s-licheepi-zero-dock.dts
@@ -51,7 +51,7 @@
leds {
/* The LEDs use PG0~2 pins, which conflict with MMC1 */
- status = "disbaled";
+ status = "disabled";
};
};
diff --git a/arch/arm/boot/dts/tegra20.dtsi b/arch/arm/boot/dts/tegra20.dtsi
index 0a7136462a1a..983dd5c14794 100644
--- a/arch/arm/boot/dts/tegra20.dtsi
+++ b/arch/arm/boot/dts/tegra20.dtsi
@@ -741,7 +741,7 @@
phy_type = "ulpi";
clocks = <&tegra_car TEGRA20_CLK_USB2>,
<&tegra_car TEGRA20_CLK_PLL_U>,
- <&tegra_car TEGRA20_CLK_PLL_P_OUT4>;
+ <&tegra_car TEGRA20_CLK_CDEV2>;
clock-names = "reg", "pll_u", "ulpi-link";
resets = <&tegra_car 58>, <&tegra_car 22>;
reset-names = "usb", "utmi-pads";
diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h
index bc8d4bbd82e2..9342904cccca 100644
--- a/arch/arm/include/asm/assembler.h
+++ b/arch/arm/include/asm/assembler.h
@@ -536,4 +536,14 @@ THUMB( orr \reg , \reg , #PSR_T_BIT )
#endif
.endm
+#ifdef CONFIG_KPROBES
+#define _ASM_NOKPROBE(entry) \
+ .pushsection "_kprobe_blacklist", "aw" ; \
+ .balign 4 ; \
+ .long entry; \
+ .popsection
+#else
+#define _ASM_NOKPROBE(entry)
+#endif
+
#endif /* __ASM_ASSEMBLER_H__ */
diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index 707a1f06dc5d..f675162663f0 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -309,6 +309,22 @@ static inline unsigned int kvm_get_vmid_bits(void)
return 8;
}
+/*
+ * We are not in the kvm->srcu critical section most of the time, so we take
+ * the SRCU read lock here. Since we copy the data from the user page, we
+ * can immediately drop the lock again.
+ */
+static inline int kvm_read_guest_lock(struct kvm *kvm,
+ gpa_t gpa, void *data, unsigned long len)
+{
+ int srcu_idx = srcu_read_lock(&kvm->srcu);
+ int ret = kvm_read_guest(kvm, gpa, data, len);
+
+ srcu_read_unlock(&kvm->srcu, srcu_idx);
+
+ return ret;
+}
+
static inline void *kvm_get_hyp_vector(void)
{
return kvm_ksym_ref(__kvm_hyp_vector);
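
[Note: the kvm_read_guest_lock() helper added above wraps each guest-memory copy in its own SRCU read-side critical section. A minimal sketch of a caller follows — illustrative only, assuming a valid struct kvm from the usual vGIC/vITS context; the function and field names below are not part of the patch.]

	/* Illustrative: copy one 8-byte table entry out of guest memory. */
	static int demo_read_guest_entry(struct kvm *kvm, gpa_t gpa, u64 *entry)
	{
		int ret;

		/*
		 * kvm_read_guest_lock() takes and drops kvm->srcu internally,
		 * so the caller needs no extra locking just for this copy.
		 */
		ret = kvm_read_guest_lock(kvm, gpa, entry, sizeof(*entry));
		if (ret)
			return ret;	/* negative errno on failure */

		return 0;
	}
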
diff --git a/arch/arm/include/asm/pci.h b/arch/arm/include/asm/pci.h
index 1f0de808d111..0abd389cf0ec 100644
--- a/arch/arm/include/asm/pci.h
+++ b/arch/arm/include/asm/pci.h
@@ -19,13 +19,6 @@ static inline int pci_proc_domain(struct pci_bus *bus)
}
#endif /* CONFIG_PCI_DOMAINS */
-/*
- * The PCI address space does equal the physical memory address space.
- * The networking and block device layers use this boolean for bounce
- * buffer decisions.
- */
-#define PCI_DMA_BUS_IS_PHYS (1)
-
#define HAVE_PCI_MMAP
#define ARCH_GENERIC_PCI_MMAP_RESOURCE
diff --git a/arch/arm/include/uapi/asm/siginfo.h b/arch/arm/include/uapi/asm/siginfo.h
deleted file mode 100644
index d0513880be21..000000000000
--- a/arch/arm/include/uapi/asm/siginfo.h
+++ /dev/null
@@ -1,13 +0,0 @@
-#ifndef __ASM_SIGINFO_H
-#define __ASM_SIGINFO_H
-
-#include <asm-generic/siginfo.h>
-
-/*
- * SIGFPE si_codes
- */
-#ifdef __KERNEL__
-#define FPE_FIXME 0 /* Broken dup of SI_USER */
-#endif /* __KERNEL__ */
-
-#endif
diff --git a/arch/arm/kernel/dma.c b/arch/arm/kernel/dma.c
index e651c4d0a0d9..6739d37c2bc5 100644
--- a/arch/arm/kernel/dma.c
+++ b/arch/arm/kernel/dma.c
@@ -276,21 +276,9 @@ static int proc_dma_show(struct seq_file *m, void *v)
return 0;
}
-static int proc_dma_open(struct inode *inode, struct file *file)
-{
- return single_open(file, proc_dma_show, NULL);
-}
-
-static const struct file_operations proc_dma_operations = {
- .open = proc_dma_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
static int __init proc_dma_init(void)
{
- proc_create("dma", 0, NULL, &proc_dma_operations);
+ proc_create_single("dma", 0, NULL, proc_dma_show);
return 0;
}
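
[Note: proc_create_single(), new in this cycle, registers a seq_file show routine directly and replaces the open/read/llseek/release boilerplate deleted above. A minimal sketch of the pattern, using a made-up "demo" entry:]

	#include <linux/proc_fs.h>
	#include <linux/seq_file.h>

	static int demo_show(struct seq_file *m, void *v)
	{
		seq_puts(m, "hello from /proc/demo\n");
		return 0;
	}

	static int __init demo_init(void)
	{
		/* One call now replaces the old file_operations glue. */
		if (!proc_create_single("demo", 0, NULL, demo_show))
			return -ENOMEM;
		return 0;
	}
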
diff --git a/arch/arm/kernel/machine_kexec.c b/arch/arm/kernel/machine_kexec.c
index 6b38d7a634c1..dd2eb5f76b9f 100644
--- a/arch/arm/kernel/machine_kexec.c
+++ b/arch/arm/kernel/machine_kexec.c
@@ -83,7 +83,7 @@ void machine_crash_nonpanic_core(void *unused)
{
struct pt_regs regs;
- crash_setup_regs(&regs, NULL);
+ crash_setup_regs(&regs, get_irq_regs());
printk(KERN_DEBUG "CPU %u will stop doing anything useful since another CPU has crashed\n",
smp_processor_id());
crash_save_cpu(&regs, smp_processor_id());
@@ -95,6 +95,27 @@ void machine_crash_nonpanic_core(void *unused)
cpu_relax();
}
+void crash_smp_send_stop(void)
+{
+ static int cpus_stopped;
+ unsigned long msecs;
+
+ if (cpus_stopped)
+ return;
+
+ atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1);
+ smp_call_function(machine_crash_nonpanic_core, NULL, false);
+ msecs = 1000; /* Wait at most a second for the other cpus to stop */
+ while ((atomic_read(&waiting_for_crash_ipi) > 0) && msecs) {
+ mdelay(1);
+ msecs--;
+ }
+ if (atomic_read(&waiting_for_crash_ipi) > 0)
+ pr_warn("Non-crashing CPUs did not react to IPI\n");
+
+ cpus_stopped = 1;
+}
+
static void machine_kexec_mask_interrupts(void)
{
unsigned int i;
@@ -120,19 +141,8 @@ static void machine_kexec_mask_interrupts(void)
void machine_crash_shutdown(struct pt_regs *regs)
{
- unsigned long msecs;
-
local_irq_disable();
-
- atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1);
- smp_call_function(machine_crash_nonpanic_core, NULL, false);
- msecs = 1000; /* Wait at most a second for the other cpus to stop */
- while ((atomic_read(&waiting_for_crash_ipi) > 0) && msecs) {
- mdelay(1);
- msecs--;
- }
- if (atomic_read(&waiting_for_crash_ipi) > 0)
- pr_warn("Non-crashing CPUs did not react to IPI\n");
+ crash_smp_send_stop();
crash_save_cpu(regs, smp_processor_id());
machine_kexec_mask_interrupts();
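
[Note: factoring the IPI loop into crash_smp_send_stop() lets the generic panic path stop secondary CPUs the same way machine_crash_shutdown() does. The core kernel's default is believed to be a weak stub falling back to smp_send_stop(); the cpus_stopped guard makes this arch override safe to call more than once. A condensed sketch of the call order — illustrative names, not literal kernel code:]

	void demo_crash_path(struct pt_regs *regs)
	{
		crash_smp_send_stop();		/* idempotent: IPIs sent only once */
		crash_save_cpu(regs, smp_processor_id());
		/* ... then proceed to kexec into the crash kernel ... */
	}
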
diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c
index 7724b0f661b3..36718a424358 100644
--- a/arch/arm/kernel/ptrace.c
+++ b/arch/arm/kernel/ptrace.c
@@ -205,6 +205,7 @@ void ptrace_break(struct task_struct *tsk, struct pt_regs *regs)
{
siginfo_t info;
+ clear_siginfo(&info);
info.si_signo = SIGTRAP;
info.si_errno = 0;
info.si_code = TRAP_BRKPT;
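
[Note: the clear_siginfo() calls added throughout this series zero the whole siginfo, including padding and unused union members, so no stale kernel stack bytes leak to userspace. A sketch of the full pattern as it appears in these conversions:]

	static void demo_send_trap(struct task_struct *tsk, void __user *addr)
	{
		siginfo_t info;

		clear_siginfo(&info);	/* zero padding too, avoiding infoleaks */
		info.si_signo = SIGTRAP;
		info.si_errno = 0;
		info.si_code  = TRAP_BRKPT;
		info.si_addr  = addr;
		force_sig_info(SIGTRAP, &info, tsk);
	}
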
diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index fc40a2b40595..35ca494c028c 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -754,7 +754,7 @@ int __init arm_add_memory(u64 start, u64 size)
else
size -= aligned_start - start;
-#ifndef CONFIG_ARCH_PHYS_ADDR_T_64BIT
+#ifndef CONFIG_PHYS_ADDR_T_64BIT
if (aligned_start > ULONG_MAX) {
pr_crit("Ignoring memory at 0x%08llx outside 32-bit physical address space\n",
(long long)start);
diff --git a/arch/arm/kernel/swp_emulate.c b/arch/arm/kernel/swp_emulate.c
index 3bda08bee674..80517f293eb9 100644
--- a/arch/arm/kernel/swp_emulate.c
+++ b/arch/arm/kernel/swp_emulate.c
@@ -91,18 +91,6 @@ static int proc_status_show(struct seq_file *m, void *v)
seq_printf(m, "Last process:\t\t%d\n", previous_pid);
return 0;
}
-
-static int proc_status_open(struct inode *inode, struct file *file)
-{
- return single_open(file, proc_status_show, PDE_DATA(inode));
-}
-
-static const struct file_operations proc_status_fops = {
- .open = proc_status_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
#endif
/*
@@ -112,6 +100,7 @@ static void set_segfault(struct pt_regs *regs, unsigned long addr)
{
siginfo_t info;
+ clear_siginfo(&info);
down_read(&current->mm->mmap_sem);
if (find_vma(current->mm, addr) == NULL)
info.si_code = SEGV_MAPERR;
@@ -260,7 +249,8 @@ static int __init swp_emulation_init(void)
return 0;
#ifdef CONFIG_PROC_FS
- if (!proc_create("cpu/swp_emulation", S_IRUGO, NULL, &proc_status_fops))
+ if (!proc_create_single("cpu/swp_emulation", S_IRUGO, NULL,
+ proc_status_show))
return -ENOMEM;
#endif /* CONFIG_PROC_FS */
diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
index 5e3633c24e63..badf02ca3693 100644
--- a/arch/arm/kernel/traps.c
+++ b/arch/arm/kernel/traps.c
@@ -19,6 +19,7 @@
#include <linux/uaccess.h>
#include <linux/hardirq.h>
#include <linux/kdebug.h>
+#include <linux/kprobes.h>
#include <linux/module.h>
#include <linux/kexec.h>
#include <linux/bug.h>
@@ -417,7 +418,8 @@ void unregister_undef_hook(struct undef_hook *hook)
raw_spin_unlock_irqrestore(&undef_lock, flags);
}
-static int call_undef_hook(struct pt_regs *regs, unsigned int instr)
+static nokprobe_inline
+int call_undef_hook(struct pt_regs *regs, unsigned int instr)
{
struct undef_hook *hook;
unsigned long flags;
@@ -439,6 +441,7 @@ asmlinkage void do_undefinstr(struct pt_regs *regs)
siginfo_t info;
void __user *pc;
+ clear_siginfo(&info);
pc = (void __user *)instruction_pointer(regs);
if (processor_mode(regs) == SVC_MODE) {
@@ -490,6 +493,7 @@ die_sig:
arm_notify_die("Oops - undefined instruction", regs, &info, 0, 6);
}
+NOKPROBE_SYMBOL(do_undefinstr)
/*
* Handle FIQ similarly to NMI on x86 systems.
@@ -537,6 +541,7 @@ static int bad_syscall(int n, struct pt_regs *regs)
{
siginfo_t info;
+ clear_siginfo(&info);
if ((current->personality & PER_MASK) != PER_LINUX) {
send_sig(SIGSEGV, current, 1);
return regs->ARM_r0;
@@ -604,6 +609,7 @@ asmlinkage int arm_syscall(int no, struct pt_regs *regs)
{
siginfo_t info;
+ clear_siginfo(&info);
if ((no >> 16) != (__ARM_NR_BASE>> 16))
return bad_syscall(no, regs);
@@ -740,6 +746,8 @@ baddataabort(int code, unsigned long instr, struct pt_regs *regs)
unsigned long addr = instruction_pointer(regs);
siginfo_t info;
+ clear_siginfo(&info);
+
#ifdef CONFIG_DEBUG_USER
if (user_debug & UDBG_BADABORT) {
pr_err("[%d] %s: bad data abort: code %d instr 0x%08lx\n",
diff --git a/arch/arm/lib/getuser.S b/arch/arm/lib/getuser.S
index df73914e81c8..746e7801dcdf 100644
--- a/arch/arm/lib/getuser.S
+++ b/arch/arm/lib/getuser.S
@@ -38,6 +38,7 @@ ENTRY(__get_user_1)
mov r0, #0
ret lr
ENDPROC(__get_user_1)
+_ASM_NOKPROBE(__get_user_1)
ENTRY(__get_user_2)
check_uaccess r0, 2, r1, r2, __get_user_bad
@@ -58,6 +59,7 @@ rb .req r0
mov r0, #0
ret lr
ENDPROC(__get_user_2)
+_ASM_NOKPROBE(__get_user_2)
ENTRY(__get_user_4)
check_uaccess r0, 4, r1, r2, __get_user_bad
@@ -65,6 +67,7 @@ ENTRY(__get_user_4)
mov r0, #0
ret lr
ENDPROC(__get_user_4)
+_ASM_NOKPROBE(__get_user_4)
ENTRY(__get_user_8)
check_uaccess r0, 8, r1, r2, __get_user_bad8
@@ -78,6 +81,7 @@ ENTRY(__get_user_8)
mov r0, #0
ret lr
ENDPROC(__get_user_8)
+_ASM_NOKPROBE(__get_user_8)
#ifdef __ARMEB__
ENTRY(__get_user_32t_8)
@@ -91,6 +95,7 @@ ENTRY(__get_user_32t_8)
mov r0, #0
ret lr
ENDPROC(__get_user_32t_8)
+_ASM_NOKPROBE(__get_user_32t_8)
ENTRY(__get_user_64t_1)
check_uaccess r0, 1, r1, r2, __get_user_bad8
@@ -98,6 +103,7 @@ ENTRY(__get_user_64t_1)
mov r0, #0
ret lr
ENDPROC(__get_user_64t_1)
+_ASM_NOKPROBE(__get_user_64t_1)
ENTRY(__get_user_64t_2)
check_uaccess r0, 2, r1, r2, __get_user_bad8
@@ -114,6 +120,7 @@ rb .req r0
mov r0, #0
ret lr
ENDPROC(__get_user_64t_2)
+_ASM_NOKPROBE(__get_user_64t_2)
ENTRY(__get_user_64t_4)
check_uaccess r0, 4, r1, r2, __get_user_bad8
@@ -121,6 +128,7 @@ ENTRY(__get_user_64t_4)
mov r0, #0
ret lr
ENDPROC(__get_user_64t_4)
+_ASM_NOKPROBE(__get_user_64t_4)
#endif
__get_user_bad8:
@@ -131,6 +139,8 @@ __get_user_bad:
ret lr
ENDPROC(__get_user_bad)
ENDPROC(__get_user_bad8)
+_ASM_NOKPROBE(__get_user_bad)
+_ASM_NOKPROBE(__get_user_bad8)
.pushsection __ex_table, "a"
.long 1b, __get_user_bad
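
[Note: each _ASM_NOKPROBE() above emits the entry point's address into the _kprobe_blacklist section; at boot the kprobes core walks that section and refuses to arm probes on those addresses, since a breakpoint inside the uaccess fixup path could recurse. A simplified C-side sketch of the consumer — the real routine builds a sorted list at init; treat this as illustrative:]

	extern unsigned long __start_kprobe_blacklist[];
	extern unsigned long __stop_kprobe_blacklist[];

	static bool demo_addr_blacklisted(unsigned long addr)
	{
		unsigned long *p;

		for (p = __start_kprobe_blacklist; p < __stop_kprobe_blacklist; p++)
			if (*p == addr)
				return true;	/* refuse to plant a kprobe here */
		return false;
	}
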
diff --git a/arch/arm/mach-axxia/Kconfig b/arch/arm/mach-axxia/Kconfig
index bb2ce1c63fd9..d3eae6037913 100644
--- a/arch/arm/mach-axxia/Kconfig
+++ b/arch/arm/mach-axxia/Kconfig
@@ -2,7 +2,6 @@
config ARCH_AXXIA
bool "LSI Axxia platforms"
depends on ARCH_MULTI_V7 && ARM_LPAE
- select ARCH_DMA_ADDR_T_64BIT
select ARM_AMBA
select ARM_GIC
select ARM_TIMER_SP804
diff --git a/arch/arm/mach-bcm/Kconfig b/arch/arm/mach-bcm/Kconfig
index c2f3b0d216a4..c46a728df44e 100644
--- a/arch/arm/mach-bcm/Kconfig
+++ b/arch/arm/mach-bcm/Kconfig
@@ -211,7 +211,6 @@ config ARCH_BRCMSTB
select BRCMSTB_L2_IRQ
select BCM7120_L2_IRQ
select ARCH_HAS_HOLES_MEMORYMODEL
- select ARCH_DMA_ADDR_T_64BIT if ARM_LPAE
select ZONE_DMA if ARM_LPAE
select SOC_BRCMSTB
select SOC_BUS
diff --git a/arch/arm/mach-davinci/board-da830-evm.c b/arch/arm/mach-davinci/board-da830-evm.c
index 004f9c8de032..d1e8ce7b4bd2 100644
--- a/arch/arm/mach-davinci/board-da830-evm.c
+++ b/arch/arm/mach-davinci/board-da830-evm.c
@@ -205,12 +205,17 @@ static const short da830_evm_mmc_sd_pins[] = {
-1
};
+#define DA830_MMCSD_WP_PIN GPIO_TO_PIN(2, 1)
+#define DA830_MMCSD_CD_PIN GPIO_TO_PIN(2, 2)
+
static struct gpiod_lookup_table mmc_gpios_table = {
.dev_id = "da830-mmc.0",
.table = {
/* gpio chip 1 contains gpio range 32-63 */
- GPIO_LOOKUP("davinci_gpio.1", 2, "cd", GPIO_ACTIVE_LOW),
- GPIO_LOOKUP("davinci_gpio.1", 1, "wp", GPIO_ACTIVE_LOW),
+ GPIO_LOOKUP("davinci_gpio.0", DA830_MMCSD_CD_PIN, "cd",
+ GPIO_ACTIVE_LOW),
+ GPIO_LOOKUP("davinci_gpio.0", DA830_MMCSD_WP_PIN, "wp",
+ GPIO_ACTIVE_LOW),
},
};
diff --git a/arch/arm/mach-davinci/board-da850-evm.c b/arch/arm/mach-davinci/board-da850-evm.c
index 3063478bcc36..158ed9a1483f 100644
--- a/arch/arm/mach-davinci/board-da850-evm.c
+++ b/arch/arm/mach-davinci/board-da850-evm.c
@@ -763,12 +763,17 @@ static const short da850_evm_mcasp_pins[] __initconst = {
-1
};
+#define DA850_MMCSD_CD_PIN GPIO_TO_PIN(4, 0)
+#define DA850_MMCSD_WP_PIN GPIO_TO_PIN(4, 1)
+
static struct gpiod_lookup_table mmc_gpios_table = {
.dev_id = "da830-mmc.0",
.table = {
/* gpio chip 2 contains gpio range 64-95 */
- GPIO_LOOKUP("davinci_gpio.2", 0, "cd", GPIO_ACTIVE_LOW),
- GPIO_LOOKUP("davinci_gpio.2", 1, "wp", GPIO_ACTIVE_LOW),
+ GPIO_LOOKUP("davinci_gpio.0", DA850_MMCSD_CD_PIN, "cd",
+ GPIO_ACTIVE_LOW),
+ GPIO_LOOKUP("davinci_gpio.0", DA850_MMCSD_WP_PIN, "wp",
+ GPIO_ACTIVE_LOW),
},
};
diff --git a/arch/arm/mach-davinci/board-dm355-evm.c b/arch/arm/mach-davinci/board-dm355-evm.c
index cb30637d9eaf..23ab9e8bc04c 100644
--- a/arch/arm/mach-davinci/board-dm355-evm.c
+++ b/arch/arm/mach-davinci/board-dm355-evm.c
@@ -19,6 +19,7 @@
#include <linux/gpio.h>
#include <linux/gpio/machine.h>
#include <linux/clk.h>
+#include <linux/dm9000.h>
#include <linux/videodev2.h>
#include <media/i2c/tvp514x.h>
#include <linux/spi/spi.h>
@@ -109,12 +110,15 @@ static struct platform_device davinci_nand_device = {
},
};
+#define DM355_I2C_SDA_PIN GPIO_TO_PIN(0, 15)
+#define DM355_I2C_SCL_PIN GPIO_TO_PIN(0, 14)
+
static struct gpiod_lookup_table i2c_recovery_gpiod_table = {
- .dev_id = "i2c_davinci",
+ .dev_id = "i2c_davinci.1",
.table = {
- GPIO_LOOKUP("davinci_gpio", 15, "sda",
+ GPIO_LOOKUP("davinci_gpio.0", DM355_I2C_SDA_PIN, "sda",
GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN),
- GPIO_LOOKUP("davinci_gpio", 14, "scl",
+ GPIO_LOOKUP("davinci_gpio.0", DM355_I2C_SCL_PIN, "scl",
GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN),
},
};
@@ -179,11 +183,16 @@ static struct resource dm355evm_dm9000_rsrc[] = {
},
};
+static struct dm9000_plat_data dm335evm_dm9000_platdata;
+
static struct platform_device dm355evm_dm9000 = {
.name = "dm9000",
.id = -1,
.resource = dm355evm_dm9000_rsrc,
.num_resources = ARRAY_SIZE(dm355evm_dm9000_rsrc),
+ .dev = {
+ .platform_data = &dm335evm_dm9000_platdata,
+ },
};
static struct tvp514x_platform_data tvp5146_pdata = {
diff --git a/arch/arm/mach-davinci/board-dm644x-evm.c b/arch/arm/mach-davinci/board-dm644x-evm.c
index 95b55aae1366..509e64ab1994 100644
--- a/arch/arm/mach-davinci/board-dm644x-evm.c
+++ b/arch/arm/mach-davinci/board-dm644x-evm.c
@@ -17,6 +17,7 @@
#include <linux/i2c.h>
#include <linux/platform_data/pcf857x.h>
#include <linux/platform_data/at24.h>
+#include <linux/platform_data/gpio-davinci.h>
#include <linux/mtd/mtd.h>
#include <linux/mtd/rawnand.h>
#include <linux/mtd/partitions.h>
@@ -596,12 +597,15 @@ static struct i2c_board_info __initdata i2c_info[] = {
},
};
+#define DM644X_I2C_SDA_PIN GPIO_TO_PIN(2, 12)
+#define DM644X_I2C_SCL_PIN GPIO_TO_PIN(2, 11)
+
static struct gpiod_lookup_table i2c_recovery_gpiod_table = {
- .dev_id = "i2c_davinci",
+ .dev_id = "i2c_davinci.1",
.table = {
- GPIO_LOOKUP("davinci_gpio", 44, "sda",
+ GPIO_LOOKUP("davinci_gpio.0", DM644X_I2C_SDA_PIN, "sda",
GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN),
- GPIO_LOOKUP("davinci_gpio", 43, "scl",
+ GPIO_LOOKUP("davinci_gpio.0", DM644X_I2C_SCL_PIN, "scl",
GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN),
},
};
diff --git a/arch/arm/mach-davinci/board-dm646x-evm.c b/arch/arm/mach-davinci/board-dm646x-evm.c
index 2d37f5b0e1f5..a3c0d1e87647 100644
--- a/arch/arm/mach-davinci/board-dm646x-evm.c
+++ b/arch/arm/mach-davinci/board-dm646x-evm.c
@@ -532,11 +532,12 @@ static struct vpif_display_config dm646x_vpif_display_config = {
.set_clock = set_vpif_clock,
.subdevinfo = dm646x_vpif_subdev,
.subdev_count = ARRAY_SIZE(dm646x_vpif_subdev),
+ .i2c_adapter_id = 1,
.chan_config[0] = {
.outputs = dm6467_ch0_outputs,
.output_count = ARRAY_SIZE(dm6467_ch0_outputs),
},
- .card_name = "DM646x EVM",
+ .card_name = "DM646x EVM Video Display",
};
/**
@@ -674,6 +675,7 @@ static struct vpif_capture_config dm646x_vpif_capture_cfg = {
.setup_input_channel_mode = setup_vpif_input_channel_mode,
.subdev_info = vpif_capture_sdev_info,
.subdev_count = ARRAY_SIZE(vpif_capture_sdev_info),
+ .i2c_adapter_id = 1,
.chan_config[0] = {
.inputs = dm6467_ch0_inputs,
.input_count = ARRAY_SIZE(dm6467_ch0_inputs),
@@ -694,6 +696,7 @@ static struct vpif_capture_config dm646x_vpif_capture_cfg = {
.fid_pol = 0,
},
},
+ .card_name = "DM646x EVM Video Capture",
};
static void __init evm_init_video(void)
diff --git a/arch/arm/mach-davinci/board-omapl138-hawk.c b/arch/arm/mach-davinci/board-omapl138-hawk.c
index 0d32042b728f..be8b892a6ea7 100644
--- a/arch/arm/mach-davinci/board-omapl138-hawk.c
+++ b/arch/arm/mach-davinci/board-omapl138-hawk.c
@@ -123,12 +123,16 @@ static const short hawk_mmcsd0_pins[] = {
-1
};
+#define DA850_HAWK_MMCSD_CD_PIN GPIO_TO_PIN(3, 12)
+#define DA850_HAWK_MMCSD_WP_PIN GPIO_TO_PIN(3, 13)
+
static struct gpiod_lookup_table mmc_gpios_table = {
.dev_id = "da830-mmc.0",
.table = {
- /* CD: gpio3_12: gpio60: chip 1 contains gpio range 32-63*/
- GPIO_LOOKUP("davinci_gpio.0", 28, "cd", GPIO_ACTIVE_LOW),
- GPIO_LOOKUP("davinci_gpio.0", 29, "wp", GPIO_ACTIVE_LOW),
+ GPIO_LOOKUP("davinci_gpio.0", DA850_HAWK_MMCSD_CD_PIN, "cd",
+ GPIO_ACTIVE_LOW),
+ GPIO_LOOKUP("davinci_gpio.0", DA850_HAWK_MMCSD_WP_PIN, "wp",
+ GPIO_ACTIVE_LOW),
},
};
diff --git a/arch/arm/mach-davinci/dm646x.c b/arch/arm/mach-davinci/dm646x.c
index 109ab1fa0d2c..c32ca27ab343 100644
--- a/arch/arm/mach-davinci/dm646x.c
+++ b/arch/arm/mach-davinci/dm646x.c
@@ -488,7 +488,8 @@ static u8 dm646x_default_priorities[DAVINCI_N_AINTC_IRQ] = {
[IRQ_DM646X_MCASP0TXINT] = 7,
[IRQ_DM646X_MCASP0RXINT] = 7,
[IRQ_DM646X_RESERVED_3] = 7,
- [IRQ_DM646X_MCASP1TXINT] = 7, /* clockevent */
+ [IRQ_DM646X_MCASP1TXINT] = 7,
+ [IRQ_TINT0_TINT12] = 7, /* clockevent */
[IRQ_TINT0_TINT34] = 7, /* clocksource */
[IRQ_TINT1_TINT12] = 7, /* DSP timer */
[IRQ_TINT1_TINT34] = 7, /* system tick */
diff --git a/arch/arm/mach-ep93xx/core.c b/arch/arm/mach-ep93xx/core.c
index e70feec6fad5..0581ffbedddd 100644
--- a/arch/arm/mach-ep93xx/core.c
+++ b/arch/arm/mach-ep93xx/core.c
@@ -323,7 +323,7 @@ void __init ep93xx_register_eth(struct ep93xx_eth_data *data, int copy_addr)
/* All EP93xx devices use the same two GPIO pins for I2C bit-banging */
static struct gpiod_lookup_table ep93xx_i2c_gpiod_table = {
- .dev_id = "i2c-gpio",
+ .dev_id = "i2c-gpio.0",
.table = {
/* Use local offsets on gpiochip/port "G" */
GPIO_LOOKUP_IDX("G", 1, NULL, 0,
diff --git a/arch/arm/mach-exynos/Kconfig b/arch/arm/mach-exynos/Kconfig
index 647c319f9f5f..2ca405816846 100644
--- a/arch/arm/mach-exynos/Kconfig
+++ b/arch/arm/mach-exynos/Kconfig
@@ -112,7 +112,6 @@ config SOC_EXYNOS5440
bool "SAMSUNG EXYNOS5440"
default y
depends on ARCH_EXYNOS5
- select ARCH_DMA_ADDR_T_64BIT if ARM_LPAE
select HAVE_ARM_ARCH_TIMER
select AUTO_ZRELADDR
select PINCTRL_EXYNOS5440
diff --git a/arch/arm/mach-highbank/Kconfig b/arch/arm/mach-highbank/Kconfig
index 81110ec34226..5552968f07f8 100644
--- a/arch/arm/mach-highbank/Kconfig
+++ b/arch/arm/mach-highbank/Kconfig
@@ -1,7 +1,6 @@
config ARCH_HIGHBANK
bool "Calxeda ECX-1000/2000 (Highbank/Midway)"
depends on ARCH_MULTI_V7
- select ARCH_DMA_ADDR_T_64BIT if ARM_LPAE
select ARCH_HAS_HOLES_MEMORYMODEL
select ARCH_SUPPORTS_BIG_ENDIAN
select ARM_AMBA
diff --git a/arch/arm/mach-ixp4xx/avila-setup.c b/arch/arm/mach-ixp4xx/avila-setup.c
index 77def6169f50..44cbbce6bda6 100644
--- a/arch/arm/mach-ixp4xx/avila-setup.c
+++ b/arch/arm/mach-ixp4xx/avila-setup.c
@@ -51,7 +51,7 @@ static struct platform_device avila_flash = {
};
static struct gpiod_lookup_table avila_i2c_gpiod_table = {
- .dev_id = "i2c-gpio",
+ .dev_id = "i2c-gpio.0",
.table = {
GPIO_LOOKUP_IDX("IXP4XX_GPIO_CHIP", AVILA_SDA_PIN,
NULL, 0, GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN),
diff --git a/arch/arm/mach-ixp4xx/dsmg600-setup.c b/arch/arm/mach-ixp4xx/dsmg600-setup.c
index 0f5c99941a7d..397190f3a8da 100644
--- a/arch/arm/mach-ixp4xx/dsmg600-setup.c
+++ b/arch/arm/mach-ixp4xx/dsmg600-setup.c
@@ -70,7 +70,7 @@ static struct platform_device dsmg600_flash = {
};
static struct gpiod_lookup_table dsmg600_i2c_gpiod_table = {
- .dev_id = "i2c-gpio",
+ .dev_id = "i2c-gpio.0",
.table = {
GPIO_LOOKUP_IDX("IXP4XX_GPIO_CHIP", DSMG600_SDA_PIN,
NULL, 0, GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN),
diff --git a/arch/arm/mach-ixp4xx/fsg-setup.c b/arch/arm/mach-ixp4xx/fsg-setup.c
index 033f79b35d51..f0a152e365b1 100644
--- a/arch/arm/mach-ixp4xx/fsg-setup.c
+++ b/arch/arm/mach-ixp4xx/fsg-setup.c
@@ -56,7 +56,7 @@ static struct platform_device fsg_flash = {
};
static struct gpiod_lookup_table fsg_i2c_gpiod_table = {
- .dev_id = "i2c-gpio",
+ .dev_id = "i2c-gpio.0",
.table = {
GPIO_LOOKUP_IDX("IXP4XX_GPIO_CHIP", FSG_SDA_PIN,
NULL, 0, GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN),
diff --git a/arch/arm/mach-ixp4xx/ixdp425-setup.c b/arch/arm/mach-ixp4xx/ixdp425-setup.c
index b168e2fbdbeb..3ec829d52cdd 100644
--- a/arch/arm/mach-ixp4xx/ixdp425-setup.c
+++ b/arch/arm/mach-ixp4xx/ixdp425-setup.c
@@ -124,7 +124,7 @@ static struct platform_device ixdp425_flash_nand = {
#endif /* CONFIG_MTD_NAND_PLATFORM */
static struct gpiod_lookup_table ixdp425_i2c_gpiod_table = {
- .dev_id = "i2c-gpio",
+ .dev_id = "i2c-gpio.0",
.table = {
GPIO_LOOKUP_IDX("IXP4XX_GPIO_CHIP", IXDP425_SDA_PIN,
NULL, 0, GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN),
diff --git a/arch/arm/mach-ixp4xx/nas100d-setup.c b/arch/arm/mach-ixp4xx/nas100d-setup.c
index 76dfff03cb71..4138d6aa4c52 100644
--- a/arch/arm/mach-ixp4xx/nas100d-setup.c
+++ b/arch/arm/mach-ixp4xx/nas100d-setup.c
@@ -102,7 +102,7 @@ static struct platform_device nas100d_leds = {
};
static struct gpiod_lookup_table nas100d_i2c_gpiod_table = {
- .dev_id = "i2c-gpio",
+ .dev_id = "i2c-gpio.0",
.table = {
GPIO_LOOKUP_IDX("IXP4XX_GPIO_CHIP", NAS100D_SDA_PIN,
NULL, 0, GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN),
diff --git a/arch/arm/mach-ixp4xx/nslu2-setup.c b/arch/arm/mach-ixp4xx/nslu2-setup.c
index 91da63a7d7b5..341b263482ef 100644
--- a/arch/arm/mach-ixp4xx/nslu2-setup.c
+++ b/arch/arm/mach-ixp4xx/nslu2-setup.c
@@ -70,7 +70,7 @@ static struct platform_device nslu2_flash = {
};
static struct gpiod_lookup_table nslu2_i2c_gpiod_table = {
- .dev_id = "i2c-gpio",
+ .dev_id = "i2c-gpio.0",
.table = {
GPIO_LOOKUP_IDX("IXP4XX_GPIO_CHIP", NSLU2_SDA_PIN,
NULL, 0, GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN),
diff --git a/arch/arm/mach-keystone/pm_domain.c b/arch/arm/mach-keystone/pm_domain.c
index fe57e2692629..abca83d22ff3 100644
--- a/arch/arm/mach-keystone/pm_domain.c
+++ b/arch/arm/mach-keystone/pm_domain.c
@@ -29,6 +29,7 @@ static struct dev_pm_domain keystone_pm_domain = {
static struct pm_clk_notifier_block platform_domain_notifier = {
.pm_domain = &keystone_pm_domain,
+ .con_ids = { NULL },
};
static const struct of_device_id of_keystone_table[] = {
diff --git a/arch/arm/mach-omap1/ams-delta-fiq.c b/arch/arm/mach-omap1/ams-delta-fiq.c
index 793a24a53c52..d7ca9e2b40d2 100644
--- a/arch/arm/mach-omap1/ams-delta-fiq.c
+++ b/arch/arm/mach-omap1/ams-delta-fiq.c
@@ -58,22 +58,24 @@ static irqreturn_t deferred_fiq(int irq, void *dev_id)
irq_num = gpio_to_irq(gpio);
fiq_count = fiq_buffer[FIQ_CNT_INT_00 + gpio];
- while (irq_counter[gpio] < fiq_count) {
- if (gpio != AMS_DELTA_GPIO_PIN_KEYBRD_CLK) {
- struct irq_data *d = irq_get_irq_data(irq_num);
-
- /*
- * It looks like handle_edge_irq() that
- * OMAP GPIO edge interrupts default to,
- * expects interrupt already unmasked.
- */
- if (irq_chip && irq_chip->irq_unmask)
+ if (irq_counter[gpio] < fiq_count &&
+ gpio != AMS_DELTA_GPIO_PIN_KEYBRD_CLK) {
+ struct irq_data *d = irq_get_irq_data(irq_num);
+
+ /*
+ * handle_simple_irq() that OMAP GPIO edge
+ * interrupts default to since commit 80ac93c27441
+ * requires interrupt already acked and unmasked.
+ */
+ if (irq_chip) {
+ if (irq_chip->irq_ack)
+ irq_chip->irq_ack(d);
+ if (irq_chip->irq_unmask)
irq_chip->irq_unmask(d);
}
- generic_handle_irq(irq_num);
-
- irq_counter[gpio]++;
}
+ for (; irq_counter[gpio] < fiq_count; irq_counter[gpio]++)
+ generic_handle_irq(irq_num);
}
return IRQ_HANDLED;
}
diff --git a/arch/arm/mach-omap2/powerdomain.c b/arch/arm/mach-omap2/powerdomain.c
index 76eb6ec5f157..1e6a967cd2d5 100644
--- a/arch/arm/mach-omap2/powerdomain.c
+++ b/arch/arm/mach-omap2/powerdomain.c
@@ -188,7 +188,7 @@ static int _pwrdm_state_switch(struct powerdomain *pwrdm, int flag)
((prev & OMAP_POWERSTATE_MASK) << 0));
trace_power_domain_target_rcuidle(pwrdm->name,
trace_state,
- smp_processor_id());
+ raw_smp_processor_id());
}
break;
default:
@@ -518,7 +518,7 @@ int pwrdm_set_next_pwrst(struct powerdomain *pwrdm, u8 pwrst)
if (arch_pwrdm && arch_pwrdm->pwrdm_set_next_pwrst) {
/* Trace the pwrdm desired target state */
trace_power_domain_target_rcuidle(pwrdm->name, pwrst,
- smp_processor_id());
+ raw_smp_processor_id());
/* Program the pwrdm desired target state */
ret = arch_pwrdm->pwrdm_set_next_pwrst(pwrdm, pwrst);
}
diff --git a/arch/arm/mach-pxa/palmz72.c b/arch/arm/mach-pxa/palmz72.c
index 5877e547cecd..0adb1bd6208e 100644
--- a/arch/arm/mach-pxa/palmz72.c
+++ b/arch/arm/mach-pxa/palmz72.c
@@ -322,7 +322,7 @@ static struct soc_camera_link palmz72_iclink = {
};
static struct gpiod_lookup_table palmz72_i2c_gpiod_table = {
- .dev_id = "i2c-gpio",
+ .dev_id = "i2c-gpio.0",
.table = {
GPIO_LOOKUP_IDX("gpio-pxa", 118, NULL, 0,
GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN),
diff --git a/arch/arm/mach-pxa/viper.c b/arch/arm/mach-pxa/viper.c
index 90d0f277de55..207dcc2e94e7 100644
--- a/arch/arm/mach-pxa/viper.c
+++ b/arch/arm/mach-pxa/viper.c
@@ -460,7 +460,7 @@ static struct platform_device smc91x_device = {
/* i2c */
static struct gpiod_lookup_table viper_i2c_gpiod_table = {
- .dev_id = "i2c-gpio",
+ .dev_id = "i2c-gpio.1",
.table = {
GPIO_LOOKUP_IDX("gpio-pxa", VIPER_RTC_I2C_SDA_GPIO,
NULL, 0, GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN),
@@ -789,7 +789,7 @@ static int __init viper_tpm_setup(char *str)
__setup("tpm=", viper_tpm_setup);
struct gpiod_lookup_table viper_tpm_i2c_gpiod_table = {
- .dev_id = "i2c-gpio",
+ .dev_id = "i2c-gpio.2",
.table = {
GPIO_LOOKUP_IDX("gpio-pxa", VIPER_TPM_I2C_SDA_GPIO,
NULL, 0, GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN),
diff --git a/arch/arm/mach-rockchip/Kconfig b/arch/arm/mach-rockchip/Kconfig
index a4065966881a..fafd3d7f9f8c 100644
--- a/arch/arm/mach-rockchip/Kconfig
+++ b/arch/arm/mach-rockchip/Kconfig
@@ -3,7 +3,6 @@ config ARCH_ROCKCHIP
depends on ARCH_MULTI_V7
select PINCTRL
select PINCTRL_ROCKCHIP
- select ARCH_DMA_ADDR_T_64BIT if ARM_LPAE
select ARCH_HAS_RESET_CONTROLLER
select ARM_AMBA
select ARM_GIC
diff --git a/arch/arm/mach-rpc/ecard.c b/arch/arm/mach-rpc/ecard.c
index bdb5ec1cf560..39aef4876ed4 100644
--- a/arch/arm/mach-rpc/ecard.c
+++ b/arch/arm/mach-rpc/ecard.c
@@ -657,25 +657,13 @@ static int ecard_devices_proc_show(struct seq_file *m, void *v)
return 0;
}
-static int ecard_devices_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, ecard_devices_proc_show, NULL);
-}
-
-static const struct file_operations bus_ecard_proc_fops = {
- .owner = THIS_MODULE,
- .open = ecard_devices_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
static struct proc_dir_entry *proc_bus_ecard_dir = NULL;
static void ecard_proc_init(void)
{
proc_bus_ecard_dir = proc_mkdir("bus/ecard", NULL);
- proc_create("devices", 0, proc_bus_ecard_dir, &bus_ecard_proc_fops);
+ proc_create_single("devices", 0, proc_bus_ecard_dir,
+ ecard_devices_proc_show);
}
#define ec_set_resource(ec,nr,st,sz) \
diff --git a/arch/arm/mach-sa1100/simpad.c b/arch/arm/mach-sa1100/simpad.c
index ace010479eb6..f45aed2519ba 100644
--- a/arch/arm/mach-sa1100/simpad.c
+++ b/arch/arm/mach-sa1100/simpad.c
@@ -327,7 +327,7 @@ static struct platform_device simpad_gpio_leds = {
* i2c
*/
static struct gpiod_lookup_table simpad_i2c_gpiod_table = {
- .dev_id = "i2c-gpio",
+ .dev_id = "i2c-gpio.0",
.table = {
GPIO_LOOKUP_IDX("gpio", 21, NULL, 0,
GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN),
diff --git a/arch/arm/mach-shmobile/Kconfig b/arch/arm/mach-shmobile/Kconfig
index 280e7312a9e1..fe60cd09a5ca 100644
--- a/arch/arm/mach-shmobile/Kconfig
+++ b/arch/arm/mach-shmobile/Kconfig
@@ -29,7 +29,6 @@ config ARCH_RMOBILE
menuconfig ARCH_RENESAS
bool "Renesas ARM SoCs"
depends on ARCH_MULTI_V7 && MMU
- select ARCH_DMA_ADDR_T_64BIT if ARM_LPAE
select ARCH_SHMOBILE
select ARM_GIC
select GPIOLIB
diff --git a/arch/arm/mach-tegra/Kconfig b/arch/arm/mach-tegra/Kconfig
index 1e0aeb47bac6..7f3b83e0d324 100644
--- a/arch/arm/mach-tegra/Kconfig
+++ b/arch/arm/mach-tegra/Kconfig
@@ -15,6 +15,5 @@ menuconfig ARCH_TEGRA
select RESET_CONTROLLER
select SOC_BUS
select ZONE_DMA if ARM_LPAE
- select ARCH_DMA_ADDR_T_64BIT if ARM_LPAE
help
This enables support for NVIDIA Tegra based systems.
diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig
index 7f14acf67caf..5a016bc80e26 100644
--- a/arch/arm/mm/Kconfig
+++ b/arch/arm/mm/Kconfig
@@ -661,6 +661,7 @@ config ARM_LPAE
bool "Support for the Large Physical Address Extension"
depends on MMU && CPU_32v7 && !CPU_32v6 && !CPU_32v5 && \
!CPU_32v4 && !CPU_32v3
+ select PHYS_ADDR_T_64BIT
help
Say Y if you have an ARMv7 processor supporting the LPAE page
table format and you would like to access memory beyond the
@@ -673,12 +674,6 @@ config ARM_PV_FIXUP
def_bool y
depends on ARM_LPAE && ARM_PATCH_PHYS_VIRT && ARCH_KEYSTONE
-config ARCH_PHYS_ADDR_T_64BIT
- def_bool ARM_LPAE
-
-config ARCH_DMA_ADDR_T_64BIT
- bool
-
config ARM_THUMB
bool "Support Thumb user binaries" if !CPU_THUMBONLY && EXPERT
depends on CPU_THUMB_CAPABLE
diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c
index 2c96190e018b..bd2c739d8083 100644
--- a/arch/arm/mm/alignment.c
+++ b/arch/arm/mm/alignment.c
@@ -950,6 +950,7 @@ do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
if (ai_usermode & UM_SIGNAL) {
siginfo_t si;
+ clear_siginfo(&si);
si.si_signo = SIGBUS;
si.si_errno = 0;
si.si_code = BUS_ADRALN;
diff --git a/arch/arm/mm/dma-mapping-nommu.c b/arch/arm/mm/dma-mapping-nommu.c
index 619f24a42d09..f448a0663b10 100644
--- a/arch/arm/mm/dma-mapping-nommu.c
+++ b/arch/arm/mm/dma-mapping-nommu.c
@@ -241,12 +241,3 @@ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
void arch_teardown_dma_ops(struct device *dev)
{
}
-
-#define PREALLOC_DMA_DEBUG_ENTRIES 4096
-
-static int __init dma_debug_do_init(void)
-{
- dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES);
- return 0;
-}
-core_initcall(dma_debug_do_init);
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index 8c398fedbbb6..4b6613b5e042 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -466,12 +466,6 @@ void __init dma_contiguous_early_fixup(phys_addr_t base, unsigned long size)
void __init dma_contiguous_remap(void)
{
int i;
-
- if (!dma_mmu_remap_num)
- return;
-
- /* call flush_cache_all() since CMA area would be large enough */
- flush_cache_all();
for (i = 0; i < dma_mmu_remap_num; i++) {
phys_addr_t start = dma_mmu_remap[i].base;
phys_addr_t end = start + dma_mmu_remap[i].size;
@@ -504,15 +498,7 @@ void __init dma_contiguous_remap(void)
flush_tlb_kernel_range(__phys_to_virt(start),
__phys_to_virt(end));
- /*
- * All the memory in CMA region will be on ZONE_MOVABLE.
- * If that zone is considered as highmem, the memory in CMA
- * region is also considered as highmem even if it's
- * physical address belong to lowmem. In this case,
- * re-mapping isn't required.
- */
- if (!is_highmem_idx(ZONE_MOVABLE))
- iotable_init(&map, 1);
+ iotable_init(&map, 1);
}
}
@@ -1165,15 +1151,6 @@ int arm_dma_supported(struct device *dev, u64 mask)
return __dma_supported(dev, mask, false);
}
-#define PREALLOC_DMA_DEBUG_ENTRIES 4096
-
-static int __init dma_debug_do_init(void)
-{
- dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES);
- return 0;
-}
-core_initcall(dma_debug_do_init);
-
#ifdef CONFIG_ARM_DMA_USE_IOMMU
static int __dma_info_to_prot(enum dma_data_direction dir, unsigned long attrs)
diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
index b75eada23d0a..32034543f49c 100644
--- a/arch/arm/mm/fault.c
+++ b/arch/arm/mm/fault.c
@@ -163,6 +163,8 @@ __do_user_fault(struct task_struct *tsk, unsigned long addr,
{
struct siginfo si;
+ clear_siginfo(&si);
+
#ifdef CONFIG_DEBUG_USER
if (((user_debug & UDBG_SEGV) && (sig == SIGSEGV)) ||
((user_debug & UDBG_BUS) && (sig == SIGBUS))) {
@@ -557,6 +559,7 @@ do_DataAbort(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
inf->name, fsr, addr);
show_pte(current->mm, addr);
+ clear_siginfo(&info);
info.si_signo = inf->sig;
info.si_errno = 0;
info.si_code = inf->code;
@@ -589,6 +592,7 @@ do_PrefetchAbort(unsigned long addr, unsigned int ifsr, struct pt_regs *regs)
pr_alert("Unhandled prefetch abort: %s (0x%03x) at 0x%08lx\n",
inf->name, ifsr, addr);
+ clear_siginfo(&info);
info.si_signo = inf->sig;
info.si_errno = 0;
info.si_code = inf->code;
diff --git a/arch/arm/probes/kprobes/opt-arm.c b/arch/arm/probes/kprobes/opt-arm.c
index bcdecc25461b..b2aa9b32bff2 100644
--- a/arch/arm/probes/kprobes/opt-arm.c
+++ b/arch/arm/probes/kprobes/opt-arm.c
@@ -165,13 +165,14 @@ optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs)
{
unsigned long flags;
struct kprobe *p = &op->kp;
- struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+ struct kprobe_ctlblk *kcb;
/* Save skipped registers */
regs->ARM_pc = (unsigned long)op->kp.addr;
regs->ARM_ORIG_r0 = ~0UL;
local_irq_save(flags);
+ kcb = get_kprobe_ctlblk();
if (kprobe_running()) {
kprobes_inc_nmissed_count(&op->kp);
@@ -191,6 +192,7 @@ optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs)
local_irq_restore(flags);
}
+NOKPROBE_SYMBOL(optimized_callback)
int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *orig)
{
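
[Note: moving get_kprobe_ctlblk() after local_irq_save() closes a window where the task could migrate CPUs between fetching the per-CPU control block and using it, leaving kcb pointing at another CPU's state. The general pattern, sketched with illustrative structure:]

	/* Sketch: fetch per-CPU state only once migration is impossible. */
	static void demo_percpu_access(void)
	{
		unsigned long flags;
		struct kprobe_ctlblk *kcb;

		local_irq_save(flags);		/* no preemption/migration past here */
		kcb = get_kprobe_ctlblk();	/* guaranteed to be this CPU's */
		/* ... use kcb ... */
		local_irq_restore(flags);
	}
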
diff --git a/arch/arm/vfp/vfpmodule.c b/arch/arm/vfp/vfpmodule.c
index 4c375e11ae95..35d0f823e823 100644
--- a/arch/arm/vfp/vfpmodule.c
+++ b/arch/arm/vfp/vfpmodule.c
@@ -218,8 +218,7 @@ static void vfp_raise_sigfpe(unsigned int sicode, struct pt_regs *regs)
{
siginfo_t info;
- memset(&info, 0, sizeof(info));
-
+ clear_siginfo(&info);
info.si_signo = SIGFPE;
info.si_code = sicode;
info.si_addr = (void __user *)(instruction_pointer(regs) - 4);
@@ -257,7 +256,7 @@ static void vfp_raise_exceptions(u32 exceptions, u32 inst, u32 fpscr, struct pt_
if (exceptions == VFP_EXCEPTION_ERROR) {
vfp_panic("unhandled bounce", inst);
- vfp_raise_sigfpe(FPE_FIXME, regs);
+ vfp_raise_sigfpe(FPE_FLTINV, regs);
return;
}
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index eb2cf4938f6d..b25ed7834f6c 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -105,7 +105,6 @@ config ARM64
select HAVE_CONTEXT_TRACKING
select HAVE_DEBUG_BUGVERBOSE
select HAVE_DEBUG_KMEMLEAK
- select HAVE_DMA_API_DEBUG
select HAVE_DMA_CONTIGUOUS
select HAVE_DYNAMIC_FTRACE
select HAVE_EFFICIENT_UNALIGNED_ACCESS
@@ -133,6 +132,8 @@ config ARM64
select IRQ_FORCED_THREADING
select MODULES_USE_ELF_RELA
select MULTI_IRQ_HANDLER
+ select NEED_DMA_MAP_STATE
+ select NEED_SG_DMA_LENGTH
select NO_BOOTMEM
select OF
select OF_EARLY_FLATTREE
@@ -142,6 +143,7 @@ config ARM64
select POWER_SUPPLY
select REFCOUNT_FULL
select SPARSE_IRQ
+ select SWIOTLB
select SYSCTL_EXCEPTION_TRACE
select THREAD_INFO_IN_TASK
help
@@ -150,9 +152,6 @@ config ARM64
config 64BIT
def_bool y
-config ARCH_PHYS_ADDR_T_64BIT
- def_bool y
-
config MMU
def_bool y
@@ -237,24 +236,9 @@ config ZONE_DMA32
config HAVE_GENERIC_GUP
def_bool y
-config ARCH_DMA_ADDR_T_64BIT
- def_bool y
-
-config NEED_DMA_MAP_STATE
- def_bool y
-
-config NEED_SG_DMA_LENGTH
- def_bool y
-
config SMP
def_bool y
-config SWIOTLB
- def_bool y
-
-config IOMMU_HELPER
- def_bool SWIOTLB
-
config KERNEL_MODE_NEON
def_bool y
diff --git a/arch/arm64/boot/dts/exynos/exynos5433.dtsi b/arch/arm64/boot/dts/exynos/exynos5433.dtsi
index c0231d077fa6..1ad8677f6a0a 100644
--- a/arch/arm64/boot/dts/exynos/exynos5433.dtsi
+++ b/arch/arm64/boot/dts/exynos/exynos5433.dtsi
@@ -1317,7 +1317,7 @@
reg = <0x14d60000 0x100>;
dmas = <&pdma0 31 &pdma0 30>;
dma-names = "tx", "rx";
- interrupts = <GIC_SPI 435 IRQ_TYPE_NONE>;
+ interrupts = <GIC_SPI 435 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&cmu_peric CLK_PCLK_I2S1>,
<&cmu_peric CLK_PCLK_I2S1>,
<&cmu_peric CLK_SCLK_I2S1>;
diff --git a/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts b/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts
index 724a0d3b7683..edb4ee0b8896 100644
--- a/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts
+++ b/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts
@@ -299,7 +299,6 @@
/* GPIO blocks 16 thru 19 do not appear to be routed to pins */
dwmmc_0: dwmmc0@f723d000 {
- max-frequency = <150000000>;
cap-mmc-highspeed;
mmc-hs200-1_8v;
non-removable;
diff --git a/arch/arm64/boot/dts/marvell/armada-cp110.dtsi b/arch/arm64/boot/dts/marvell/armada-cp110.dtsi
index 48cad7919efa..ed2f1237ea1e 100644
--- a/arch/arm64/boot/dts/marvell/armada-cp110.dtsi
+++ b/arch/arm64/boot/dts/marvell/armada-cp110.dtsi
@@ -38,9 +38,10 @@
compatible = "marvell,armada-7k-pp22";
reg = <0x0 0x100000>, <0x129000 0xb000>;
clocks = <&CP110_LABEL(clk) 1 3>, <&CP110_LABEL(clk) 1 9>,
- <&CP110_LABEL(clk) 1 5>, <&CP110_LABEL(clk) 1 18>;
+ <&CP110_LABEL(clk) 1 5>, <&CP110_LABEL(clk) 1 6>,
+ <&CP110_LABEL(clk) 1 18>;
clock-names = "pp_clk", "gop_clk",
- "mg_clk", "axi_clk";
+ "mg_clk", "mg_core_clk", "axi_clk";
marvell,system-controller = <&CP110_LABEL(syscon0)>;
status = "disabled";
dma-coherent;
@@ -141,6 +142,8 @@
#size-cells = <0>;
compatible = "marvell,xmdio";
reg = <0x12a600 0x10>;
+ clocks = <&CP110_LABEL(clk) 1 5>,
+ <&CP110_LABEL(clk) 1 6>, <&CP110_LABEL(clk) 1 18>;
status = "disabled";
};
diff --git a/arch/arm64/boot/dts/nvidia/tegra186-p3310.dtsi b/arch/arm64/boot/dts/nvidia/tegra186-p3310.dtsi
index a8baad7b80df..13f57fff1477 100644
--- a/arch/arm64/boot/dts/nvidia/tegra186-p3310.dtsi
+++ b/arch/arm64/boot/dts/nvidia/tegra186-p3310.dtsi
@@ -46,7 +46,7 @@
compatible = "ethernet-phy-ieee802.3-c22";
reg = <0x0>;
interrupt-parent = <&gpio>;
- interrupts = <TEGRA_MAIN_GPIO(M, 5) IRQ_TYPE_LEVEL_HIGH>;
+ interrupts = <TEGRA_MAIN_GPIO(M, 5) IRQ_TYPE_LEVEL_LOW>;
};
};
};
diff --git a/arch/arm64/boot/dts/socionext/uniphier-ld11.dtsi b/arch/arm64/boot/dts/socionext/uniphier-ld11.dtsi
index e62bda1cf2d9..c32dd3419c87 100644
--- a/arch/arm64/boot/dts/socionext/uniphier-ld11.dtsi
+++ b/arch/arm64/boot/dts/socionext/uniphier-ld11.dtsi
@@ -414,7 +414,7 @@
mmc-ddr-1_8v;
mmc-hs200-1_8v;
mmc-pwrseq = <&emmc_pwrseq>;
- cdns,phy-input-delay-legacy = <4>;
+ cdns,phy-input-delay-legacy = <9>;
cdns,phy-input-delay-mmc-highspeed = <2>;
cdns,phy-input-delay-mmc-ddr = <3>;
cdns,phy-dll-delay-sdclk = <21>;
diff --git a/arch/arm64/boot/dts/socionext/uniphier-ld20-ref.dts b/arch/arm64/boot/dts/socionext/uniphier-ld20-ref.dts
index 2c1a92fafbfb..440c2e6a638b 100644
--- a/arch/arm64/boot/dts/socionext/uniphier-ld20-ref.dts
+++ b/arch/arm64/boot/dts/socionext/uniphier-ld20-ref.dts
@@ -67,3 +67,11 @@
reg = <0>;
};
};
+
+&pinctrl_ether_rgmii {
+ tx {
+ pins = "RGMII_TXCLK", "RGMII_TXD0", "RGMII_TXD1",
+ "RGMII_TXD2", "RGMII_TXD3", "RGMII_TXCTL";
+ drive-strength = <9>;
+ };
+};
diff --git a/arch/arm64/boot/dts/socionext/uniphier-ld20.dtsi b/arch/arm64/boot/dts/socionext/uniphier-ld20.dtsi
index 9efe20d07589..3a5ed789c056 100644
--- a/arch/arm64/boot/dts/socionext/uniphier-ld20.dtsi
+++ b/arch/arm64/boot/dts/socionext/uniphier-ld20.dtsi
@@ -519,7 +519,7 @@
mmc-ddr-1_8v;
mmc-hs200-1_8v;
mmc-pwrseq = <&emmc_pwrseq>;
- cdns,phy-input-delay-legacy = <4>;
+ cdns,phy-input-delay-legacy = <9>;
cdns,phy-input-delay-mmc-highspeed = <2>;
cdns,phy-input-delay-mmc-ddr = <3>;
cdns,phy-dll-delay-sdclk = <21>;
diff --git a/arch/arm64/boot/dts/socionext/uniphier-pxs3.dtsi b/arch/arm64/boot/dts/socionext/uniphier-pxs3.dtsi
index 7c8f710d9bfa..e85d6ddea3c2 100644
--- a/arch/arm64/boot/dts/socionext/uniphier-pxs3.dtsi
+++ b/arch/arm64/boot/dts/socionext/uniphier-pxs3.dtsi
@@ -334,7 +334,7 @@
mmc-ddr-1_8v;
mmc-hs200-1_8v;
mmc-pwrseq = <&emmc_pwrseq>;
- cdns,phy-input-delay-legacy = <4>;
+ cdns,phy-input-delay-legacy = <9>;
cdns,phy-input-delay-mmc-highspeed = <2>;
cdns,phy-input-delay-mmc-ddr = <3>;
cdns,phy-dll-delay-sdclk = <21>;
diff --git a/arch/arm64/include/asm/atomic_lse.h b/arch/arm64/include/asm/atomic_lse.h
index 9ef0797380cb..f9b0b09153e0 100644
--- a/arch/arm64/include/asm/atomic_lse.h
+++ b/arch/arm64/include/asm/atomic_lse.h
@@ -117,7 +117,7 @@ static inline void atomic_and(int i, atomic_t *v)
/* LSE atomics */
" mvn %w[i], %w[i]\n"
" stclr %w[i], %[v]")
- : [i] "+r" (w0), [v] "+Q" (v->counter)
+ : [i] "+&r" (w0), [v] "+Q" (v->counter)
: "r" (x1)
: __LL_SC_CLOBBERS);
}
@@ -135,7 +135,7 @@ static inline int atomic_fetch_and##name(int i, atomic_t *v) \
/* LSE atomics */ \
" mvn %w[i], %w[i]\n" \
" ldclr" #mb " %w[i], %w[i], %[v]") \
- : [i] "+r" (w0), [v] "+Q" (v->counter) \
+ : [i] "+&r" (w0), [v] "+Q" (v->counter) \
: "r" (x1) \
: __LL_SC_CLOBBERS, ##cl); \
\
@@ -161,7 +161,7 @@ static inline void atomic_sub(int i, atomic_t *v)
/* LSE atomics */
" neg %w[i], %w[i]\n"
" stadd %w[i], %[v]")
- : [i] "+r" (w0), [v] "+Q" (v->counter)
+ : [i] "+&r" (w0), [v] "+Q" (v->counter)
: "r" (x1)
: __LL_SC_CLOBBERS);
}
@@ -180,7 +180,7 @@ static inline int atomic_sub_return##name(int i, atomic_t *v) \
" neg %w[i], %w[i]\n" \
" ldadd" #mb " %w[i], w30, %[v]\n" \
" add %w[i], %w[i], w30") \
- : [i] "+r" (w0), [v] "+Q" (v->counter) \
+ : [i] "+&r" (w0), [v] "+Q" (v->counter) \
: "r" (x1) \
: __LL_SC_CLOBBERS , ##cl); \
\
@@ -207,7 +207,7 @@ static inline int atomic_fetch_sub##name(int i, atomic_t *v) \
/* LSE atomics */ \
" neg %w[i], %w[i]\n" \
" ldadd" #mb " %w[i], %w[i], %[v]") \
- : [i] "+r" (w0), [v] "+Q" (v->counter) \
+ : [i] "+&r" (w0), [v] "+Q" (v->counter) \
: "r" (x1) \
: __LL_SC_CLOBBERS, ##cl); \
\
@@ -314,7 +314,7 @@ static inline void atomic64_and(long i, atomic64_t *v)
/* LSE atomics */
" mvn %[i], %[i]\n"
" stclr %[i], %[v]")
- : [i] "+r" (x0), [v] "+Q" (v->counter)
+ : [i] "+&r" (x0), [v] "+Q" (v->counter)
: "r" (x1)
: __LL_SC_CLOBBERS);
}
@@ -332,7 +332,7 @@ static inline long atomic64_fetch_and##name(long i, atomic64_t *v) \
/* LSE atomics */ \
" mvn %[i], %[i]\n" \
" ldclr" #mb " %[i], %[i], %[v]") \
- : [i] "+r" (x0), [v] "+Q" (v->counter) \
+ : [i] "+&r" (x0), [v] "+Q" (v->counter) \
: "r" (x1) \
: __LL_SC_CLOBBERS, ##cl); \
\
@@ -358,7 +358,7 @@ static inline void atomic64_sub(long i, atomic64_t *v)
/* LSE atomics */
" neg %[i], %[i]\n"
" stadd %[i], %[v]")
- : [i] "+r" (x0), [v] "+Q" (v->counter)
+ : [i] "+&r" (x0), [v] "+Q" (v->counter)
: "r" (x1)
: __LL_SC_CLOBBERS);
}
@@ -377,7 +377,7 @@ static inline long atomic64_sub_return##name(long i, atomic64_t *v) \
" neg %[i], %[i]\n" \
" ldadd" #mb " %[i], x30, %[v]\n" \
" add %[i], %[i], x30") \
- : [i] "+r" (x0), [v] "+Q" (v->counter) \
+ : [i] "+&r" (x0), [v] "+Q" (v->counter) \
: "r" (x1) \
: __LL_SC_CLOBBERS, ##cl); \
\
@@ -404,7 +404,7 @@ static inline long atomic64_fetch_sub##name(long i, atomic64_t *v) \
/* LSE atomics */ \
" neg %[i], %[i]\n" \
" ldadd" #mb " %[i], %[i], %[v]") \
- : [i] "+r" (x0), [v] "+Q" (v->counter) \
+ : [i] "+&r" (x0), [v] "+Q" (v->counter) \
: "r" (x1) \
: __LL_SC_CLOBBERS, ##cl); \
\
@@ -435,7 +435,7 @@ static inline long atomic64_dec_if_positive(atomic64_t *v)
" sub x30, x30, %[ret]\n"
" cbnz x30, 1b\n"
"2:")
- : [ret] "+r" (x0), [v] "+Q" (v->counter)
+ : [ret] "+&r" (x0), [v] "+Q" (v->counter)
:
: __LL_SC_CLOBBERS, "cc", "memory");
@@ -516,7 +516,7 @@ static inline long __cmpxchg_double##name(unsigned long old1, \
" eor %[old1], %[old1], %[oldval1]\n" \
" eor %[old2], %[old2], %[oldval2]\n" \
" orr %[old1], %[old1], %[old2]") \
- : [old1] "+r" (x0), [old2] "+r" (x1), \
+ : [old1] "+&r" (x0), [old2] "+&r" (x1), \
[v] "+Q" (*(unsigned long *)ptr) \
: [new1] "r" (x2), [new2] "r" (x3), [ptr] "r" (x4), \
[oldval1] "r" (oldval1), [oldval2] "r" (oldval2) \
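
[Note: the constraint change from "+r" to "+&r" marks each read-write operand as early-clobbered: the LSE sequences write %[i] (the mvn/neg) before the last read of the other inputs, so without '&' the compiler may legally allocate an input to the same register. A standalone arm64-only sketch of the idiom, with illustrative names:]

	/*
	 * 'out' is written before 'in' is consumed, so it must not
	 * share a register with 'in'.
	 */
	static inline int demo_neg_then_add(int out, int in)
	{
		asm("	neg	%w[out], %w[out]\n"	/* clobbers out early */
		    "	add	%w[out], %w[out], %w[in]\n"
		    : [out] "+&r" (out)			/* '&' = early clobber */
		    : [in] "r" (in));
		return out;
	}
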
diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
index 30014a9f8f2b..ea690b3562af 100644
--- a/arch/arm64/include/asm/cputype.h
+++ b/arch/arm64/include/asm/cputype.h
@@ -75,6 +75,7 @@
#define ARM_CPU_IMP_CAVIUM 0x43
#define ARM_CPU_IMP_BRCM 0x42
#define ARM_CPU_IMP_QCOM 0x51
+#define ARM_CPU_IMP_NVIDIA 0x4E
#define ARM_CPU_PART_AEM_V8 0xD0F
#define ARM_CPU_PART_FOUNDATION 0xD00
@@ -99,6 +100,9 @@
#define QCOM_CPU_PART_FALKOR 0xC00
#define QCOM_CPU_PART_KRYO 0x200
+#define NVIDIA_CPU_PART_DENVER 0x003
+#define NVIDIA_CPU_PART_CARMEL 0x004
+
#define MIDR_CORTEX_A53 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53)
#define MIDR_CORTEX_A57 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57)
#define MIDR_CORTEX_A72 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A72)
@@ -114,6 +118,8 @@
#define MIDR_QCOM_FALKOR_V1 MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_FALKOR_V1)
#define MIDR_QCOM_FALKOR MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_FALKOR)
#define MIDR_QCOM_KRYO MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO)
+#define MIDR_NVIDIA_DENVER MIDR_CPU_MODEL(ARM_CPU_IMP_NVIDIA, NVIDIA_CPU_PART_DENVER)
+#define MIDR_NVIDIA_CARMEL MIDR_CPU_MODEL(ARM_CPU_IMP_NVIDIA, NVIDIA_CPU_PART_CARMEL)
#ifndef __ASSEMBLY__
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index 23b33e8ea03a..1dab3a984608 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -333,7 +333,7 @@ static inline void kvm_vcpu_set_be(struct kvm_vcpu *vcpu)
} else {
u64 sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL1);
sctlr |= (1 << 25);
- vcpu_write_sys_reg(vcpu, SCTLR_EL1, sctlr);
+ vcpu_write_sys_reg(vcpu, sctlr, SCTLR_EL1);
}
}
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 082110993647..6128992c2ded 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -360,6 +360,22 @@ static inline unsigned int kvm_get_vmid_bits(void)
return (cpuid_feature_extract_unsigned_field(reg, ID_AA64MMFR1_VMIDBITS_SHIFT) == 2) ? 16 : 8;
}
+/*
+ * We are not in the kvm->srcu critical section most of the time, so we take
+ * the SRCU read lock here. Since we copy the data from the user page, we
+ * can immediately drop the lock again.
+ */
+static inline int kvm_read_guest_lock(struct kvm *kvm,
+ gpa_t gpa, void *data, unsigned long len)
+{
+ int srcu_idx = srcu_read_lock(&kvm->srcu);
+ int ret = kvm_read_guest(kvm, gpa, data, len);
+
+ srcu_read_unlock(&kvm->srcu, srcu_idx);
+
+ return ret;
+}
+
#ifdef CONFIG_KVM_INDIRECT_VECTORS
/*
* EL2 vectors can be mapped and rerouted in a number of ways,
diff --git a/arch/arm64/include/asm/pci.h b/arch/arm64/include/asm/pci.h
index 8747f7c5e0e7..9e690686e8aa 100644
--- a/arch/arm64/include/asm/pci.h
+++ b/arch/arm64/include/asm/pci.h
@@ -18,11 +18,6 @@
#define pcibios_assign_all_busses() \
(pci_has_flag(PCI_REASSIGN_ALL_BUS))
-/*
- * PCI address space differs from physical memory address space
- */
-#define PCI_DMA_BUS_IS_PHYS (0)
-
#define ARCH_GENERIC_PCI_MMAP_RESOURCE 1
extern int isa_dma_bridge_buggy;
diff --git a/arch/arm64/include/asm/spinlock.h b/arch/arm64/include/asm/spinlock.h
index ebdae15d665d..26c5bd7d88d8 100644
--- a/arch/arm64/include/asm/spinlock.h
+++ b/arch/arm64/include/asm/spinlock.h
@@ -122,11 +122,6 @@ static inline int arch_spin_value_unlocked(arch_spinlock_t lock)
static inline int arch_spin_is_locked(arch_spinlock_t *lock)
{
- /*
- * Ensure prior spin_lock operations to other locks have completed
- * on this CPU before we test whether "lock" is locked.
- */
- smp_mb(); /* ^^^ */
return !arch_spin_value_unlocked(READ_ONCE(*lock));
}
diff --git a/arch/arm64/kernel/arm64ksyms.c b/arch/arm64/kernel/arm64ksyms.c
index 66be504edb6c..d894a20b70b2 100644
--- a/arch/arm64/kernel/arm64ksyms.c
+++ b/arch/arm64/kernel/arm64ksyms.c
@@ -75,3 +75,11 @@ NOKPROBE_SYMBOL(_mcount);
/* arm-smccc */
EXPORT_SYMBOL(__arm_smccc_smc);
EXPORT_SYMBOL(__arm_smccc_hvc);
+
+ /* tishift.S */
+extern long long __ashlti3(long long a, int b);
+EXPORT_SYMBOL(__ashlti3);
+extern long long __ashrti3(long long a, int b);
+EXPORT_SYMBOL(__ashrti3);
+extern long long __lshrti3(long long a, int b);
+EXPORT_SYMBOL(__lshrti3);
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index a900befadfe8..e4a1182deff7 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -316,6 +316,7 @@ static const struct midr_range arm64_bp_harden_smccc_cpus[] = {
MIDR_ALL_VERSIONS(MIDR_CAVIUM_THUNDERX2),
MIDR_ALL_VERSIONS(MIDR_QCOM_FALKOR_V1),
MIDR_ALL_VERSIONS(MIDR_QCOM_FALKOR),
+ MIDR_ALL_VERSIONS(MIDR_NVIDIA_DENVER),
{},
};
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index 87a35364e750..4bcdd0318729 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -882,7 +882,7 @@ asmlinkage void do_fpsimd_exc(unsigned int esr, struct pt_regs *regs)
si_code = FPE_FLTRES;
}
- memset(&info, 0, sizeof(info));
+ clear_siginfo(&info);
info.si_signo = SIGFPE;
info.si_code = si_code;
info.si_addr = (void __user *)instruction_pointer(regs);
diff --git a/arch/arm64/kernel/sys_compat.c b/arch/arm64/kernel/sys_compat.c
index 93ab57dcfc14..a6109825eeb9 100644
--- a/arch/arm64/kernel/sys_compat.c
+++ b/arch/arm64/kernel/sys_compat.c
@@ -112,6 +112,7 @@ long compat_arm_syscall(struct pt_regs *regs)
break;
}
+ clear_siginfo(&info);
info.si_signo = SIGILL;
info.si_errno = 0;
info.si_code = ILL_ILLTRP;
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index 8bbdc17e49df..d399d459397b 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -635,6 +635,7 @@ asmlinkage void bad_el0_sync(struct pt_regs *regs, int reason, unsigned int esr)
siginfo_t info;
void __user *pc = (void __user *)instruction_pointer(regs);
+ clear_siginfo(&info);
info.si_signo = SIGILL;
info.si_errno = 0;
info.si_code = ILL_ILLOPC;
diff --git a/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c b/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c
index 86801b6055d6..39be799d0417 100644
--- a/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c
+++ b/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c
@@ -18,11 +18,20 @@
#include <linux/compiler.h>
#include <linux/irqchip/arm-gic.h>
#include <linux/kvm_host.h>
+#include <linux/swab.h>
#include <asm/kvm_emulate.h>
#include <asm/kvm_hyp.h>
#include <asm/kvm_mmu.h>
+static bool __hyp_text __is_be(struct kvm_vcpu *vcpu)
+{
+ if (vcpu_mode_is_32bit(vcpu))
+ return !!(read_sysreg_el2(spsr) & COMPAT_PSR_E_BIT);
+
+ return !!(read_sysreg(SCTLR_EL1) & SCTLR_ELx_EE);
+}
+
/*
* __vgic_v2_perform_cpuif_access -- perform a GICV access on behalf of the
* guest.
@@ -64,14 +73,19 @@ int __hyp_text __vgic_v2_perform_cpuif_access(struct kvm_vcpu *vcpu)
addr += fault_ipa - vgic->vgic_cpu_base;
if (kvm_vcpu_dabt_iswrite(vcpu)) {
- u32 data = vcpu_data_guest_to_host(vcpu,
- vcpu_get_reg(vcpu, rd),
- sizeof(u32));
+ u32 data = vcpu_get_reg(vcpu, rd);
+ if (__is_be(vcpu)) {
+ /* guest pre-swabbed data, undo this for writel() */
+ data = swab32(data);
+ }
writel_relaxed(data, addr);
} else {
u32 data = readl_relaxed(addr);
- vcpu_set_reg(vcpu, rd, vcpu_data_host_to_guest(vcpu, data,
- sizeof(u32)));
+ if (__is_be(vcpu)) {
+ /* guest expects swabbed data */
+ data = swab32(data);
+ }
+ vcpu_set_reg(vcpu, rd, data);
}
return 1;
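
The proxied access always goes through little-endian MMIO accessors, so a big-endian vcpu needs exactly one swab32() per direction. That this round-trips the guest's data is just the involution property of the byte swap; an illustrative check:

static inline bool swab32_roundtrips(u32 x)
{
	/* swab32() is its own inverse, so one swap on the write path
	 * and one on the read path preserve guest-visible data. */
	return swab32(swab32(x)) == x;	/* always true */
}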
diff --git a/arch/arm64/lib/tishift.S b/arch/arm64/lib/tishift.S
index d3db9b2cd479..0fdff97794de 100644
--- a/arch/arm64/lib/tishift.S
+++ b/arch/arm64/lib/tishift.S
@@ -1,17 +1,6 @@
-/*
- * Copyright (C) 2017 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ * Copyright (C) 2017-2018 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
*/
#include <linux/linkage.h>
diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index a96ec0181818..db01f2709842 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -508,16 +508,6 @@ static int __init arm64_dma_init(void)
}
arch_initcall(arm64_dma_init);
-#define PREALLOC_DMA_DEBUG_ENTRIES 4096
-
-static int __init dma_debug_do_init(void)
-{
- dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES);
- return 0;
-}
-fs_initcall(dma_debug_do_init);
-
-
#ifdef CONFIG_IOMMU_DMA
#include <linux/dma-iommu.h>
#include <linux/platform_device.h>
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 4165485e8b6e..576f15153080 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -293,6 +293,57 @@ static void __do_kernel_fault(unsigned long addr, unsigned int esr,
static void __do_user_fault(struct siginfo *info, unsigned int esr)
{
current->thread.fault_address = (unsigned long)info->si_addr;
+
+ /*
+ * If the faulting address is in the kernel, we must sanitize the ESR.
+ * From userspace's point of view, kernel-only mappings don't exist
+ * at all, so we report them as level 0 translation faults.
+ * (This is not quite the way that "no mapping there at all" behaves:
+ * an alignment fault not caused by the memory type would take
+ * precedence over a translation fault for a real access to empty
+ * space. Unfortunately we can't easily distinguish "alignment fault
+ * not caused by memory type" from "alignment fault caused by memory
+ * type", so we ignore this wrinkle and just return the translation
+ * fault.)
+ */
+ if (current->thread.fault_address >= TASK_SIZE) {
+ switch (ESR_ELx_EC(esr)) {
+ case ESR_ELx_EC_DABT_LOW:
+ /*
+ * These bits provide only information about the
+ * faulting instruction, which userspace knows already.
+ * We explicitly clear bits which are architecturally
+ * RES0 in case they are given meanings in future.
+ * We always report the ESR as if the fault was taken
+ * to EL1 and so ISV and the bits in ISS[23:14] are
+ * clear. (In fact it always will be a fault to EL1.)
+ */
+ esr &= ESR_ELx_EC_MASK | ESR_ELx_IL |
+ ESR_ELx_CM | ESR_ELx_WNR;
+ esr |= ESR_ELx_FSC_FAULT;
+ break;
+ case ESR_ELx_EC_IABT_LOW:
+ /*
+ * Claim a level 0 translation fault.
+ * All other bits are architecturally RES0 for faults
+ * reported with that DFSC value, so we clear them.
+ */
+ esr &= ESR_ELx_EC_MASK | ESR_ELx_IL;
+ esr |= ESR_ELx_FSC_FAULT;
+ break;
+ default:
+ /*
+ * This should never happen (entry.S only brings us
+ * into this code for insn and data aborts from a lower
+ * exception level). Fail safe by not providing an ESR
+ * context record at all.
+ */
+ WARN(1, "ESR 0x%x is not DABT or IABT from EL0\n", esr);
+ esr = 0;
+ break;
+ }
+ }
+
current->thread.fault_code = esr;
arm64_force_sig_info(info, esr_to_fault_info(esr)->name, current);
}
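
For the data-abort case the surviving bits reduce to a simple mask-and-set. A sketch of the same computation as a standalone helper (an illustrative restatement of the code above, using the real ESR_ELx_* masks):

static unsigned int sanitized_dabt_esr(unsigned int esr)
{
	esr &= ESR_ELx_EC_MASK | ESR_ELx_IL | ESR_ELx_CM | ESR_ELx_WNR;
	return esr | ESR_ELx_FSC_FAULT;	/* level 0 translation fault */
}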
@@ -305,11 +356,12 @@ static void do_bad_area(unsigned long addr, unsigned int esr, struct pt_regs *re
*/
if (user_mode(regs)) {
const struct fault_info *inf = esr_to_fault_info(esr);
- struct siginfo si = {
- .si_signo = inf->sig,
- .si_code = inf->code,
- .si_addr = (void __user *)addr,
- };
+ struct siginfo si;
+
+ clear_siginfo(&si);
+ si.si_signo = inf->sig;
+ si.si_code = inf->code;
+ si.si_addr = (void __user *)addr;
__do_user_fault(&si, esr);
} else {
@@ -583,6 +635,7 @@ static int do_sea(unsigned long addr, unsigned int esr, struct pt_regs *regs)
nmi_exit();
}
+ clear_siginfo(&info);
info.si_signo = inf->sig;
info.si_errno = 0;
info.si_code = inf->code;
@@ -687,6 +740,7 @@ asmlinkage void __exception do_mem_abort(unsigned long addr, unsigned int esr,
show_pte(addr);
}
+ clear_siginfo(&info);
info.si_signo = inf->sig;
info.si_errno = 0;
info.si_code = inf->code;
@@ -729,6 +783,7 @@ asmlinkage void __exception do_sp_pc_abort(unsigned long addr,
local_irq_enable();
}
+ clear_siginfo(&info);
info.si_signo = SIGBUS;
info.si_errno = 0;
info.si_code = BUS_ADRALN;
@@ -772,7 +827,6 @@ asmlinkage int __exception do_debug_exception(unsigned long addr,
struct pt_regs *regs)
{
const struct fault_info *inf = debug_fault_info + DBG_ESR_EVT(esr);
- struct siginfo info;
int rv;
/*
@@ -788,6 +842,9 @@ asmlinkage int __exception do_debug_exception(unsigned long addr,
if (!inf->fn(addr, esr, regs)) {
rv = 1;
} else {
+ struct siginfo info;
+
+ clear_siginfo(&info);
info.si_signo = inf->sig;
info.si_errno = 0;
info.si_code = inf->code;
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 9f3c47acf8ff..1b18b4722420 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -646,8 +646,10 @@ static int keep_initrd __initdata;
void __init free_initrd_mem(unsigned long start, unsigned long end)
{
- if (!keep_initrd)
+ if (!keep_initrd) {
free_reserved_area((void *)start, (void *)end, 0, "initrd");
+ memblock_free(__virt_to_phys(start), end - start);
+ }
}
static int __init keepinitrd_setup(char *__unused)
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 2dbb2c9f1ec1..493ff75670ff 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -933,13 +933,15 @@ int pud_set_huge(pud_t *pudp, phys_addr_t phys, pgprot_t prot)
{
pgprot_t sect_prot = __pgprot(PUD_TYPE_SECT |
pgprot_val(mk_sect_prot(prot)));
+ pud_t new_pud = pfn_pud(__phys_to_pfn(phys), sect_prot);
- /* ioremap_page_range doesn't honour BBM */
- if (pud_present(READ_ONCE(*pudp)))
+ /* Only allow permission changes for now */
+ if (!pgattr_change_is_safe(READ_ONCE(pud_val(*pudp)),
+ pud_val(new_pud)))
return 0;
BUG_ON(phys & ~PUD_MASK);
- set_pud(pudp, pfn_pud(__phys_to_pfn(phys), sect_prot));
+ set_pud(pudp, new_pud);
return 1;
}
@@ -947,13 +949,15 @@ int pmd_set_huge(pmd_t *pmdp, phys_addr_t phys, pgprot_t prot)
{
pgprot_t sect_prot = __pgprot(PMD_TYPE_SECT |
pgprot_val(mk_sect_prot(prot)));
+ pmd_t new_pmd = pfn_pmd(__phys_to_pfn(phys), sect_prot);
- /* ioremap_page_range doesn't honour BBM */
- if (pmd_present(READ_ONCE(*pmdp)))
+ /* Only allow permission changes for now */
+ if (!pgattr_change_is_safe(READ_ONCE(pmd_val(*pmdp)),
+ pmd_val(new_pmd)))
return 0;
BUG_ON(phys & ~PMD_MASK);
- set_pmd(pmdp, pfn_pmd(__phys_to_pfn(phys), sect_prot));
+ set_pmd(pmdp, new_pmd);
return 1;
}
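
Building the new entry first lets pgattr_change_is_safe() compare old and new descriptors, so replacing a live huge mapping is only permitted when it amounts to a permission change (or the old entry is invalid); anything that would need break-before-make is refused. A sketch of the caller's side, with a hypothetical fallback:

static int try_huge_pmd(pmd_t *pmdp, phys_addr_t phys, pgprot_t prot)
{
	/* pmd_set_huge() returns 1 on success, 0 when installing the
	 * entry would require break-before-make. */
	if (!pmd_set_huge(pmdp, phys, prot))
		return -EAGAIN;	/* caller falls back to page mappings */
	return 0;
}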
diff --git a/arch/c6x/Kconfig b/arch/c6x/Kconfig
index c6b4dd1418b4..bf59855628ac 100644
--- a/arch/c6x/Kconfig
+++ b/arch/c6x/Kconfig
@@ -6,11 +6,13 @@
config C6X
def_bool y
+ select ARCH_HAS_SYNC_DMA_FOR_CPU
+ select ARCH_HAS_SYNC_DMA_FOR_DEVICE
select CLKDEV_LOOKUP
+ select DMA_NONCOHERENT_OPS
select GENERIC_ATOMIC64
select GENERIC_IRQ_SHOW
select HAVE_ARCH_TRACEHOOK
- select HAVE_DMA_API_DEBUG
select HAVE_MEMBLOCK
select SPARSE_IRQ
select IRQ_DOMAIN
diff --git a/arch/c6x/include/asm/Kbuild b/arch/c6x/include/asm/Kbuild
index 24037486317c..33a2c94fed0d 100644
--- a/arch/c6x/include/asm/Kbuild
+++ b/arch/c6x/include/asm/Kbuild
@@ -6,6 +6,7 @@ generic-y += current.h
generic-y += device.h
generic-y += div64.h
generic-y += dma.h
+generic-y += dma-mapping.h
generic-y += emergency-restart.h
generic-y += exec.h
generic-y += extable.h
diff --git a/arch/c6x/include/asm/dma-mapping.h b/arch/c6x/include/asm/dma-mapping.h
deleted file mode 100644
index 05daf1038111..000000000000
--- a/arch/c6x/include/asm/dma-mapping.h
+++ /dev/null
@@ -1,28 +0,0 @@
-/*
- * Port on Texas Instruments TMS320C6x architecture
- *
- * Copyright (C) 2004, 2009, 2010, 2011 Texas Instruments Incorporated
- * Author: Aurelien Jacquiot <aurelien.jacquiot@ti.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- */
-#ifndef _ASM_C6X_DMA_MAPPING_H
-#define _ASM_C6X_DMA_MAPPING_H
-
-extern const struct dma_map_ops c6x_dma_ops;
-
-static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus)
-{
- return &c6x_dma_ops;
-}
-
-extern void coherent_mem_init(u32 start, u32 size);
-void *c6x_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
- gfp_t gfp, unsigned long attrs);
-void c6x_dma_free(struct device *dev, size_t size, void *vaddr,
- dma_addr_t dma_handle, unsigned long attrs);
-
-#endif /* _ASM_C6X_DMA_MAPPING_H */
diff --git a/arch/c6x/include/asm/setup.h b/arch/c6x/include/asm/setup.h
index 852afb209afb..350f34debb19 100644
--- a/arch/c6x/include/asm/setup.h
+++ b/arch/c6x/include/asm/setup.h
@@ -28,5 +28,7 @@ extern unsigned char c6x_fuse_mac[6];
extern void machine_init(unsigned long dt_ptr);
extern void time_init(void);
+extern void coherent_mem_init(u32 start, u32 size);
+
#endif /* !__ASSEMBLY__ */
#endif /* _ASM_C6X_SETUP_H */
diff --git a/arch/c6x/kernel/Makefile b/arch/c6x/kernel/Makefile
index 02f340d7b8fe..fbe74174de87 100644
--- a/arch/c6x/kernel/Makefile
+++ b/arch/c6x/kernel/Makefile
@@ -8,6 +8,6 @@ extra-y := head.o vmlinux.lds
obj-y := process.o traps.o irq.o signal.o ptrace.o
obj-y += setup.o sys_c6x.o time.o devicetree.o
obj-y += switch_to.o entry.o vectors.o c6x_ksyms.o
-obj-y += soc.o dma.o
+obj-y += soc.o
obj-$(CONFIG_MODULES) += module.o
diff --git a/arch/c6x/kernel/dma.c b/arch/c6x/kernel/dma.c
deleted file mode 100644
index 9fff8be75f58..000000000000
--- a/arch/c6x/kernel/dma.c
+++ /dev/null
@@ -1,149 +0,0 @@
-/*
- * Copyright (C) 2011 Texas Instruments Incorporated
- * Author: Mark Salter <msalter@redhat.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-#include <linux/module.h>
-#include <linux/dma-mapping.h>
-#include <linux/mm.h>
-#include <linux/mm_types.h>
-#include <linux/scatterlist.h>
-
-#include <asm/cacheflush.h>
-
-static void c6x_dma_sync(dma_addr_t handle, size_t size,
- enum dma_data_direction dir)
-{
- unsigned long paddr = handle;
-
- BUG_ON(!valid_dma_direction(dir));
-
- switch (dir) {
- case DMA_FROM_DEVICE:
- L2_cache_block_invalidate(paddr, paddr + size);
- break;
- case DMA_TO_DEVICE:
- L2_cache_block_writeback(paddr, paddr + size);
- break;
- case DMA_BIDIRECTIONAL:
- L2_cache_block_writeback_invalidate(paddr, paddr + size);
- break;
- default:
- break;
- }
-}
-
-static dma_addr_t c6x_dma_map_page(struct device *dev, struct page *page,
- unsigned long offset, size_t size, enum dma_data_direction dir,
- unsigned long attrs)
-{
- dma_addr_t handle = virt_to_phys(page_address(page) + offset);
-
- if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
- c6x_dma_sync(handle, size, dir);
-
- return handle;
-}
-
-static void c6x_dma_unmap_page(struct device *dev, dma_addr_t handle,
- size_t size, enum dma_data_direction dir, unsigned long attrs)
-{
- if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
- c6x_dma_sync(handle, size, dir);
-}
-
-static int c6x_dma_map_sg(struct device *dev, struct scatterlist *sglist,
- int nents, enum dma_data_direction dir, unsigned long attrs)
-{
- struct scatterlist *sg;
- int i;
-
- for_each_sg(sglist, sg, nents, i) {
- sg->dma_address = sg_phys(sg);
- if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
- c6x_dma_sync(sg->dma_address, sg->length, dir);
- }
-
- return nents;
-}
-
-static void c6x_dma_unmap_sg(struct device *dev, struct scatterlist *sglist,
- int nents, enum dma_data_direction dir, unsigned long attrs)
-{
- struct scatterlist *sg;
- int i;
-
- if (attrs & DMA_ATTR_SKIP_CPU_SYNC)
- return;
-
- for_each_sg(sglist, sg, nents, i)
- c6x_dma_sync(sg_dma_address(sg), sg->length, dir);
-}
-
-static void c6x_dma_sync_single_for_cpu(struct device *dev, dma_addr_t handle,
- size_t size, enum dma_data_direction dir)
-{
- c6x_dma_sync(handle, size, dir);
-
-}
-
-static void c6x_dma_sync_single_for_device(struct device *dev,
- dma_addr_t handle, size_t size, enum dma_data_direction dir)
-{
- c6x_dma_sync(handle, size, dir);
-
-}
-
-static void c6x_dma_sync_sg_for_cpu(struct device *dev,
- struct scatterlist *sglist, int nents,
- enum dma_data_direction dir)
-{
- struct scatterlist *sg;
- int i;
-
- for_each_sg(sglist, sg, nents, i)
- c6x_dma_sync_single_for_cpu(dev, sg_dma_address(sg),
- sg->length, dir);
-
-}
-
-static void c6x_dma_sync_sg_for_device(struct device *dev,
- struct scatterlist *sglist, int nents,
- enum dma_data_direction dir)
-{
- struct scatterlist *sg;
- int i;
-
- for_each_sg(sglist, sg, nents, i)
- c6x_dma_sync_single_for_device(dev, sg_dma_address(sg),
- sg->length, dir);
-
-}
-
-const struct dma_map_ops c6x_dma_ops = {
- .alloc = c6x_dma_alloc,
- .free = c6x_dma_free,
- .map_page = c6x_dma_map_page,
- .unmap_page = c6x_dma_unmap_page,
- .map_sg = c6x_dma_map_sg,
- .unmap_sg = c6x_dma_unmap_sg,
- .sync_single_for_device = c6x_dma_sync_single_for_device,
- .sync_single_for_cpu = c6x_dma_sync_single_for_cpu,
- .sync_sg_for_device = c6x_dma_sync_sg_for_device,
- .sync_sg_for_cpu = c6x_dma_sync_sg_for_cpu,
-};
-EXPORT_SYMBOL(c6x_dma_ops);
-
-/* Number of entries preallocated for DMA-API debugging */
-#define PREALLOC_DMA_DEBUG_ENTRIES (1 << 16)
-
-static int __init dma_init(void)
-{
- dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES);
-
- return 0;
-}
-fs_initcall(dma_init);
diff --git a/arch/c6x/kernel/traps.c b/arch/c6x/kernel/traps.c
index 4c1d4b84dd2b..5c60aea3b75a 100644
--- a/arch/c6x/kernel/traps.c
+++ b/arch/c6x/kernel/traps.c
@@ -244,7 +244,6 @@ static struct exception_info eexcept_table[128] = {
static void do_trap(struct exception_info *except_info, struct pt_regs *regs)
{
unsigned long addr = instruction_pointer(regs);
- siginfo_t info;
if (except_info->code != TRAP_BRKPT)
pr_err("TRAP: %s PC[0x%lx] signo[%d] code[%d]\n",
@@ -253,12 +252,8 @@ static void do_trap(struct exception_info *except_info, struct pt_regs *regs)
die_if_kernel(except_info->kernel_str, regs, addr);
- info.si_signo = except_info->signo;
- info.si_errno = 0;
- info.si_code = except_info->code;
- info.si_addr = (void __user *)addr;
-
- force_sig_info(except_info->signo, &info, current);
+ force_sig_fault(except_info->signo, except_info->code,
+ (void __user *)addr, current);
}
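
force_sig_fault() captures the recurring signo/si_code/si_addr pattern and zeroes the rest of the siginfo internally, removing a class of uninitialised-padding leaks to userspace. On generic architectures it is roughly equivalent to this open-coded sequence (signature simplified; a sketch, not the exact implementation):

static void send_fault_sig(int sig, int code, void __user *addr)
{
	struct siginfo info;

	clear_siginfo(&info);
	info.si_signo = sig;
	info.si_errno = 0;
	info.si_code  = code;
	info.si_addr  = addr;
	force_sig_info(sig, &info, current);
}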
/*
diff --git a/arch/c6x/mm/dma-coherent.c b/arch/c6x/mm/dma-coherent.c
index 95e38ad27c69..d0a8e0c4b27e 100644
--- a/arch/c6x/mm/dma-coherent.c
+++ b/arch/c6x/mm/dma-coherent.c
@@ -19,10 +19,12 @@
#include <linux/bitops.h>
#include <linux/module.h>
#include <linux/interrupt.h>
-#include <linux/dma-mapping.h>
+#include <linux/dma-noncoherent.h>
#include <linux/memblock.h>
+#include <asm/cacheflush.h>
#include <asm/page.h>
+#include <asm/setup.h>
/*
* DMA coherent memory management, can be redefined using the memdma=
@@ -73,7 +75,7 @@ static void __free_dma_pages(u32 addr, int order)
* Allocate DMA coherent memory space and return both the kernel
* virtual and DMA address for that space.
*/
-void *c6x_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
+void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
gfp_t gfp, unsigned long attrs)
{
u32 paddr;
@@ -98,7 +100,7 @@ void *c6x_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
/*
* Free DMA coherent memory as defined by the above mapping.
*/
-void c6x_dma_free(struct device *dev, size_t size, void *vaddr,
+void arch_dma_free(struct device *dev, size_t size, void *vaddr,
dma_addr_t dma_handle, unsigned long attrs)
{
int order;
@@ -139,3 +141,35 @@ void __init coherent_mem_init(phys_addr_t start, u32 size)
dma_bitmap = phys_to_virt(bitmap_phys);
memset(dma_bitmap, 0, dma_pages * PAGE_SIZE);
}
+
+static void c6x_dma_sync(struct device *dev, phys_addr_t paddr, size_t size,
+ enum dma_data_direction dir)
+{
+ BUG_ON(!valid_dma_direction(dir));
+
+ switch (dir) {
+ case DMA_FROM_DEVICE:
+ L2_cache_block_invalidate(paddr, paddr + size);
+ break;
+ case DMA_TO_DEVICE:
+ L2_cache_block_writeback(paddr, paddr + size);
+ break;
+ case DMA_BIDIRECTIONAL:
+ L2_cache_block_writeback_invalidate(paddr, paddr + size);
+ break;
+ default:
+ break;
+ }
+}
+
+void arch_sync_dma_for_device(struct device *dev, phys_addr_t paddr,
+ size_t size, enum dma_data_direction dir)
+{
+ return c6x_dma_sync(dev, paddr, size, dir);
+}
+
+void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr,
+ size_t size, enum dma_data_direction dir)
+{
+ return c6x_dma_sync(dev, paddr, size, dir);
+}
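
With DMA_NONCOHERENT_OPS selected, the common code provides the dma_map_ops and invokes these two hooks around device accesses. Roughly how the map path uses them (simplified from memory; the real code lives in the generic noncoherent DMA code, and the phys-to-dma translation is omitted here):

static dma_addr_t sketch_map_page(struct device *dev, struct page *page,
				  unsigned long offset, size_t size,
				  enum dma_data_direction dir,
				  unsigned long attrs)
{
	phys_addr_t paddr = page_to_phys(page) + offset;

	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
		arch_sync_dma_for_device(dev, paddr, size, dir);
	return paddr;	/* phys-to-dma translation omitted */
}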
diff --git a/arch/h8300/include/asm/pci.h b/arch/h8300/include/asm/pci.h
index 7c9e55d62215..d4d345a52092 100644
--- a/arch/h8300/include/asm/pci.h
+++ b/arch/h8300/include/asm/pci.h
@@ -15,6 +15,4 @@ static inline void pcibios_penalize_isa_irq(int irq, int active)
/* We don't do dynamic PCI IRQ allocation */
}
-#define PCI_DMA_BUS_IS_PHYS (1)
-
#endif /* _ASM_H8300_PCI_H */
diff --git a/arch/hexagon/Kconfig b/arch/hexagon/Kconfig
index 76d2f20d525e..37adb2003033 100644
--- a/arch/hexagon/Kconfig
+++ b/arch/hexagon/Kconfig
@@ -19,6 +19,7 @@ config HEXAGON
select GENERIC_IRQ_SHOW
select HAVE_ARCH_KGDB
select HAVE_ARCH_TRACEHOOK
+ select NEED_SG_DMA_LENGTH
select NO_IOPORT_MAP
select GENERIC_IOMAP
select GENERIC_SMP_IDLE_THREAD
@@ -63,9 +64,6 @@ config GENERIC_CSUM
config GENERIC_IRQ_PROBE
def_bool y
-config NEED_SG_DMA_LENGTH
- def_bool y
-
config RWSEM_GENERIC_SPINLOCK
def_bool n
diff --git a/arch/hexagon/include/asm/io.h b/arch/hexagon/include/asm/io.h
index 9e8621d94ee9..e17262ad125e 100644
--- a/arch/hexagon/include/asm/io.h
+++ b/arch/hexagon/include/asm/io.h
@@ -216,6 +216,12 @@ static inline void memcpy_toio(volatile void __iomem *dst, const void *src,
memcpy((void *) dst, src, count);
}
+static inline void memset_io(volatile void __iomem *addr, int value,
+ size_t size)
+{
+ memset((void __force *)addr, value, size);
+}
+
#define PCI_IO_ADDR (volatile void __iomem *)
/*
diff --git a/arch/hexagon/kernel/dma.c b/arch/hexagon/kernel/dma.c
index ad8347c29dcf..77459df34e2e 100644
--- a/arch/hexagon/kernel/dma.c
+++ b/arch/hexagon/kernel/dma.c
@@ -208,7 +208,6 @@ const struct dma_map_ops hexagon_dma_ops = {
.sync_single_for_cpu = hexagon_sync_single_for_cpu,
.sync_single_for_device = hexagon_sync_single_for_device,
.mapping_error = hexagon_mapping_error,
- .is_phys = 1,
};
void __init hexagon_dma_init(void)
diff --git a/arch/hexagon/kernel/traps.c b/arch/hexagon/kernel/traps.c
index 2942a9204a9a..91ee04842c22 100644
--- a/arch/hexagon/kernel/traps.c
+++ b/arch/hexagon/kernel/traps.c
@@ -412,10 +412,6 @@ void do_trap0(struct pt_regs *regs)
case TRAP_DEBUG:
/* Trap0 0xdb is debug breakpoint */
if (user_mode(regs)) {
- struct siginfo info;
-
- info.si_signo = SIGTRAP;
- info.si_errno = 0;
/*
* Some architectures add some per-thread state
* to distinguish between breakpoint traps and
@@ -423,9 +419,8 @@ void do_trap0(struct pt_regs *regs)
* set the si_code value appropriately, or we
* may want to use a different trap0 flavor.
*/
- info.si_code = TRAP_BRKPT;
- info.si_addr = (void __user *) pt_elr(regs);
- force_sig_info(SIGTRAP, &info, current);
+ force_sig_fault(SIGTRAP, TRAP_BRKPT,
+ (void __user *) pt_elr(regs), current);
} else {
#ifdef CONFIG_KGDB
kgdb_handle_exception(pt_cause(regs), SIGTRAP,
diff --git a/arch/hexagon/lib/checksum.c b/arch/hexagon/lib/checksum.c
index 617506d1a559..7cd0a2259269 100644
--- a/arch/hexagon/lib/checksum.c
+++ b/arch/hexagon/lib/checksum.c
@@ -199,3 +199,4 @@ csum_partial_copy_nocheck(const void *src, void *dst, int len, __wsum sum)
memcpy(dst, src, len);
return csum_partial(dst, len, sum);
}
+EXPORT_SYMBOL(csum_partial_copy_nocheck);
diff --git a/arch/hexagon/mm/vm_fault.c b/arch/hexagon/mm/vm_fault.c
index 3eec33c5cfd7..933bbcef5363 100644
--- a/arch/hexagon/mm/vm_fault.c
+++ b/arch/hexagon/mm/vm_fault.c
@@ -50,7 +50,7 @@ void do_page_fault(unsigned long address, long cause, struct pt_regs *regs)
{
struct vm_area_struct *vma;
struct mm_struct *mm = current->mm;
- siginfo_t info;
+ int si_signo;
int si_code = SEGV_MAPERR;
int fault;
const struct exception_table_entry *fixup;
@@ -140,28 +140,22 @@ good_area:
* unable to fix up the page fault.
*/
if (fault & VM_FAULT_SIGBUS) {
- info.si_signo = SIGBUS;
- info.si_code = BUS_ADRERR;
+ si_signo = SIGBUS;
+ si_code = BUS_ADRERR;
}
/* Address is not in the memory map */
else {
- info.si_signo = SIGSEGV;
- info.si_code = SEGV_ACCERR;
+ si_signo = SIGSEGV;
+ si_code = SEGV_ACCERR;
}
- info.si_errno = 0;
- info.si_addr = (void __user *)address;
- force_sig_info(info.si_signo, &info, current);
+ force_sig_fault(si_signo, si_code, (void __user *)address, current);
return;
bad_area:
up_read(&mm->mmap_sem);
if (user_mode(regs)) {
- info.si_signo = SIGSEGV;
- info.si_errno = 0;
- info.si_code = si_code;
- info.si_addr = (void *)address;
- force_sig_info(info.si_signo, &info, current);
+ force_sig_fault(SIGSEGV, si_code, (void __user *)address, current);
return;
}
/* Kernel-mode fault falls through */
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index bbe12a038d21..792437d526c6 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -29,7 +29,6 @@ config IA64
select HAVE_FUNCTION_TRACER
select TTY
select HAVE_ARCH_TRACEHOOK
- select HAVE_DMA_API_DEBUG
select HAVE_MEMBLOCK
select HAVE_MEMBLOCK_NODE_MAP
select HAVE_VIRT_CPU_ACCOUNTING
@@ -54,6 +53,8 @@ config IA64
select MODULES_USE_ELF_RELA
select ARCH_USE_CMPXCHG_LOCKREF
select HAVE_ARCH_AUDITSYSCALL
+ select NEED_DMA_MAP_STATE
+ select NEED_SG_DMA_LENGTH
default y
help
The Itanium Processor Family is Intel's 64-bit successor to
@@ -78,18 +79,6 @@ config MMU
bool
default y
-config ARCH_DMA_ADDR_T_64BIT
- def_bool y
-
-config NEED_DMA_MAP_STATE
- def_bool y
-
-config NEED_SG_DMA_LENGTH
- def_bool y
-
-config SWIOTLB
- bool
-
config STACKTRACE_SUPPORT
def_bool y
@@ -146,7 +135,6 @@ config IA64_GENERIC
bool "generic"
select NUMA
select ACPI_NUMA
- select DMA_DIRECT_OPS
select SWIOTLB
select PCI_MSI
help
@@ -167,7 +155,6 @@ config IA64_GENERIC
config IA64_DIG
bool "DIG-compliant"
- select DMA_DIRECT_OPS
select SWIOTLB
config IA64_DIG_VTD
@@ -183,7 +170,6 @@ config IA64_HP_ZX1
config IA64_HP_ZX1_SWIOTLB
bool "HP-zx1/sx1000 with software I/O TLB"
- select DMA_DIRECT_OPS
select SWIOTLB
help
Build a kernel that runs on HP zx1 and sx1000 systems even when they
@@ -207,7 +193,6 @@ config IA64_SGI_UV
bool "SGI-UV"
select NUMA
select ACPI_NUMA
- select DMA_DIRECT_OPS
select SWIOTLB
help
Selecting this option will optimize the kernel for use on UV based
@@ -218,7 +203,6 @@ config IA64_SGI_UV
config IA64_HP_SIM
bool "Ski-simulator"
- select DMA_DIRECT_OPS
select SWIOTLB
depends on !PM
@@ -397,7 +381,7 @@ config ARCH_DISCONTIGMEM_ENABLE
Say Y to support efficient handling of discontiguous physical memory,
for architectures which are either NUMA (Non-Uniform Memory Access)
or have huge holes in the physical address space for other reasons.
- See <file:Documentation/vm/numa> for more.
+ See <file:Documentation/vm/numa.rst> for more.
config ARCH_FLATMEM_ENABLE
def_bool y
@@ -613,6 +597,3 @@ source "security/Kconfig"
source "crypto/Kconfig"
source "lib/Kconfig"
-
-config IOMMU_HELPER
- def_bool (IA64_HP_ZX1 || IA64_HP_ZX1_SWIOTLB || IA64_GENERIC || SWIOTLB)
diff --git a/arch/ia64/hp/common/sba_iommu.c b/arch/ia64/hp/common/sba_iommu.c
index aec4a3354abe..ee5b652d320a 100644
--- a/arch/ia64/hp/common/sba_iommu.c
+++ b/arch/ia64/hp/common/sba_iommu.c
@@ -1845,9 +1845,6 @@ static void ioc_init(unsigned long hpa, struct ioc *ioc)
ioc_resource_init(ioc);
ioc_sac_init(ioc);
- if ((long) ~iovp_mask > (long) ia64_max_iommu_merge_mask)
- ia64_max_iommu_merge_mask = ~iovp_mask;
-
printk(KERN_INFO PFX
"%s %d.%d HPA 0x%lx IOVA space %dMb at 0x%lx\n",
ioc->name, (ioc->rev >> 4) & 0xF, ioc->rev & 0xF,
@@ -1942,19 +1939,6 @@ static const struct seq_operations ioc_seq_ops = {
.show = ioc_show
};
-static int
-ioc_open(struct inode *inode, struct file *file)
-{
- return seq_open(file, &ioc_seq_ops);
-}
-
-static const struct file_operations ioc_fops = {
- .open = ioc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release
-};
-
static void __init
ioc_proc_init(void)
{
@@ -1964,7 +1948,7 @@ ioc_proc_init(void)
if (!dir)
return;
- proc_create(ioc_list->name, 0, dir, &ioc_fops);
+ proc_create_seq(ioc_list->name, 0, dir, &ioc_seq_ops);
}
#endif
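
proc_create_seq() takes the seq_operations directly, so the boilerplate open handler and file_operations go away. The new helper is effectively the following (upstream it is a thin macro over proc_create_seq_private()):

struct proc_dir_entry *proc_create_seq(const char *name, umode_t mode,
				       struct proc_dir_entry *parent,
				       const struct seq_operations *ops);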
diff --git a/arch/ia64/hp/sim/simserial.c b/arch/ia64/hp/sim/simserial.c
index a419ccf33cde..663388a73d4e 100644
--- a/arch/ia64/hp/sim/simserial.c
+++ b/arch/ia64/hp/sim/simserial.c
@@ -435,19 +435,6 @@ static int rs_proc_show(struct seq_file *m, void *v)
return 0;
}
-static int rs_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, rs_proc_show, NULL);
-}
-
-static const struct file_operations rs_proc_fops = {
- .owner = THIS_MODULE,
- .open = rs_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
static const struct tty_operations hp_ops = {
.open = rs_open,
.close = rs_close,
@@ -462,7 +449,7 @@ static const struct tty_operations hp_ops = {
.unthrottle = rs_unthrottle,
.send_xchar = rs_send_xchar,
.hangup = rs_hangup,
- .proc_fops = &rs_proc_fops,
+ .proc_show = rs_proc_show,
};
static const struct tty_port_operations hp_port_ops = {
diff --git a/arch/ia64/include/asm/hardirq.h b/arch/ia64/include/asm/hardirq.h
index bdc4669c71c3..ccde7c2ba00f 100644
--- a/arch/ia64/include/asm/hardirq.h
+++ b/arch/ia64/include/asm/hardirq.h
@@ -13,7 +13,7 @@
#define __ARCH_IRQ_STAT 1
-#define local_softirq_pending() (local_cpu_data->softirq_pending)
+#define local_softirq_pending_ref ia64_cpu_info.softirq_pending
#include <linux/threads.h>
#include <linux/irq.h>
diff --git a/arch/ia64/include/asm/pci.h b/arch/ia64/include/asm/pci.h
index b1d04e8bafc8..780e8744ba85 100644
--- a/arch/ia64/include/asm/pci.h
+++ b/arch/ia64/include/asm/pci.h
@@ -30,23 +30,6 @@ struct pci_vector_struct {
#define PCIBIOS_MIN_IO 0x1000
#define PCIBIOS_MIN_MEM 0x10000000
-/*
- * PCI_DMA_BUS_IS_PHYS should be set to 1 if there is _necessarily_ a direct
- * correspondence between device bus addresses and CPU physical addresses.
- * Platforms with a hardware I/O MMU _must_ turn this off to suppress the
- * bounce buffer handling code in the block and network device layers.
- * Platforms with separate bus address spaces _must_ turn this off and provide
- * a device DMA mapping implementation that takes care of the necessary
- * address translation.
- *
- * For now, the ia64 platforms which may have separate/multiple bus address
- * spaces all have I/O MMUs which support the merging of physically
- * discontiguous buffers, so we can use that as the sole factor to determine
- * the setting of PCI_DMA_BUS_IS_PHYS.
- */
-extern unsigned long ia64_max_iommu_merge_mask;
-#define PCI_DMA_BUS_IS_PHYS (ia64_max_iommu_merge_mask == ~0UL)
-
#define HAVE_PCI_MMAP
#define ARCH_GENERIC_PCI_MMAP_RESOURCE
#define arch_can_pci_mmap_wc() 1
diff --git a/arch/ia64/include/uapi/asm/siginfo.h b/arch/ia64/include/uapi/asm/siginfo.h
index 5aa454ed89db..52b5af424511 100644
--- a/arch/ia64/include/uapi/asm/siginfo.h
+++ b/arch/ia64/include/uapi/asm/siginfo.h
@@ -27,11 +27,4 @@
#define __ISR_VALID_BIT 0
#define __ISR_VALID (1 << __ISR_VALID_BIT)
-/*
- * SIGFPE si_codes
- */
-#ifdef __KERNEL__
-#define FPE_FIXME 0 /* Broken dup of SI_USER */
-#endif /* __KERNEL__ */
-
#endif /* _UAPI_ASM_IA64_SIGINFO_H */
diff --git a/arch/ia64/kernel/brl_emu.c b/arch/ia64/kernel/brl_emu.c
index 9bcc908bc85e..a61f6c6a36f8 100644
--- a/arch/ia64/kernel/brl_emu.c
+++ b/arch/ia64/kernel/brl_emu.c
@@ -62,6 +62,7 @@ ia64_emulate_brl (struct pt_regs *regs, unsigned long ar_ec)
struct illegal_op_return rv;
long tmp_taken, unimplemented_address;
+ clear_siginfo(&siginfo);
rv.fkt = (unsigned long) -1;
/*
diff --git a/arch/ia64/kernel/dma-mapping.c b/arch/ia64/kernel/dma-mapping.c
index f2d57e66fd86..7a471d8d67d4 100644
--- a/arch/ia64/kernel/dma-mapping.c
+++ b/arch/ia64/kernel/dma-mapping.c
@@ -9,16 +9,6 @@ int iommu_detected __read_mostly;
const struct dma_map_ops *dma_ops;
EXPORT_SYMBOL(dma_ops);
-#define PREALLOC_DMA_DEBUG_ENTRIES (1 << 16)
-
-static int __init dma_init(void)
-{
- dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES);
-
- return 0;
-}
-fs_initcall(dma_init);
-
const struct dma_map_ops *dma_get_ops(struct device *dev)
{
return dma_ops;
diff --git a/arch/ia64/kernel/palinfo.c b/arch/ia64/kernel/palinfo.c
index b6e597860888..f4a94241265c 100644
--- a/arch/ia64/kernel/palinfo.c
+++ b/arch/ia64/kernel/palinfo.c
@@ -920,18 +920,6 @@ static int proc_palinfo_show(struct seq_file *m, void *v)
return 0;
}
-static int proc_palinfo_open(struct inode *inode, struct file *file)
-{
- return single_open(file, proc_palinfo_show, PDE_DATA(inode));
-}
-
-static const struct file_operations proc_palinfo_fops = {
- .open = proc_palinfo_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
static int palinfo_add_proc(unsigned int cpu)
{
pal_func_cpu_u_t f;
@@ -948,8 +936,8 @@ static int palinfo_add_proc(unsigned int cpu)
for (j=0; j < NR_PALINFO_ENTRIES; j++) {
f.func_id = j;
- proc_create_data(palinfo_entries[j].name, 0, cpu_dir,
- &proc_palinfo_fops, (void *)f.value);
+ proc_create_single_data(palinfo_entries[j].name, 0, cpu_dir,
+ proc_palinfo_show, (void *)f.value);
}
return 0;
}
diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c
index 8fb280e33114..3b38c717008a 100644
--- a/arch/ia64/kernel/perfmon.c
+++ b/arch/ia64/kernel/perfmon.c
@@ -5708,13 +5708,6 @@ const struct seq_operations pfm_seq_ops = {
.show = pfm_proc_show
};
-static int
-pfm_proc_open(struct inode *inode, struct file *file)
-{
- return seq_open(file, &pfm_seq_ops);
-}
-
-
/*
* we come here as soon as local_cpu_data->pfm_syst_wide is set. this happens
* during pfm_enable() hence before pfm_start(). We cannot assume monitoring
@@ -6537,13 +6530,6 @@ found:
return 0;
}
-static const struct file_operations pfm_proc_fops = {
- .open = pfm_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
int __init
pfm_init(void)
{
@@ -6615,7 +6601,7 @@ pfm_init(void)
/*
* create /proc/perfmon (mostly for debugging purposes)
*/
- perfmon_dir = proc_create("perfmon", S_IRUGO, NULL, &pfm_proc_fops);
+ perfmon_dir = proc_create_seq("perfmon", S_IRUGO, NULL, &pfm_seq_ops);
if (perfmon_dir == NULL) {
printk(KERN_ERR "perfmon: cannot create /proc entry, perfmon disabled\n");
pmu_conf = NULL;
diff --git a/arch/ia64/kernel/salinfo.c b/arch/ia64/kernel/salinfo.c
index 52c404b08904..aba1f463a8dd 100644
--- a/arch/ia64/kernel/salinfo.c
+++ b/arch/ia64/kernel/salinfo.c
@@ -54,8 +54,6 @@ MODULE_AUTHOR("Jesse Barnes <jbarnes@sgi.com>");
MODULE_DESCRIPTION("/proc interface to IA-64 SAL features");
MODULE_LICENSE("GPL");
-static const struct file_operations proc_salinfo_fops;
-
typedef struct {
const char *name; /* name of the proc entry */
unsigned long feature; /* feature bit */
@@ -578,6 +576,17 @@ static int salinfo_cpu_pre_down(unsigned int cpu)
return 0;
}
+/*
+ * 'data' contains an integer that corresponds to the feature we're
+ * testing
+ */
+static int proc_salinfo_show(struct seq_file *m, void *v)
+{
+ unsigned long data = (unsigned long)v;
+ seq_puts(m, (sal_platform_features & data) ? "1\n" : "0\n");
+ return 0;
+}
+
static int __init
salinfo_init(void)
{
@@ -593,9 +602,9 @@ salinfo_init(void)
for (i=0; i < NR_SALINFO_ENTRIES; i++) {
/* pass the feature bit in question as misc data */
- *sdir++ = proc_create_data(salinfo_entries[i].name, 0, salinfo_dir,
- &proc_salinfo_fops,
- (void *)salinfo_entries[i].feature);
+ *sdir++ = proc_create_single_data(salinfo_entries[i].name, 0,
+ salinfo_dir, proc_salinfo_show,
+ (void *)salinfo_entries[i].feature);
}
for (i = 0; i < ARRAY_SIZE(salinfo_log_name); i++) {
@@ -633,27 +642,4 @@ salinfo_init(void)
return 0;
}
-/*
- * 'data' contains an integer that corresponds to the feature we're
- * testing
- */
-static int proc_salinfo_show(struct seq_file *m, void *v)
-{
- unsigned long data = (unsigned long)v;
- seq_puts(m, (sal_platform_features & data) ? "1\n" : "0\n");
- return 0;
-}
-
-static int proc_salinfo_open(struct inode *inode, struct file *file)
-{
- return single_open(file, proc_salinfo_show, PDE_DATA(inode));
-}
-
-static const struct file_operations proc_salinfo_fops = {
- .open = proc_salinfo_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
module_init(salinfo_init);
diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c
index dee56bcb993d..ad43cbf70628 100644
--- a/arch/ia64/kernel/setup.c
+++ b/arch/ia64/kernel/setup.c
@@ -124,18 +124,6 @@ unsigned long ia64_i_cache_stride_shift = ~0;
unsigned long ia64_cache_stride_shift = ~0;
/*
- * The merge_mask variable needs to be set to (max(iommu_page_size(iommu)) - 1). This
- * mask specifies a mask of address bits that must be 0 in order for two buffers to be
- * mergeable by the I/O MMU (i.e., the end address of the first buffer and the start
- * address of the second buffer must be aligned to (merge_mask+1) in order to be
- * mergeable). By default, we assume there is no I/O MMU which can merge physically
- * discontiguous buffers, so we set the merge_mask to ~0UL, which corresponds to a iommu
- * page-size of 2^64.
- */
-unsigned long ia64_max_iommu_merge_mask = ~0UL;
-EXPORT_SYMBOL(ia64_max_iommu_merge_mask);
-
-/*
* We use a special marker for the end of memory and it uses the extra (+1) slot
*/
struct rsvd_region rsvd_region[IA64_MAX_RSVD_REGIONS + 1] __initdata;
diff --git a/arch/ia64/kernel/signal.c b/arch/ia64/kernel/signal.c
index 54547c7cf8a2..d1234a5ba4c5 100644
--- a/arch/ia64/kernel/signal.c
+++ b/arch/ia64/kernel/signal.c
@@ -153,6 +153,7 @@ ia64_rt_sigreturn (struct sigscratch *scr)
return retval;
give_sigsegv:
+ clear_siginfo(&si);
si.si_signo = SIGSEGV;
si.si_errno = 0;
si.si_code = SI_KERNEL;
@@ -236,6 +237,7 @@ force_sigsegv_info (int sig, void __user *addr)
unsigned long flags;
struct siginfo si;
+ clear_siginfo(&si);
if (sig == SIGSEGV) {
/*
* Acquiring siglock around the sa_handler-update is almost
diff --git a/arch/ia64/kernel/traps.c b/arch/ia64/kernel/traps.c
index 6d4e76a4267f..c6f4932073a1 100644
--- a/arch/ia64/kernel/traps.c
+++ b/arch/ia64/kernel/traps.c
@@ -104,6 +104,7 @@ __kprobes ia64_bad_break (unsigned long break_num, struct pt_regs *regs)
int sig, code;
/* SIGILL, SIGFPE, SIGSEGV, and SIGBUS want these field initialized: */
+ clear_siginfo(&siginfo);
siginfo.si_addr = (void __user *) (regs->cr_iip + ia64_psr(regs)->ri);
siginfo.si_imm = break_num;
siginfo.si_flags = 0; /* clear __ISR_VALID */
@@ -293,7 +294,6 @@ handle_fpu_swa (int fp_fault, struct pt_regs *regs, unsigned long isr)
{
long exception, bundle[2];
unsigned long fault_ip;
- struct siginfo siginfo;
fault_ip = regs->cr_iip;
if (!fp_fault && (ia64_psr(regs)->ri == 0))
@@ -344,13 +344,16 @@ handle_fpu_swa (int fp_fault, struct pt_regs *regs, unsigned long isr)
printk(KERN_ERR "handle_fpu_swa: fp_emulate() returned -1\n");
return -1;
} else {
+ struct siginfo siginfo;
+
/* is next instruction a trap? */
if (exception & 2) {
ia64_increment_ip(regs);
}
+ clear_siginfo(&siginfo);
siginfo.si_signo = SIGFPE;
siginfo.si_errno = 0;
- siginfo.si_code = FPE_FIXME; /* default code */
+ siginfo.si_code = FPE_FLTUNK; /* default code */
siginfo.si_addr = (void __user *) (regs->cr_iip + ia64_psr(regs)->ri);
if (isr & 0x11) {
siginfo.si_code = FPE_FLTINV;
@@ -372,9 +375,12 @@ handle_fpu_swa (int fp_fault, struct pt_regs *regs, unsigned long isr)
return -1;
} else if (exception != 0) {
/* raise exception */
+ struct siginfo siginfo;
+
+ clear_siginfo(&siginfo);
siginfo.si_signo = SIGFPE;
siginfo.si_errno = 0;
- siginfo.si_code = FPE_FIXME; /* default code */
+ siginfo.si_code = FPE_FLTUNK; /* default code */
siginfo.si_addr = (void __user *) (regs->cr_iip + ia64_psr(regs)->ri);
if (isr & 0x880) {
siginfo.si_code = FPE_FLTOVF;
@@ -420,7 +426,7 @@ ia64_illegal_op_fault (unsigned long ec, long arg1, long arg2, long arg3,
if (die_if_kernel(buf, &regs, 0))
return rv;
- memset(&si, 0, sizeof(si));
+ clear_siginfo(&si);
si.si_signo = SIGILL;
si.si_code = ILL_ILLOPC;
si.si_addr = (void __user *) (regs.cr_iip + ia64_psr(&regs)->ri);
@@ -434,7 +440,6 @@ ia64_fault (unsigned long vector, unsigned long isr, unsigned long ifa,
long arg7, struct pt_regs regs)
{
unsigned long code, error = isr, iip;
- struct siginfo siginfo;
char buf[128];
int result, sig;
static const char *reason[] = {
@@ -485,6 +490,7 @@ ia64_fault (unsigned long vector, unsigned long isr, unsigned long ifa,
case 26: /* NaT Consumption */
if (user_mode(&regs)) {
+ struct siginfo siginfo;
void __user *addr;
if (((isr >> 4) & 0xf) == 2) {
@@ -499,6 +505,7 @@ ia64_fault (unsigned long vector, unsigned long isr, unsigned long ifa,
addr = (void __user *) (regs.cr_iip
+ ia64_psr(&regs)->ri);
}
+ clear_siginfo(&siginfo);
siginfo.si_signo = sig;
siginfo.si_code = code;
siginfo.si_errno = 0;
@@ -515,6 +522,9 @@ ia64_fault (unsigned long vector, unsigned long isr, unsigned long ifa,
case 31: /* Unsupported Data Reference */
if (user_mode(&regs)) {
+ struct siginfo siginfo;
+
+ clear_siginfo(&siginfo);
siginfo.si_signo = SIGILL;
siginfo.si_code = ILL_ILLOPN;
siginfo.si_errno = 0;
@@ -531,6 +541,10 @@ ia64_fault (unsigned long vector, unsigned long isr, unsigned long ifa,
case 29: /* Debug */
case 35: /* Taken Branch Trap */
case 36: /* Single Step Trap */
+ {
+ struct siginfo siginfo;
+
+ clear_siginfo(&siginfo);
if (fsys_mode(current, &regs)) {
extern char __kernel_syscall_via_break[];
/*
@@ -578,11 +592,15 @@ ia64_fault (unsigned long vector, unsigned long isr, unsigned long ifa,
siginfo.si_isr = isr;
force_sig_info(SIGTRAP, &siginfo, current);
return;
+ }
case 32: /* fp fault */
case 33: /* fp trap */
result = handle_fpu_swa((vector == 32) ? 1 : 0, &regs, isr);
if ((result < 0) || (current->thread.flags & IA64_THREAD_FPEMU_SIGFPE)) {
+ struct siginfo siginfo;
+
+ clear_siginfo(&siginfo);
siginfo.si_signo = SIGFPE;
siginfo.si_errno = 0;
siginfo.si_code = FPE_FLTINV;
@@ -616,6 +634,9 @@ ia64_fault (unsigned long vector, unsigned long isr, unsigned long ifa,
} else {
/* Unimplemented Instr. Address Trap */
if (user_mode(&regs)) {
+ struct siginfo siginfo;
+
+ clear_siginfo(&siginfo);
siginfo.si_signo = SIGILL;
siginfo.si_code = ILL_BADIADDR;
siginfo.si_errno = 0;
diff --git a/arch/ia64/kernel/unaligned.c b/arch/ia64/kernel/unaligned.c
index 72e9b4242564..e309f9859acc 100644
--- a/arch/ia64/kernel/unaligned.c
+++ b/arch/ia64/kernel/unaligned.c
@@ -1537,6 +1537,7 @@ ia64_handle_unaligned (unsigned long ifa, struct pt_regs *regs)
/* NOT_REACHED */
}
force_sigbus:
+ clear_siginfo(&si);
si.si_signo = SIGBUS;
si.si_errno = 0;
si.si_code = BUS_ADRALN;
diff --git a/arch/ia64/mm/fault.c b/arch/ia64/mm/fault.c
index dfdc152d6737..817fa120645f 100644
--- a/arch/ia64/mm/fault.c
+++ b/arch/ia64/mm/fault.c
@@ -85,7 +85,6 @@ ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *re
int signal = SIGSEGV, code = SEGV_MAPERR;
struct vm_area_struct *vma, *prev_vma;
struct mm_struct *mm = current->mm;
- struct siginfo si;
unsigned long mask;
int fault;
unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
@@ -249,6 +248,9 @@ retry:
return;
}
if (user_mode(regs)) {
+ struct siginfo si;
+
+ clear_siginfo(&si);
si.si_signo = signal;
si.si_errno = 0;
si.si_code = code;
diff --git a/arch/ia64/sn/kernel/io_common.c b/arch/ia64/sn/kernel/io_common.c
index 11f2275570fb..8479e9a7ce16 100644
--- a/arch/ia64/sn/kernel/io_common.c
+++ b/arch/ia64/sn/kernel/io_common.c
@@ -480,11 +480,6 @@ sn_io_early_init(void)
tioca_init_provider();
tioce_init_provider();
- /*
- * This is needed to avoid bounce limit checks in the blk layer
- */
- ia64_max_iommu_merge_mask = ~PAGE_MASK;
-
sn_irq_lh_init();
INIT_LIST_HEAD(&sn_sysdata_list);
sn_init_cpei_timer();
diff --git a/arch/ia64/sn/kernel/sn2/prominfo_proc.c b/arch/ia64/sn/kernel/sn2/prominfo_proc.c
index ec4de2b09653..e15457bf21ac 100644
--- a/arch/ia64/sn/kernel/sn2/prominfo_proc.c
+++ b/arch/ia64/sn/kernel/sn2/prominfo_proc.c
@@ -140,18 +140,6 @@ static int proc_fit_show(struct seq_file *m, void *v)
return 0;
}
-static int proc_fit_open(struct inode *inode, struct file *file)
-{
- return single_open(file, proc_fit_show, PDE_DATA(inode));
-}
-
-static const struct file_operations proc_fit_fops = {
- .open = proc_fit_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
static int proc_version_show(struct seq_file *m, void *v)
{
unsigned long nasid = (unsigned long)m->private;
@@ -174,18 +162,6 @@ static int proc_version_show(struct seq_file *m, void *v)
return 0;
}
-static int proc_version_open(struct inode *inode, struct file *file)
-{
- return single_open(file, proc_version_show, PDE_DATA(inode));
-}
-
-static const struct file_operations proc_version_fops = {
- .open = proc_version_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
/* module entry points */
int __init prominfo_init(void);
void __exit prominfo_exit(void);
@@ -217,10 +193,10 @@ int __init prominfo_init(void)
if (!dir)
continue;
nasid = cnodeid_to_nasid(cnodeid);
- proc_create_data("fit", 0, dir,
- &proc_fit_fops, (void *)nasid);
- proc_create_data("version", 0, dir,
- &proc_version_fops, (void *)nasid);
+ proc_create_single_data("fit", 0, dir, proc_fit_show,
+ (void *)nasid);
+ proc_create_single_data("version", 0, dir, proc_version_show,
+ (void *)nasid);
}
return 0;
}
diff --git a/arch/ia64/sn/kernel/sn2/sn_proc_fs.c b/arch/ia64/sn/kernel/sn2/sn_proc_fs.c
index 29cf8f8c08e9..c2a4d84297b0 100644
--- a/arch/ia64/sn/kernel/sn2/sn_proc_fs.c
+++ b/arch/ia64/sn/kernel/sn2/sn_proc_fs.c
@@ -18,33 +18,18 @@ static int partition_id_show(struct seq_file *s, void *p)
return 0;
}
-static int partition_id_open(struct inode *inode, struct file *file)
-{
- return single_open(file, partition_id_show, NULL);
-}
-
static int system_serial_number_show(struct seq_file *s, void *p)
{
seq_printf(s, "%s\n", sn_system_serial_number());
return 0;
}
-static int system_serial_number_open(struct inode *inode, struct file *file)
-{
- return single_open(file, system_serial_number_show, NULL);
-}
-
static int licenseID_show(struct seq_file *s, void *p)
{
seq_printf(s, "0x%llx\n", sn_partition_serial_number_val());
return 0;
}
-static int licenseID_open(struct inode *inode, struct file *file)
-{
- return single_open(file, licenseID_show, NULL);
-}
-
static int coherence_id_show(struct seq_file *s, void *p)
{
seq_printf(s, "%d\n", partition_coherence_id());
@@ -52,43 +37,10 @@ static int coherence_id_show(struct seq_file *s, void *p)
return 0;
}
-static int coherence_id_open(struct inode *inode, struct file *file)
-{
- return single_open(file, coherence_id_show, NULL);
-}
-
/* /proc/sgi_sn/sn_topology uses seq_file, see sn_hwperf.c */
extern int sn_topology_open(struct inode *, struct file *);
extern int sn_topology_release(struct inode *, struct file *);
-static const struct file_operations proc_partition_id_fops = {
- .open = partition_id_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
-static const struct file_operations proc_system_sn_fops = {
- .open = system_serial_number_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
-static const struct file_operations proc_license_id_fops = {
- .open = licenseID_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
-static const struct file_operations proc_coherence_id_fops = {
- .open = coherence_id_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
static const struct file_operations proc_sn_topo_fops = {
.open = sn_topology_open,
.read = seq_read,
@@ -104,13 +56,13 @@ void register_sn_procfs(void)
if (!(sgi_proc_dir = proc_mkdir("sgi_sn", NULL)))
return;
- proc_create("partition_id", 0444, sgi_proc_dir,
- &proc_partition_id_fops);
- proc_create("system_serial_number", 0444, sgi_proc_dir,
- &proc_system_sn_fops);
- proc_create("licenseID", 0444, sgi_proc_dir, &proc_license_id_fops);
- proc_create("coherence_id", 0444, sgi_proc_dir,
- &proc_coherence_id_fops);
+ proc_create_single("partition_id", 0444, sgi_proc_dir,
+ partition_id_show);
+ proc_create_single("system_serial_number", 0444, sgi_proc_dir,
+ system_serial_number_show);
+ proc_create_single("licenseID", 0444, sgi_proc_dir, licenseID_show);
+ proc_create_single("coherence_id", 0444, sgi_proc_dir,
+ coherence_id_show);
proc_create("sn_topology", 0444, sgi_proc_dir, &proc_sn_topo_fops);
}
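
proc_create_single() and proc_create_single_data() are the single_open() analogues: only the show function (plus optional private data) is passed, with read/llseek/release handled internally. Signatures from memory, so treat as a sketch:

struct proc_dir_entry *proc_create_single_data(const char *name,
		umode_t mode, struct proc_dir_entry *parent,
		int (*show)(struct seq_file *, void *), void *data);
/* proc_create_single(name, mode, parent, show) passes NULL data. */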
diff --git a/arch/m68k/68000/timers.c b/arch/m68k/68000/timers.c
index 252455bce144..71ddb4c98726 100644
--- a/arch/m68k/68000/timers.c
+++ b/arch/m68k/68000/timers.c
@@ -125,7 +125,9 @@ int m68328_hwclk(int set, struct rtc_time *t)
{
if (!set) {
long now = RTCTIME;
- t->tm_year = t->tm_mon = t->tm_mday = 1;
+ t->tm_year = 1;
+ t->tm_mon = 0;
+ t->tm_mday = 1;
t->tm_hour = (now >> 24) % 24;
t->tm_min = (now >> 16) % 60;
t->tm_sec = now % 60;
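
struct rtc_time follows the struct tm conventions: tm_mon runs 0-11 and tm_year counts from 1900, which is what this fix and the Apollo one below correct. An illustrative conversion from hardware registers (hw_month being 1-12 and hw_year being two-digit are assumptions):

static void hw_to_rtc_time(struct rtc_time *t, unsigned int hw_month,
			   unsigned int hw_year)
{
	t->tm_mon  = hw_month - 1;	/* hardware 1-12 -> rtc_time 0-11 */
	t->tm_year = hw_year;		/* years since 1900 */
	if (t->tm_year < 70)
		t->tm_year += 100;	/* 00-69 taken as 2000-2069 */
}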
diff --git a/arch/m68k/apollo/config.c b/arch/m68k/apollo/config.c
index 0d27706f14d4..b2a6bc63f8cd 100644
--- a/arch/m68k/apollo/config.c
+++ b/arch/m68k/apollo/config.c
@@ -221,8 +221,10 @@ int dn_dummy_hwclk(int op, struct rtc_time *t) {
t->tm_hour=rtc->hours;
t->tm_mday=rtc->day_of_month;
t->tm_wday=rtc->day_of_week;
- t->tm_mon=rtc->month;
+ t->tm_mon = rtc->month - 1;
t->tm_year=rtc->year;
+ if (t->tm_year < 70)
+ t->tm_year += 100;
} else {
rtc->second=t->tm_sec;
rtc->minute=t->tm_min;
@@ -230,8 +232,8 @@ int dn_dummy_hwclk(int op, struct rtc_time *t) {
rtc->day_of_month=t->tm_mday;
if(t->tm_wday!=-1)
rtc->day_of_week=t->tm_wday;
- rtc->month=t->tm_mon;
- rtc->year=t->tm_year;
+ rtc->month = t->tm_mon + 1;
+ rtc->year = t->tm_year % 100;
}
return 0;
diff --git a/arch/m68k/configs/amiga_defconfig b/arch/m68k/configs/amiga_defconfig
index 37a8e5ab8728..a874e54404d1 100644
--- a/arch/m68k/configs/amiga_defconfig
+++ b/arch/m68k/configs/amiga_defconfig
@@ -98,8 +98,8 @@ CONFIG_NF_CONNTRACK_SANE=m
CONFIG_NF_CONNTRACK_SIP=m
CONFIG_NF_CONNTRACK_TFTP=m
CONFIG_NF_TABLES=m
-CONFIG_NF_TABLES_INET=m
-CONFIG_NF_TABLES_NETDEV=m
+CONFIG_NF_TABLES_INET=y
+CONFIG_NF_TABLES_NETDEV=y
CONFIG_NFT_EXTHDR=m
CONFIG_NFT_META=m
CONFIG_NFT_RT=m
@@ -204,7 +204,7 @@ CONFIG_NF_SOCKET_IPV4=m
CONFIG_NFT_CHAIN_ROUTE_IPV4=m
CONFIG_NFT_DUP_IPV4=m
CONFIG_NFT_FIB_IPV4=m
-CONFIG_NF_TABLES_ARP=m
+CONFIG_NF_TABLES_ARP=y
CONFIG_NF_FLOW_TABLE_IPV4=m
CONFIG_NF_LOG_ARP=m
CONFIG_NFT_CHAIN_NAT_IPV4=m
@@ -233,12 +233,12 @@ CONFIG_IP_NF_ARP_MANGLE=m
CONFIG_NF_CONNTRACK_IPV6=m
CONFIG_NF_SOCKET_IPV6=m
CONFIG_NFT_CHAIN_ROUTE_IPV6=m
-CONFIG_NFT_DUP_IPV6=m
-CONFIG_NFT_FIB_IPV6=m
-CONFIG_NF_FLOW_TABLE_IPV6=m
CONFIG_NFT_CHAIN_NAT_IPV6=m
CONFIG_NFT_MASQ_IPV6=m
CONFIG_NFT_REDIR_IPV6=m
+CONFIG_NFT_DUP_IPV6=m
+CONFIG_NFT_FIB_IPV6=m
+CONFIG_NF_FLOW_TABLE_IPV6=m
CONFIG_IP6_NF_IPTABLES=m
CONFIG_IP6_NF_MATCH_AH=m
CONFIG_IP6_NF_MATCH_EUI64=m
@@ -259,7 +259,7 @@ CONFIG_IP6_NF_RAW=m
CONFIG_IP6_NF_NAT=m
CONFIG_IP6_NF_TARGET_MASQUERADE=m
CONFIG_IP6_NF_TARGET_NPT=m
-CONFIG_NF_TABLES_BRIDGE=m
+CONFIG_NF_TABLES_BRIDGE=y
CONFIG_NFT_BRIDGE_META=m
CONFIG_NFT_BRIDGE_REJECT=m
CONFIG_NF_LOG_BRIDGE=m
@@ -310,7 +310,6 @@ CONFIG_NET_MPLS_GSO=m
CONFIG_MPLS_ROUTING=m
CONFIG_MPLS_IPTUNNEL=m
CONFIG_NET_NSH=m
-CONFIG_NET_L3_MASTER_DEV=y
CONFIG_AF_KCM=m
# CONFIG_WIRELESS is not set
CONFIG_PSAMPLE=m
@@ -414,6 +413,7 @@ CONFIG_ARIADNE=y
# CONFIG_NET_VENDOR_MARVELL is not set
# CONFIG_NET_VENDOR_MICREL is not set
# CONFIG_NET_VENDOR_NETRONOME is not set
+# CONFIG_NET_VENDOR_NI is not set
CONFIG_HYDRA=y
CONFIG_APNE=y
CONFIG_ZORRO8390=y
@@ -485,6 +485,7 @@ CONFIG_RTC_DRV_MSM6242=m
CONFIG_RTC_DRV_RP5C01=m
# CONFIG_VIRTIO_MENU is not set
# CONFIG_IOMMU_SUPPORT is not set
+CONFIG_DAX=m
CONFIG_HEARTBEAT=y
CONFIG_PROC_HARDWARE=y
CONFIG_AMIGA_BUILTIN_SERIAL=y
@@ -621,6 +622,7 @@ CONFIG_CRYPTO_CRYPTD=m
CONFIG_CRYPTO_MCRYPTD=m
CONFIG_CRYPTO_TEST=m
CONFIG_CRYPTO_CHACHA20POLY1305=m
+CONFIG_CRYPTO_CFB=m
CONFIG_CRYPTO_LRW=m
CONFIG_CRYPTO_PCBC=m
CONFIG_CRYPTO_KEYWRAP=m
@@ -647,6 +649,8 @@ CONFIG_CRYPTO_KHAZAD=m
CONFIG_CRYPTO_SALSA20=m
CONFIG_CRYPTO_SEED=m
CONFIG_CRYPTO_SERPENT=m
+CONFIG_CRYPTO_SM4=m
+CONFIG_CRYPTO_SPECK=m
CONFIG_CRYPTO_TEA=m
CONFIG_CRYPTO_TWOFISH=m
CONFIG_CRYPTO_LZO=m
diff --git a/arch/m68k/configs/apollo_defconfig b/arch/m68k/configs/apollo_defconfig
index 6a466266b852..8ce39e23aa42 100644
--- a/arch/m68k/configs/apollo_defconfig
+++ b/arch/m68k/configs/apollo_defconfig
@@ -96,8 +96,8 @@ CONFIG_NF_CONNTRACK_SANE=m
CONFIG_NF_CONNTRACK_SIP=m
CONFIG_NF_CONNTRACK_TFTP=m
CONFIG_NF_TABLES=m
-CONFIG_NF_TABLES_INET=m
-CONFIG_NF_TABLES_NETDEV=m
+CONFIG_NF_TABLES_INET=y
+CONFIG_NF_TABLES_NETDEV=y
CONFIG_NFT_EXTHDR=m
CONFIG_NFT_META=m
CONFIG_NFT_RT=m
@@ -202,7 +202,7 @@ CONFIG_NF_SOCKET_IPV4=m
CONFIG_NFT_CHAIN_ROUTE_IPV4=m
CONFIG_NFT_DUP_IPV4=m
CONFIG_NFT_FIB_IPV4=m
-CONFIG_NF_TABLES_ARP=m
+CONFIG_NF_TABLES_ARP=y
CONFIG_NF_FLOW_TABLE_IPV4=m
CONFIG_NF_LOG_ARP=m
CONFIG_NFT_CHAIN_NAT_IPV4=m
@@ -231,12 +231,12 @@ CONFIG_IP_NF_ARP_MANGLE=m
CONFIG_NF_CONNTRACK_IPV6=m
CONFIG_NF_SOCKET_IPV6=m
CONFIG_NFT_CHAIN_ROUTE_IPV6=m
-CONFIG_NFT_DUP_IPV6=m
-CONFIG_NFT_FIB_IPV6=m
-CONFIG_NF_FLOW_TABLE_IPV6=m
CONFIG_NFT_CHAIN_NAT_IPV6=m
CONFIG_NFT_MASQ_IPV6=m
CONFIG_NFT_REDIR_IPV6=m
+CONFIG_NFT_DUP_IPV6=m
+CONFIG_NFT_FIB_IPV6=m
+CONFIG_NF_FLOW_TABLE_IPV6=m
CONFIG_IP6_NF_IPTABLES=m
CONFIG_IP6_NF_MATCH_AH=m
CONFIG_IP6_NF_MATCH_EUI64=m
@@ -257,7 +257,7 @@ CONFIG_IP6_NF_RAW=m
CONFIG_IP6_NF_NAT=m
CONFIG_IP6_NF_TARGET_MASQUERADE=m
CONFIG_IP6_NF_TARGET_NPT=m
-CONFIG_NF_TABLES_BRIDGE=m
+CONFIG_NF_TABLES_BRIDGE=y
CONFIG_NFT_BRIDGE_META=m
CONFIG_NFT_BRIDGE_REJECT=m
CONFIG_NF_LOG_BRIDGE=m
@@ -308,7 +308,6 @@ CONFIG_NET_MPLS_GSO=m
CONFIG_MPLS_ROUTING=m
CONFIG_MPLS_IPTUNNEL=m
CONFIG_NET_NSH=m
-CONFIG_NET_L3_MASTER_DEV=y
CONFIG_AF_KCM=m
# CONFIG_WIRELESS is not set
CONFIG_PSAMPLE=m
@@ -392,6 +391,7 @@ CONFIG_VETH=m
# CONFIG_NET_VENDOR_MICREL is not set
# CONFIG_NET_VENDOR_NATSEMI is not set
# CONFIG_NET_VENDOR_NETRONOME is not set
+# CONFIG_NET_VENDOR_NI is not set
# CONFIG_NET_VENDOR_QUALCOMM is not set
# CONFIG_NET_VENDOR_RENESAS is not set
# CONFIG_NET_VENDOR_ROCKER is not set
@@ -446,6 +446,7 @@ CONFIG_RTC_CLASS=y
CONFIG_RTC_DRV_GENERIC=m
# CONFIG_VIRTIO_MENU is not set
# CONFIG_IOMMU_SUPPORT is not set
+CONFIG_DAX=m
CONFIG_HEARTBEAT=y
CONFIG_PROC_HARDWARE=y
CONFIG_EXT4_FS=y
@@ -580,6 +581,7 @@ CONFIG_CRYPTO_CRYPTD=m
CONFIG_CRYPTO_MCRYPTD=m
CONFIG_CRYPTO_TEST=m
CONFIG_CRYPTO_CHACHA20POLY1305=m
+CONFIG_CRYPTO_CFB=m
CONFIG_CRYPTO_LRW=m
CONFIG_CRYPTO_PCBC=m
CONFIG_CRYPTO_KEYWRAP=m
@@ -606,6 +608,8 @@ CONFIG_CRYPTO_KHAZAD=m
CONFIG_CRYPTO_SALSA20=m
CONFIG_CRYPTO_SEED=m
CONFIG_CRYPTO_SERPENT=m
+CONFIG_CRYPTO_SM4=m
+CONFIG_CRYPTO_SPECK=m
CONFIG_CRYPTO_TEA=m
CONFIG_CRYPTO_TWOFISH=m
CONFIG_CRYPTO_LZO=m
diff --git a/arch/m68k/configs/atari_defconfig b/arch/m68k/configs/atari_defconfig
index b0691a7a3345..346c4e75edf8 100644
--- a/arch/m68k/configs/atari_defconfig
+++ b/arch/m68k/configs/atari_defconfig
@@ -96,8 +96,8 @@ CONFIG_NF_CONNTRACK_SANE=m
CONFIG_NF_CONNTRACK_SIP=m
CONFIG_NF_CONNTRACK_TFTP=m
CONFIG_NF_TABLES=m
-CONFIG_NF_TABLES_INET=m
-CONFIG_NF_TABLES_NETDEV=m
+CONFIG_NF_TABLES_INET=y
+CONFIG_NF_TABLES_NETDEV=y
CONFIG_NFT_EXTHDR=m
CONFIG_NFT_META=m
CONFIG_NFT_RT=m
@@ -202,7 +202,7 @@ CONFIG_NF_SOCKET_IPV4=m
CONFIG_NFT_CHAIN_ROUTE_IPV4=m
CONFIG_NFT_DUP_IPV4=m
CONFIG_NFT_FIB_IPV4=m
-CONFIG_NF_TABLES_ARP=m
+CONFIG_NF_TABLES_ARP=y
CONFIG_NF_FLOW_TABLE_IPV4=m
CONFIG_NF_LOG_ARP=m
CONFIG_NFT_CHAIN_NAT_IPV4=m
@@ -231,12 +231,12 @@ CONFIG_IP_NF_ARP_MANGLE=m
CONFIG_NF_CONNTRACK_IPV6=m
CONFIG_NF_SOCKET_IPV6=m
CONFIG_NFT_CHAIN_ROUTE_IPV6=m
-CONFIG_NFT_DUP_IPV6=m
-CONFIG_NFT_FIB_IPV6=m
-CONFIG_NF_FLOW_TABLE_IPV6=m
CONFIG_NFT_CHAIN_NAT_IPV6=m
CONFIG_NFT_MASQ_IPV6=m
CONFIG_NFT_REDIR_IPV6=m
+CONFIG_NFT_DUP_IPV6=m
+CONFIG_NFT_FIB_IPV6=m
+CONFIG_NF_FLOW_TABLE_IPV6=m
CONFIG_IP6_NF_IPTABLES=m
CONFIG_IP6_NF_MATCH_AH=m
CONFIG_IP6_NF_MATCH_EUI64=m
@@ -257,7 +257,7 @@ CONFIG_IP6_NF_RAW=m
CONFIG_IP6_NF_NAT=m
CONFIG_IP6_NF_TARGET_MASQUERADE=m
CONFIG_IP6_NF_TARGET_NPT=m
-CONFIG_NF_TABLES_BRIDGE=m
+CONFIG_NF_TABLES_BRIDGE=y
CONFIG_NFT_BRIDGE_META=m
CONFIG_NFT_BRIDGE_REJECT=m
CONFIG_NF_LOG_BRIDGE=m
@@ -308,7 +308,6 @@ CONFIG_NET_MPLS_GSO=m
CONFIG_MPLS_ROUTING=m
CONFIG_MPLS_IPTUNNEL=m
CONFIG_NET_NSH=m
-CONFIG_NET_L3_MASTER_DEV=y
CONFIG_AF_KCM=m
# CONFIG_WIRELESS is not set
CONFIG_PSAMPLE=m
@@ -401,6 +400,7 @@ CONFIG_ATARILANCE=y
# CONFIG_NET_VENDOR_MARVELL is not set
# CONFIG_NET_VENDOR_MICREL is not set
# CONFIG_NET_VENDOR_NETRONOME is not set
+# CONFIG_NET_VENDOR_NI is not set
CONFIG_NE2000=y
# CONFIG_NET_VENDOR_QUALCOMM is not set
# CONFIG_NET_VENDOR_RENESAS is not set
@@ -461,6 +461,7 @@ CONFIG_RTC_CLASS=y
CONFIG_RTC_DRV_GENERIC=m
# CONFIG_VIRTIO_MENU is not set
# CONFIG_IOMMU_SUPPORT is not set
+CONFIG_DAX=m
CONFIG_HEARTBEAT=y
CONFIG_PROC_HARDWARE=y
CONFIG_NATFEAT=y
@@ -602,6 +603,7 @@ CONFIG_CRYPTO_CRYPTD=m
CONFIG_CRYPTO_MCRYPTD=m
CONFIG_CRYPTO_TEST=m
CONFIG_CRYPTO_CHACHA20POLY1305=m
+CONFIG_CRYPTO_CFB=m
CONFIG_CRYPTO_LRW=m
CONFIG_CRYPTO_PCBC=m
CONFIG_CRYPTO_KEYWRAP=m
@@ -628,6 +630,8 @@ CONFIG_CRYPTO_KHAZAD=m
CONFIG_CRYPTO_SALSA20=m
CONFIG_CRYPTO_SEED=m
CONFIG_CRYPTO_SERPENT=m
+CONFIG_CRYPTO_SM4=m
+CONFIG_CRYPTO_SPECK=m
CONFIG_CRYPTO_TEA=m
CONFIG_CRYPTO_TWOFISH=m
CONFIG_CRYPTO_LZO=m
diff --git a/arch/m68k/configs/bvme6000_defconfig b/arch/m68k/configs/bvme6000_defconfig
index 6f6470fa9a50..fca9c7aa71a3 100644
--- a/arch/m68k/configs/bvme6000_defconfig
+++ b/arch/m68k/configs/bvme6000_defconfig
@@ -94,8 +94,8 @@ CONFIG_NF_CONNTRACK_SANE=m
CONFIG_NF_CONNTRACK_SIP=m
CONFIG_NF_CONNTRACK_TFTP=m
CONFIG_NF_TABLES=m
-CONFIG_NF_TABLES_INET=m
-CONFIG_NF_TABLES_NETDEV=m
+CONFIG_NF_TABLES_INET=y
+CONFIG_NF_TABLES_NETDEV=y
CONFIG_NFT_EXTHDR=m
CONFIG_NFT_META=m
CONFIG_NFT_RT=m
@@ -200,7 +200,7 @@ CONFIG_NF_SOCKET_IPV4=m
CONFIG_NFT_CHAIN_ROUTE_IPV4=m
CONFIG_NFT_DUP_IPV4=m
CONFIG_NFT_FIB_IPV4=m
-CONFIG_NF_TABLES_ARP=m
+CONFIG_NF_TABLES_ARP=y
CONFIG_NF_FLOW_TABLE_IPV4=m
CONFIG_NF_LOG_ARP=m
CONFIG_NFT_CHAIN_NAT_IPV4=m
@@ -229,12 +229,12 @@ CONFIG_IP_NF_ARP_MANGLE=m
CONFIG_NF_CONNTRACK_IPV6=m
CONFIG_NF_SOCKET_IPV6=m
CONFIG_NFT_CHAIN_ROUTE_IPV6=m
-CONFIG_NFT_DUP_IPV6=m
-CONFIG_NFT_FIB_IPV6=m
-CONFIG_NF_FLOW_TABLE_IPV6=m
CONFIG_NFT_CHAIN_NAT_IPV6=m
CONFIG_NFT_MASQ_IPV6=m
CONFIG_NFT_REDIR_IPV6=m
+CONFIG_NFT_DUP_IPV6=m
+CONFIG_NFT_FIB_IPV6=m
+CONFIG_NF_FLOW_TABLE_IPV6=m
CONFIG_IP6_NF_IPTABLES=m
CONFIG_IP6_NF_MATCH_AH=m
CONFIG_IP6_NF_MATCH_EUI64=m
@@ -255,7 +255,7 @@ CONFIG_IP6_NF_RAW=m
CONFIG_IP6_NF_NAT=m
CONFIG_IP6_NF_TARGET_MASQUERADE=m
CONFIG_IP6_NF_TARGET_NPT=m
-CONFIG_NF_TABLES_BRIDGE=m
+CONFIG_NF_TABLES_BRIDGE=y
CONFIG_NFT_BRIDGE_META=m
CONFIG_NFT_BRIDGE_REJECT=m
CONFIG_NF_LOG_BRIDGE=m
@@ -306,7 +306,6 @@ CONFIG_NET_MPLS_GSO=m
CONFIG_MPLS_ROUTING=m
CONFIG_MPLS_IPTUNNEL=m
CONFIG_NET_NSH=m
-CONFIG_NET_L3_MASTER_DEV=y
CONFIG_AF_KCM=m
# CONFIG_WIRELESS is not set
CONFIG_PSAMPLE=m
@@ -391,6 +390,7 @@ CONFIG_BVME6000_NET=y
# CONFIG_NET_VENDOR_MICREL is not set
# CONFIG_NET_VENDOR_NATSEMI is not set
# CONFIG_NET_VENDOR_NETRONOME is not set
+# CONFIG_NET_VENDOR_NI is not set
# CONFIG_NET_VENDOR_QUALCOMM is not set
# CONFIG_NET_VENDOR_RENESAS is not set
# CONFIG_NET_VENDOR_ROCKER is not set
@@ -439,6 +439,7 @@ CONFIG_RTC_CLASS=y
CONFIG_RTC_DRV_GENERIC=m
# CONFIG_VIRTIO_MENU is not set
# CONFIG_IOMMU_SUPPORT is not set
+CONFIG_DAX=m
CONFIG_PROC_HARDWARE=y
CONFIG_EXT4_FS=y
CONFIG_REISERFS_FS=m
@@ -572,6 +573,7 @@ CONFIG_CRYPTO_CRYPTD=m
CONFIG_CRYPTO_MCRYPTD=m
CONFIG_CRYPTO_TEST=m
CONFIG_CRYPTO_CHACHA20POLY1305=m
+CONFIG_CRYPTO_CFB=m
CONFIG_CRYPTO_LRW=m
CONFIG_CRYPTO_PCBC=m
CONFIG_CRYPTO_KEYWRAP=m
@@ -598,6 +600,8 @@ CONFIG_CRYPTO_KHAZAD=m
CONFIG_CRYPTO_SALSA20=m
CONFIG_CRYPTO_SEED=m
CONFIG_CRYPTO_SERPENT=m
+CONFIG_CRYPTO_SM4=m
+CONFIG_CRYPTO_SPECK=m
CONFIG_CRYPTO_TEA=m
CONFIG_CRYPTO_TWOFISH=m
CONFIG_CRYPTO_LZO=m
diff --git a/arch/m68k/configs/hp300_defconfig b/arch/m68k/configs/hp300_defconfig
index 31a1a2b5e860..f9eab174915c 100644
--- a/arch/m68k/configs/hp300_defconfig
+++ b/arch/m68k/configs/hp300_defconfig
@@ -96,8 +96,8 @@ CONFIG_NF_CONNTRACK_SANE=m
CONFIG_NF_CONNTRACK_SIP=m
CONFIG_NF_CONNTRACK_TFTP=m
CONFIG_NF_TABLES=m
-CONFIG_NF_TABLES_INET=m
-CONFIG_NF_TABLES_NETDEV=m
+CONFIG_NF_TABLES_INET=y
+CONFIG_NF_TABLES_NETDEV=y
CONFIG_NFT_EXTHDR=m
CONFIG_NFT_META=m
CONFIG_NFT_RT=m
@@ -202,7 +202,7 @@ CONFIG_NF_SOCKET_IPV4=m
CONFIG_NFT_CHAIN_ROUTE_IPV4=m
CONFIG_NFT_DUP_IPV4=m
CONFIG_NFT_FIB_IPV4=m
-CONFIG_NF_TABLES_ARP=m
+CONFIG_NF_TABLES_ARP=y
CONFIG_NF_FLOW_TABLE_IPV4=m
CONFIG_NF_LOG_ARP=m
CONFIG_NFT_CHAIN_NAT_IPV4=m
@@ -231,12 +231,12 @@ CONFIG_IP_NF_ARP_MANGLE=m
CONFIG_NF_CONNTRACK_IPV6=m
CONFIG_NF_SOCKET_IPV6=m
CONFIG_NFT_CHAIN_ROUTE_IPV6=m
-CONFIG_NFT_DUP_IPV6=m
-CONFIG_NFT_FIB_IPV6=m
-CONFIG_NF_FLOW_TABLE_IPV6=m
CONFIG_NFT_CHAIN_NAT_IPV6=m
CONFIG_NFT_MASQ_IPV6=m
CONFIG_NFT_REDIR_IPV6=m
+CONFIG_NFT_DUP_IPV6=m
+CONFIG_NFT_FIB_IPV6=m
+CONFIG_NF_FLOW_TABLE_IPV6=m
CONFIG_IP6_NF_IPTABLES=m
CONFIG_IP6_NF_MATCH_AH=m
CONFIG_IP6_NF_MATCH_EUI64=m
@@ -257,7 +257,7 @@ CONFIG_IP6_NF_RAW=m
CONFIG_IP6_NF_NAT=m
CONFIG_IP6_NF_TARGET_MASQUERADE=m
CONFIG_IP6_NF_TARGET_NPT=m
-CONFIG_NF_TABLES_BRIDGE=m
+CONFIG_NF_TABLES_BRIDGE=y
CONFIG_NFT_BRIDGE_META=m
CONFIG_NFT_BRIDGE_REJECT=m
CONFIG_NF_LOG_BRIDGE=m
@@ -308,7 +308,6 @@ CONFIG_NET_MPLS_GSO=m
CONFIG_MPLS_ROUTING=m
CONFIG_MPLS_IPTUNNEL=m
CONFIG_NET_NSH=m
-CONFIG_NET_L3_MASTER_DEV=y
CONFIG_AF_KCM=m
# CONFIG_WIRELESS is not set
CONFIG_PSAMPLE=m
@@ -393,6 +392,7 @@ CONFIG_HPLANCE=y
# CONFIG_NET_VENDOR_MICREL is not set
# CONFIG_NET_VENDOR_NATSEMI is not set
# CONFIG_NET_VENDOR_NETRONOME is not set
+# CONFIG_NET_VENDOR_NI is not set
# CONFIG_NET_VENDOR_QUALCOMM is not set
# CONFIG_NET_VENDOR_RENESAS is not set
# CONFIG_NET_VENDOR_ROCKER is not set
@@ -449,6 +449,7 @@ CONFIG_RTC_CLASS=y
CONFIG_RTC_DRV_GENERIC=m
# CONFIG_VIRTIO_MENU is not set
# CONFIG_IOMMU_SUPPORT is not set
+CONFIG_DAX=m
CONFIG_PROC_HARDWARE=y
CONFIG_EXT4_FS=y
CONFIG_REISERFS_FS=m
@@ -582,6 +583,7 @@ CONFIG_CRYPTO_CRYPTD=m
CONFIG_CRYPTO_MCRYPTD=m
CONFIG_CRYPTO_TEST=m
CONFIG_CRYPTO_CHACHA20POLY1305=m
+CONFIG_CRYPTO_CFB=m
CONFIG_CRYPTO_LRW=m
CONFIG_CRYPTO_PCBC=m
CONFIG_CRYPTO_KEYWRAP=m
@@ -608,6 +610,8 @@ CONFIG_CRYPTO_KHAZAD=m
CONFIG_CRYPTO_SALSA20=m
CONFIG_CRYPTO_SEED=m
CONFIG_CRYPTO_SERPENT=m
+CONFIG_CRYPTO_SM4=m
+CONFIG_CRYPTO_SPECK=m
CONFIG_CRYPTO_TEA=m
CONFIG_CRYPTO_TWOFISH=m
CONFIG_CRYPTO_LZO=m
diff --git a/arch/m68k/configs/mac_defconfig b/arch/m68k/configs/mac_defconfig
index 390d4a87441c..b52e597899eb 100644
--- a/arch/m68k/configs/mac_defconfig
+++ b/arch/m68k/configs/mac_defconfig
@@ -95,8 +95,8 @@ CONFIG_NF_CONNTRACK_SANE=m
CONFIG_NF_CONNTRACK_SIP=m
CONFIG_NF_CONNTRACK_TFTP=m
CONFIG_NF_TABLES=m
-CONFIG_NF_TABLES_INET=m
-CONFIG_NF_TABLES_NETDEV=m
+CONFIG_NF_TABLES_INET=y
+CONFIG_NF_TABLES_NETDEV=y
CONFIG_NFT_EXTHDR=m
CONFIG_NFT_META=m
CONFIG_NFT_RT=m
@@ -201,7 +201,7 @@ CONFIG_NF_SOCKET_IPV4=m
CONFIG_NFT_CHAIN_ROUTE_IPV4=m
CONFIG_NFT_DUP_IPV4=m
CONFIG_NFT_FIB_IPV4=m
-CONFIG_NF_TABLES_ARP=m
+CONFIG_NF_TABLES_ARP=y
CONFIG_NF_FLOW_TABLE_IPV4=m
CONFIG_NF_LOG_ARP=m
CONFIG_NFT_CHAIN_NAT_IPV4=m
@@ -230,12 +230,12 @@ CONFIG_IP_NF_ARP_MANGLE=m
CONFIG_NF_CONNTRACK_IPV6=m
CONFIG_NF_SOCKET_IPV6=m
CONFIG_NFT_CHAIN_ROUTE_IPV6=m
-CONFIG_NFT_DUP_IPV6=m
-CONFIG_NFT_FIB_IPV6=m
-CONFIG_NF_FLOW_TABLE_IPV6=m
CONFIG_NFT_CHAIN_NAT_IPV6=m
CONFIG_NFT_MASQ_IPV6=m
CONFIG_NFT_REDIR_IPV6=m
+CONFIG_NFT_DUP_IPV6=m
+CONFIG_NFT_FIB_IPV6=m
+CONFIG_NF_FLOW_TABLE_IPV6=m
CONFIG_IP6_NF_IPTABLES=m
CONFIG_IP6_NF_MATCH_AH=m
CONFIG_IP6_NF_MATCH_EUI64=m
@@ -256,7 +256,7 @@ CONFIG_IP6_NF_RAW=m
CONFIG_IP6_NF_NAT=m
CONFIG_IP6_NF_TARGET_MASQUERADE=m
CONFIG_IP6_NF_TARGET_NPT=m
-CONFIG_NF_TABLES_BRIDGE=m
+CONFIG_NF_TABLES_BRIDGE=y
CONFIG_NFT_BRIDGE_META=m
CONFIG_NFT_BRIDGE_REJECT=m
CONFIG_NF_LOG_BRIDGE=m
@@ -310,7 +310,6 @@ CONFIG_NET_MPLS_GSO=m
CONFIG_MPLS_ROUTING=m
CONFIG_MPLS_IPTUNNEL=m
CONFIG_NET_NSH=m
-CONFIG_NET_L3_MASTER_DEV=y
CONFIG_AF_KCM=m
# CONFIG_WIRELESS is not set
CONFIG_PSAMPLE=m
@@ -410,6 +409,7 @@ CONFIG_MAC89x0=y
# CONFIG_NET_VENDOR_MICREL is not set
CONFIG_MACSONIC=y
# CONFIG_NET_VENDOR_NETRONOME is not set
+# CONFIG_NET_VENDOR_NI is not set
CONFIG_MAC8390=y
# CONFIG_NET_VENDOR_QUALCOMM is not set
# CONFIG_NET_VENDOR_RENESAS is not set
@@ -471,6 +471,7 @@ CONFIG_RTC_CLASS=y
CONFIG_RTC_DRV_GENERIC=m
# CONFIG_VIRTIO_MENU is not set
# CONFIG_IOMMU_SUPPORT is not set
+CONFIG_DAX=m
CONFIG_PROC_HARDWARE=y
CONFIG_EXT4_FS=y
CONFIG_REISERFS_FS=m
@@ -604,6 +605,7 @@ CONFIG_CRYPTO_CRYPTD=m
CONFIG_CRYPTO_MCRYPTD=m
CONFIG_CRYPTO_TEST=m
CONFIG_CRYPTO_CHACHA20POLY1305=m
+CONFIG_CRYPTO_CFB=m
CONFIG_CRYPTO_LRW=m
CONFIG_CRYPTO_PCBC=m
CONFIG_CRYPTO_KEYWRAP=m
@@ -630,6 +632,8 @@ CONFIG_CRYPTO_KHAZAD=m
CONFIG_CRYPTO_SALSA20=m
CONFIG_CRYPTO_SEED=m
CONFIG_CRYPTO_SERPENT=m
+CONFIG_CRYPTO_SM4=m
+CONFIG_CRYPTO_SPECK=m
CONFIG_CRYPTO_TEA=m
CONFIG_CRYPTO_TWOFISH=m
CONFIG_CRYPTO_LZO=m
diff --git a/arch/m68k/configs/multi_defconfig b/arch/m68k/configs/multi_defconfig
index 77be97d82dc3..2a84eeec5b02 100644
--- a/arch/m68k/configs/multi_defconfig
+++ b/arch/m68k/configs/multi_defconfig
@@ -105,8 +105,8 @@ CONFIG_NF_CONNTRACK_SANE=m
CONFIG_NF_CONNTRACK_SIP=m
CONFIG_NF_CONNTRACK_TFTP=m
CONFIG_NF_TABLES=m
-CONFIG_NF_TABLES_INET=m
-CONFIG_NF_TABLES_NETDEV=m
+CONFIG_NF_TABLES_INET=y
+CONFIG_NF_TABLES_NETDEV=y
CONFIG_NFT_EXTHDR=m
CONFIG_NFT_META=m
CONFIG_NFT_RT=m
@@ -211,7 +211,7 @@ CONFIG_NF_SOCKET_IPV4=m
CONFIG_NFT_CHAIN_ROUTE_IPV4=m
CONFIG_NFT_DUP_IPV4=m
CONFIG_NFT_FIB_IPV4=m
-CONFIG_NF_TABLES_ARP=m
+CONFIG_NF_TABLES_ARP=y
CONFIG_NF_FLOW_TABLE_IPV4=m
CONFIG_NF_LOG_ARP=m
CONFIG_NFT_CHAIN_NAT_IPV4=m
@@ -240,12 +240,12 @@ CONFIG_IP_NF_ARP_MANGLE=m
CONFIG_NF_CONNTRACK_IPV6=m
CONFIG_NF_SOCKET_IPV6=m
CONFIG_NFT_CHAIN_ROUTE_IPV6=m
-CONFIG_NFT_DUP_IPV6=m
-CONFIG_NFT_FIB_IPV6=m
-CONFIG_NF_FLOW_TABLE_IPV6=m
CONFIG_NFT_CHAIN_NAT_IPV6=m
CONFIG_NFT_MASQ_IPV6=m
CONFIG_NFT_REDIR_IPV6=m
+CONFIG_NFT_DUP_IPV6=m
+CONFIG_NFT_FIB_IPV6=m
+CONFIG_NF_FLOW_TABLE_IPV6=m
CONFIG_IP6_NF_IPTABLES=m
CONFIG_IP6_NF_MATCH_AH=m
CONFIG_IP6_NF_MATCH_EUI64=m
@@ -266,7 +266,7 @@ CONFIG_IP6_NF_RAW=m
CONFIG_IP6_NF_NAT=m
CONFIG_IP6_NF_TARGET_MASQUERADE=m
CONFIG_IP6_NF_TARGET_NPT=m
-CONFIG_NF_TABLES_BRIDGE=m
+CONFIG_NF_TABLES_BRIDGE=y
CONFIG_NFT_BRIDGE_META=m
CONFIG_NFT_BRIDGE_REJECT=m
CONFIG_NF_LOG_BRIDGE=m
@@ -320,7 +320,6 @@ CONFIG_NET_MPLS_GSO=m
CONFIG_MPLS_ROUTING=m
CONFIG_MPLS_IPTUNNEL=m
CONFIG_NET_NSH=m
-CONFIG_NET_L3_MASTER_DEV=y
CONFIG_AF_KCM=m
# CONFIG_WIRELESS is not set
CONFIG_PSAMPLE=m
@@ -452,6 +451,7 @@ CONFIG_MVME16x_NET=y
# CONFIG_NET_VENDOR_MICREL is not set
CONFIG_MACSONIC=y
# CONFIG_NET_VENDOR_NETRONOME is not set
+# CONFIG_NET_VENDOR_NI is not set
CONFIG_HYDRA=y
CONFIG_MAC8390=y
CONFIG_NE2000=y
@@ -541,6 +541,7 @@ CONFIG_RTC_DRV_RP5C01=m
CONFIG_RTC_DRV_GENERIC=m
# CONFIG_VIRTIO_MENU is not set
# CONFIG_IOMMU_SUPPORT is not set
+CONFIG_DAX=m
CONFIG_HEARTBEAT=y
CONFIG_PROC_HARDWARE=y
CONFIG_NATFEAT=y
@@ -684,6 +685,7 @@ CONFIG_CRYPTO_CRYPTD=m
CONFIG_CRYPTO_MCRYPTD=m
CONFIG_CRYPTO_TEST=m
CONFIG_CRYPTO_CHACHA20POLY1305=m
+CONFIG_CRYPTO_CFB=m
CONFIG_CRYPTO_LRW=m
CONFIG_CRYPTO_PCBC=m
CONFIG_CRYPTO_KEYWRAP=m
@@ -710,6 +712,8 @@ CONFIG_CRYPTO_KHAZAD=m
CONFIG_CRYPTO_SALSA20=m
CONFIG_CRYPTO_SEED=m
CONFIG_CRYPTO_SERPENT=m
+CONFIG_CRYPTO_SM4=m
+CONFIG_CRYPTO_SPECK=m
CONFIG_CRYPTO_TEA=m
CONFIG_CRYPTO_TWOFISH=m
CONFIG_CRYPTO_LZO=m
diff --git a/arch/m68k/configs/mvme147_defconfig b/arch/m68k/configs/mvme147_defconfig
index 2ca140757b0f..476e69994340 100644
--- a/arch/m68k/configs/mvme147_defconfig
+++ b/arch/m68k/configs/mvme147_defconfig
@@ -93,8 +93,8 @@ CONFIG_NF_CONNTRACK_SANE=m
CONFIG_NF_CONNTRACK_SIP=m
CONFIG_NF_CONNTRACK_TFTP=m
CONFIG_NF_TABLES=m
-CONFIG_NF_TABLES_INET=m
-CONFIG_NF_TABLES_NETDEV=m
+CONFIG_NF_TABLES_INET=y
+CONFIG_NF_TABLES_NETDEV=y
CONFIG_NFT_EXTHDR=m
CONFIG_NFT_META=m
CONFIG_NFT_RT=m
@@ -199,7 +199,7 @@ CONFIG_NF_SOCKET_IPV4=m
CONFIG_NFT_CHAIN_ROUTE_IPV4=m
CONFIG_NFT_DUP_IPV4=m
CONFIG_NFT_FIB_IPV4=m
-CONFIG_NF_TABLES_ARP=m
+CONFIG_NF_TABLES_ARP=y
CONFIG_NF_FLOW_TABLE_IPV4=m
CONFIG_NF_LOG_ARP=m
CONFIG_NFT_CHAIN_NAT_IPV4=m
@@ -228,12 +228,12 @@ CONFIG_IP_NF_ARP_MANGLE=m
CONFIG_NF_CONNTRACK_IPV6=m
CONFIG_NF_SOCKET_IPV6=m
CONFIG_NFT_CHAIN_ROUTE_IPV6=m
-CONFIG_NFT_DUP_IPV6=m
-CONFIG_NFT_FIB_IPV6=m
-CONFIG_NF_FLOW_TABLE_IPV6=m
CONFIG_NFT_CHAIN_NAT_IPV6=m
CONFIG_NFT_MASQ_IPV6=m
CONFIG_NFT_REDIR_IPV6=m
+CONFIG_NFT_DUP_IPV6=m
+CONFIG_NFT_FIB_IPV6=m
+CONFIG_NF_FLOW_TABLE_IPV6=m
CONFIG_IP6_NF_IPTABLES=m
CONFIG_IP6_NF_MATCH_AH=m
CONFIG_IP6_NF_MATCH_EUI64=m
@@ -254,7 +254,7 @@ CONFIG_IP6_NF_RAW=m
CONFIG_IP6_NF_NAT=m
CONFIG_IP6_NF_TARGET_MASQUERADE=m
CONFIG_IP6_NF_TARGET_NPT=m
-CONFIG_NF_TABLES_BRIDGE=m
+CONFIG_NF_TABLES_BRIDGE=y
CONFIG_NFT_BRIDGE_META=m
CONFIG_NFT_BRIDGE_REJECT=m
CONFIG_NF_LOG_BRIDGE=m
@@ -305,7 +305,6 @@ CONFIG_NET_MPLS_GSO=m
CONFIG_MPLS_ROUTING=m
CONFIG_MPLS_IPTUNNEL=m
CONFIG_NET_NSH=m
-CONFIG_NET_L3_MASTER_DEV=y
CONFIG_AF_KCM=m
# CONFIG_WIRELESS is not set
CONFIG_PSAMPLE=m
@@ -391,6 +390,7 @@ CONFIG_MVME147_NET=y
# CONFIG_NET_VENDOR_MICREL is not set
# CONFIG_NET_VENDOR_NATSEMI is not set
# CONFIG_NET_VENDOR_NETRONOME is not set
+# CONFIG_NET_VENDOR_NI is not set
# CONFIG_NET_VENDOR_QUALCOMM is not set
# CONFIG_NET_VENDOR_RENESAS is not set
# CONFIG_NET_VENDOR_ROCKER is not set
@@ -439,6 +439,7 @@ CONFIG_RTC_CLASS=y
CONFIG_RTC_DRV_GENERIC=m
# CONFIG_VIRTIO_MENU is not set
# CONFIG_IOMMU_SUPPORT is not set
+CONFIG_DAX=m
CONFIG_PROC_HARDWARE=y
CONFIG_EXT4_FS=y
CONFIG_REISERFS_FS=m
@@ -572,6 +573,7 @@ CONFIG_CRYPTO_CRYPTD=m
CONFIG_CRYPTO_MCRYPTD=m
CONFIG_CRYPTO_TEST=m
CONFIG_CRYPTO_CHACHA20POLY1305=m
+CONFIG_CRYPTO_CFB=m
CONFIG_CRYPTO_LRW=m
CONFIG_CRYPTO_PCBC=m
CONFIG_CRYPTO_KEYWRAP=m
@@ -598,6 +600,8 @@ CONFIG_CRYPTO_KHAZAD=m
CONFIG_CRYPTO_SALSA20=m
CONFIG_CRYPTO_SEED=m
CONFIG_CRYPTO_SERPENT=m
+CONFIG_CRYPTO_SM4=m
+CONFIG_CRYPTO_SPECK=m
CONFIG_CRYPTO_TEA=m
CONFIG_CRYPTO_TWOFISH=m
CONFIG_CRYPTO_LZO=m
diff --git a/arch/m68k/configs/mvme16x_defconfig b/arch/m68k/configs/mvme16x_defconfig
index 6a3b4dcc5aab..1477cda9146e 100644
--- a/arch/m68k/configs/mvme16x_defconfig
+++ b/arch/m68k/configs/mvme16x_defconfig
@@ -94,8 +94,8 @@ CONFIG_NF_CONNTRACK_SANE=m
CONFIG_NF_CONNTRACK_SIP=m
CONFIG_NF_CONNTRACK_TFTP=m
CONFIG_NF_TABLES=m
-CONFIG_NF_TABLES_INET=m
-CONFIG_NF_TABLES_NETDEV=m
+CONFIG_NF_TABLES_INET=y
+CONFIG_NF_TABLES_NETDEV=y
CONFIG_NFT_EXTHDR=m
CONFIG_NFT_META=m
CONFIG_NFT_RT=m
@@ -200,7 +200,7 @@ CONFIG_NF_SOCKET_IPV4=m
CONFIG_NFT_CHAIN_ROUTE_IPV4=m
CONFIG_NFT_DUP_IPV4=m
CONFIG_NFT_FIB_IPV4=m
-CONFIG_NF_TABLES_ARP=m
+CONFIG_NF_TABLES_ARP=y
CONFIG_NF_FLOW_TABLE_IPV4=m
CONFIG_NF_LOG_ARP=m
CONFIG_NFT_CHAIN_NAT_IPV4=m
@@ -229,12 +229,12 @@ CONFIG_IP_NF_ARP_MANGLE=m
CONFIG_NF_CONNTRACK_IPV6=m
CONFIG_NF_SOCKET_IPV6=m
CONFIG_NFT_CHAIN_ROUTE_IPV6=m
-CONFIG_NFT_DUP_IPV6=m
-CONFIG_NFT_FIB_IPV6=m
-CONFIG_NF_FLOW_TABLE_IPV6=m
CONFIG_NFT_CHAIN_NAT_IPV6=m
CONFIG_NFT_MASQ_IPV6=m
CONFIG_NFT_REDIR_IPV6=m
+CONFIG_NFT_DUP_IPV6=m
+CONFIG_NFT_FIB_IPV6=m
+CONFIG_NF_FLOW_TABLE_IPV6=m
CONFIG_IP6_NF_IPTABLES=m
CONFIG_IP6_NF_MATCH_AH=m
CONFIG_IP6_NF_MATCH_EUI64=m
@@ -255,7 +255,7 @@ CONFIG_IP6_NF_RAW=m
CONFIG_IP6_NF_NAT=m
CONFIG_IP6_NF_TARGET_MASQUERADE=m
CONFIG_IP6_NF_TARGET_NPT=m
-CONFIG_NF_TABLES_BRIDGE=m
+CONFIG_NF_TABLES_BRIDGE=y
CONFIG_NFT_BRIDGE_META=m
CONFIG_NFT_BRIDGE_REJECT=m
CONFIG_NF_LOG_BRIDGE=m
@@ -306,7 +306,6 @@ CONFIG_NET_MPLS_GSO=m
CONFIG_MPLS_ROUTING=m
CONFIG_MPLS_IPTUNNEL=m
CONFIG_NET_NSH=m
-CONFIG_NET_L3_MASTER_DEV=y
CONFIG_AF_KCM=m
# CONFIG_WIRELESS is not set
CONFIG_PSAMPLE=m
@@ -391,6 +390,7 @@ CONFIG_MVME16x_NET=y
# CONFIG_NET_VENDOR_MICREL is not set
# CONFIG_NET_VENDOR_NATSEMI is not set
# CONFIG_NET_VENDOR_NETRONOME is not set
+# CONFIG_NET_VENDOR_NI is not set
# CONFIG_NET_VENDOR_QUALCOMM is not set
# CONFIG_NET_VENDOR_RENESAS is not set
# CONFIG_NET_VENDOR_ROCKER is not set
@@ -439,6 +439,7 @@ CONFIG_RTC_CLASS=y
CONFIG_RTC_DRV_GENERIC=m
# CONFIG_VIRTIO_MENU is not set
# CONFIG_IOMMU_SUPPORT is not set
+CONFIG_DAX=m
CONFIG_PROC_HARDWARE=y
CONFIG_EXT4_FS=y
CONFIG_REISERFS_FS=m
@@ -572,6 +573,7 @@ CONFIG_CRYPTO_CRYPTD=m
CONFIG_CRYPTO_MCRYPTD=m
CONFIG_CRYPTO_TEST=m
CONFIG_CRYPTO_CHACHA20POLY1305=m
+CONFIG_CRYPTO_CFB=m
CONFIG_CRYPTO_LRW=m
CONFIG_CRYPTO_PCBC=m
CONFIG_CRYPTO_KEYWRAP=m
@@ -598,6 +600,8 @@ CONFIG_CRYPTO_KHAZAD=m
CONFIG_CRYPTO_SALSA20=m
CONFIG_CRYPTO_SEED=m
CONFIG_CRYPTO_SERPENT=m
+CONFIG_CRYPTO_SM4=m
+CONFIG_CRYPTO_SPECK=m
CONFIG_CRYPTO_TEA=m
CONFIG_CRYPTO_TWOFISH=m
CONFIG_CRYPTO_LZO=m
diff --git a/arch/m68k/configs/q40_defconfig b/arch/m68k/configs/q40_defconfig
index 2a3e29c97652..b3a543dc48a0 100644
--- a/arch/m68k/configs/q40_defconfig
+++ b/arch/m68k/configs/q40_defconfig
@@ -94,8 +94,8 @@ CONFIG_NF_CONNTRACK_SANE=m
CONFIG_NF_CONNTRACK_SIP=m
CONFIG_NF_CONNTRACK_TFTP=m
CONFIG_NF_TABLES=m
-CONFIG_NF_TABLES_INET=m
-CONFIG_NF_TABLES_NETDEV=m
+CONFIG_NF_TABLES_INET=y
+CONFIG_NF_TABLES_NETDEV=y
CONFIG_NFT_EXTHDR=m
CONFIG_NFT_META=m
CONFIG_NFT_RT=m
@@ -200,7 +200,7 @@ CONFIG_NF_SOCKET_IPV4=m
CONFIG_NFT_CHAIN_ROUTE_IPV4=m
CONFIG_NFT_DUP_IPV4=m
CONFIG_NFT_FIB_IPV4=m
-CONFIG_NF_TABLES_ARP=m
+CONFIG_NF_TABLES_ARP=y
CONFIG_NF_FLOW_TABLE_IPV4=m
CONFIG_NF_LOG_ARP=m
CONFIG_NFT_CHAIN_NAT_IPV4=m
@@ -229,12 +229,12 @@ CONFIG_IP_NF_ARP_MANGLE=m
CONFIG_NF_CONNTRACK_IPV6=m
CONFIG_NF_SOCKET_IPV6=m
CONFIG_NFT_CHAIN_ROUTE_IPV6=m
-CONFIG_NFT_DUP_IPV6=m
-CONFIG_NFT_FIB_IPV6=m
-CONFIG_NF_FLOW_TABLE_IPV6=m
CONFIG_NFT_CHAIN_NAT_IPV6=m
CONFIG_NFT_MASQ_IPV6=m
CONFIG_NFT_REDIR_IPV6=m
+CONFIG_NFT_DUP_IPV6=m
+CONFIG_NFT_FIB_IPV6=m
+CONFIG_NF_FLOW_TABLE_IPV6=m
CONFIG_IP6_NF_IPTABLES=m
CONFIG_IP6_NF_MATCH_AH=m
CONFIG_IP6_NF_MATCH_EUI64=m
@@ -255,7 +255,7 @@ CONFIG_IP6_NF_RAW=m
CONFIG_IP6_NF_NAT=m
CONFIG_IP6_NF_TARGET_MASQUERADE=m
CONFIG_IP6_NF_TARGET_NPT=m
-CONFIG_NF_TABLES_BRIDGE=m
+CONFIG_NF_TABLES_BRIDGE=y
CONFIG_NFT_BRIDGE_META=m
CONFIG_NFT_BRIDGE_REJECT=m
CONFIG_NF_LOG_BRIDGE=m
@@ -306,7 +306,6 @@ CONFIG_NET_MPLS_GSO=m
CONFIG_MPLS_ROUTING=m
CONFIG_MPLS_IPTUNNEL=m
CONFIG_NET_NSH=m
-CONFIG_NET_L3_MASTER_DEV=y
CONFIG_AF_KCM=m
# CONFIG_WIRELESS is not set
CONFIG_PSAMPLE=m
@@ -400,6 +399,7 @@ CONFIG_VETH=m
# CONFIG_NET_VENDOR_MARVELL is not set
# CONFIG_NET_VENDOR_MICREL is not set
# CONFIG_NET_VENDOR_NETRONOME is not set
+# CONFIG_NET_VENDOR_NI is not set
CONFIG_NE2000=y
# CONFIG_NET_VENDOR_QUALCOMM is not set
# CONFIG_NET_VENDOR_RENESAS is not set
@@ -461,6 +461,7 @@ CONFIG_RTC_CLASS=y
CONFIG_RTC_DRV_GENERIC=m
# CONFIG_VIRTIO_MENU is not set
# CONFIG_IOMMU_SUPPORT is not set
+CONFIG_DAX=m
CONFIG_HEARTBEAT=y
CONFIG_PROC_HARDWARE=y
CONFIG_EXT4_FS=y
@@ -595,6 +596,7 @@ CONFIG_CRYPTO_CRYPTD=m
CONFIG_CRYPTO_MCRYPTD=m
CONFIG_CRYPTO_TEST=m
CONFIG_CRYPTO_CHACHA20POLY1305=m
+CONFIG_CRYPTO_CFB=m
CONFIG_CRYPTO_LRW=m
CONFIG_CRYPTO_PCBC=m
CONFIG_CRYPTO_KEYWRAP=m
@@ -621,6 +623,8 @@ CONFIG_CRYPTO_KHAZAD=m
CONFIG_CRYPTO_SALSA20=m
CONFIG_CRYPTO_SEED=m
CONFIG_CRYPTO_SERPENT=m
+CONFIG_CRYPTO_SM4=m
+CONFIG_CRYPTO_SPECK=m
CONFIG_CRYPTO_TEA=m
CONFIG_CRYPTO_TWOFISH=m
CONFIG_CRYPTO_LZO=m
diff --git a/arch/m68k/configs/sun3_defconfig b/arch/m68k/configs/sun3_defconfig
index cba2494c99b2..d543ed5dfa96 100644
--- a/arch/m68k/configs/sun3_defconfig
+++ b/arch/m68k/configs/sun3_defconfig
@@ -91,8 +91,8 @@ CONFIG_NF_CONNTRACK_SANE=m
CONFIG_NF_CONNTRACK_SIP=m
CONFIG_NF_CONNTRACK_TFTP=m
CONFIG_NF_TABLES=m
-CONFIG_NF_TABLES_INET=m
-CONFIG_NF_TABLES_NETDEV=m
+CONFIG_NF_TABLES_INET=y
+CONFIG_NF_TABLES_NETDEV=y
CONFIG_NFT_EXTHDR=m
CONFIG_NFT_META=m
CONFIG_NFT_RT=m
@@ -197,7 +197,7 @@ CONFIG_NF_SOCKET_IPV4=m
CONFIG_NFT_CHAIN_ROUTE_IPV4=m
CONFIG_NFT_DUP_IPV4=m
CONFIG_NFT_FIB_IPV4=m
-CONFIG_NF_TABLES_ARP=m
+CONFIG_NF_TABLES_ARP=y
CONFIG_NF_FLOW_TABLE_IPV4=m
CONFIG_NF_LOG_ARP=m
CONFIG_NFT_CHAIN_NAT_IPV4=m
@@ -226,12 +226,12 @@ CONFIG_IP_NF_ARP_MANGLE=m
CONFIG_NF_CONNTRACK_IPV6=m
CONFIG_NF_SOCKET_IPV6=m
CONFIG_NFT_CHAIN_ROUTE_IPV6=m
-CONFIG_NFT_DUP_IPV6=m
-CONFIG_NFT_FIB_IPV6=m
-CONFIG_NF_FLOW_TABLE_IPV6=m
CONFIG_NFT_CHAIN_NAT_IPV6=m
CONFIG_NFT_MASQ_IPV6=m
CONFIG_NFT_REDIR_IPV6=m
+CONFIG_NFT_DUP_IPV6=m
+CONFIG_NFT_FIB_IPV6=m
+CONFIG_NF_FLOW_TABLE_IPV6=m
CONFIG_IP6_NF_IPTABLES=m
CONFIG_IP6_NF_MATCH_AH=m
CONFIG_IP6_NF_MATCH_EUI64=m
@@ -252,7 +252,7 @@ CONFIG_IP6_NF_RAW=m
CONFIG_IP6_NF_NAT=m
CONFIG_IP6_NF_TARGET_MASQUERADE=m
CONFIG_IP6_NF_TARGET_NPT=m
-CONFIG_NF_TABLES_BRIDGE=m
+CONFIG_NF_TABLES_BRIDGE=y
CONFIG_NFT_BRIDGE_META=m
CONFIG_NFT_BRIDGE_REJECT=m
CONFIG_NF_LOG_BRIDGE=m
@@ -303,7 +303,6 @@ CONFIG_NET_MPLS_GSO=m
CONFIG_MPLS_ROUTING=m
CONFIG_MPLS_IPTUNNEL=m
CONFIG_NET_NSH=m
-CONFIG_NET_L3_MASTER_DEV=y
CONFIG_AF_KCM=m
# CONFIG_WIRELESS is not set
CONFIG_PSAMPLE=m
@@ -388,6 +387,7 @@ CONFIG_SUN3_82586=y
# CONFIG_NET_VENDOR_MICREL is not set
# CONFIG_NET_VENDOR_NATSEMI is not set
# CONFIG_NET_VENDOR_NETRONOME is not set
+# CONFIG_NET_VENDOR_NI is not set
# CONFIG_NET_VENDOR_QUALCOMM is not set
# CONFIG_NET_VENDOR_RENESAS is not set
# CONFIG_NET_VENDOR_ROCKER is not set
@@ -441,6 +441,7 @@ CONFIG_RTC_CLASS=y
CONFIG_RTC_DRV_GENERIC=m
# CONFIG_VIRTIO_MENU is not set
# CONFIG_IOMMU_SUPPORT is not set
+CONFIG_DAX=m
CONFIG_PROC_HARDWARE=y
CONFIG_EXT4_FS=y
CONFIG_REISERFS_FS=m
@@ -573,6 +574,7 @@ CONFIG_CRYPTO_CRYPTD=m
CONFIG_CRYPTO_MCRYPTD=m
CONFIG_CRYPTO_TEST=m
CONFIG_CRYPTO_CHACHA20POLY1305=m
+CONFIG_CRYPTO_CFB=m
CONFIG_CRYPTO_LRW=m
CONFIG_CRYPTO_PCBC=m
CONFIG_CRYPTO_KEYWRAP=m
@@ -599,6 +601,8 @@ CONFIG_CRYPTO_KHAZAD=m
CONFIG_CRYPTO_SALSA20=m
CONFIG_CRYPTO_SEED=m
CONFIG_CRYPTO_SERPENT=m
+CONFIG_CRYPTO_SM4=m
+CONFIG_CRYPTO_SPECK=m
CONFIG_CRYPTO_TEA=m
CONFIG_CRYPTO_TWOFISH=m
CONFIG_CRYPTO_LZO=m
diff --git a/arch/m68k/configs/sun3x_defconfig b/arch/m68k/configs/sun3x_defconfig
index d911561137fd..a67e54246023 100644
--- a/arch/m68k/configs/sun3x_defconfig
+++ b/arch/m68k/configs/sun3x_defconfig
@@ -91,8 +91,8 @@ CONFIG_NF_CONNTRACK_SANE=m
CONFIG_NF_CONNTRACK_SIP=m
CONFIG_NF_CONNTRACK_TFTP=m
CONFIG_NF_TABLES=m
-CONFIG_NF_TABLES_INET=m
-CONFIG_NF_TABLES_NETDEV=m
+CONFIG_NF_TABLES_INET=y
+CONFIG_NF_TABLES_NETDEV=y
CONFIG_NFT_EXTHDR=m
CONFIG_NFT_META=m
CONFIG_NFT_RT=m
@@ -197,7 +197,7 @@ CONFIG_NF_SOCKET_IPV4=m
CONFIG_NFT_CHAIN_ROUTE_IPV4=m
CONFIG_NFT_DUP_IPV4=m
CONFIG_NFT_FIB_IPV4=m
-CONFIG_NF_TABLES_ARP=m
+CONFIG_NF_TABLES_ARP=y
CONFIG_NF_FLOW_TABLE_IPV4=m
CONFIG_NF_LOG_ARP=m
CONFIG_NFT_CHAIN_NAT_IPV4=m
@@ -226,12 +226,12 @@ CONFIG_IP_NF_ARP_MANGLE=m
CONFIG_NF_CONNTRACK_IPV6=m
CONFIG_NF_SOCKET_IPV6=m
CONFIG_NFT_CHAIN_ROUTE_IPV6=m
-CONFIG_NFT_DUP_IPV6=m
-CONFIG_NFT_FIB_IPV6=m
-CONFIG_NF_FLOW_TABLE_IPV6=m
CONFIG_NFT_CHAIN_NAT_IPV6=m
CONFIG_NFT_MASQ_IPV6=m
CONFIG_NFT_REDIR_IPV6=m
+CONFIG_NFT_DUP_IPV6=m
+CONFIG_NFT_FIB_IPV6=m
+CONFIG_NF_FLOW_TABLE_IPV6=m
CONFIG_IP6_NF_IPTABLES=m
CONFIG_IP6_NF_MATCH_AH=m
CONFIG_IP6_NF_MATCH_EUI64=m
@@ -252,7 +252,7 @@ CONFIG_IP6_NF_RAW=m
CONFIG_IP6_NF_NAT=m
CONFIG_IP6_NF_TARGET_MASQUERADE=m
CONFIG_IP6_NF_TARGET_NPT=m
-CONFIG_NF_TABLES_BRIDGE=m
+CONFIG_NF_TABLES_BRIDGE=y
CONFIG_NFT_BRIDGE_META=m
CONFIG_NFT_BRIDGE_REJECT=m
CONFIG_NF_LOG_BRIDGE=m
@@ -303,7 +303,6 @@ CONFIG_NET_MPLS_GSO=m
CONFIG_MPLS_ROUTING=m
CONFIG_MPLS_IPTUNNEL=m
CONFIG_NET_NSH=m
-CONFIG_NET_L3_MASTER_DEV=y
CONFIG_AF_KCM=m
# CONFIG_WIRELESS is not set
CONFIG_PSAMPLE=m
@@ -389,6 +388,7 @@ CONFIG_SUN3LANCE=y
# CONFIG_NET_VENDOR_MICREL is not set
# CONFIG_NET_VENDOR_NATSEMI is not set
# CONFIG_NET_VENDOR_NETRONOME is not set
+# CONFIG_NET_VENDOR_NI is not set
# CONFIG_NET_VENDOR_QUALCOMM is not set
# CONFIG_NET_VENDOR_RENESAS is not set
# CONFIG_NET_VENDOR_ROCKER is not set
@@ -441,6 +441,7 @@ CONFIG_RTC_CLASS=y
CONFIG_RTC_DRV_GENERIC=m
# CONFIG_VIRTIO_MENU is not set
# CONFIG_IOMMU_SUPPORT is not set
+CONFIG_DAX=m
CONFIG_PROC_HARDWARE=y
CONFIG_EXT4_FS=y
CONFIG_REISERFS_FS=m
@@ -574,6 +575,7 @@ CONFIG_CRYPTO_CRYPTD=m
CONFIG_CRYPTO_MCRYPTD=m
CONFIG_CRYPTO_TEST=m
CONFIG_CRYPTO_CHACHA20POLY1305=m
+CONFIG_CRYPTO_CFB=m
CONFIG_CRYPTO_LRW=m
CONFIG_CRYPTO_PCBC=m
CONFIG_CRYPTO_KEYWRAP=m
@@ -600,6 +602,8 @@ CONFIG_CRYPTO_KHAZAD=m
CONFIG_CRYPTO_SALSA20=m
CONFIG_CRYPTO_SEED=m
CONFIG_CRYPTO_SERPENT=m
+CONFIG_CRYPTO_SM4=m
+CONFIG_CRYPTO_SPECK=m
CONFIG_CRYPTO_TEA=m
CONFIG_CRYPTO_TWOFISH=m
CONFIG_CRYPTO_LZO=m
diff --git a/arch/m68k/include/asm/delay.h b/arch/m68k/include/asm/delay.h
index 7f474121e4ca..751712f8beea 100644
--- a/arch/m68k/include/asm/delay.h
+++ b/arch/m68k/include/asm/delay.h
@@ -49,8 +49,6 @@ extern void __bad_udelay(void);
* The simpler m68k and ColdFire processors do not have a 32*32->64
* multiply instruction. So we need to handle them a little differently.
* We use a bit of shifting and a single 32*32->32 multiply to get close.
- * This is a macro so that the const version can factor out the first
- * multiply and shift.
*/
#define HZSCALE (268435456 / (1000000 / HZ))
@@ -115,6 +113,13 @@ static inline void __udelay(unsigned long usecs)
*/
#define HZSCALE (268435456 / (1000000 / HZ))
-#define ndelay(n) __delay(DIV_ROUND_UP((n) * ((((HZSCALE) >> 11) * (loops_per_jiffy >> 11)) >> 6), 1000))
+static inline void ndelay(unsigned long nsec)
+{
+ __delay(DIV_ROUND_UP(nsec *
+ ((((HZSCALE) >> 11) *
+ (loops_per_jiffy >> 11)) >> 6),
+ 1000));
+}
+#define ndelay(n) ndelay(n)
#endif /* defined(_M68K_DELAY_H) */
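Converting ndelay() from a macro to an inline function (with the self-referential #define so generic code testing "#ifndef ndelay" still sees a definition) forces type checking and conversion of the argument to unsigned long before the scaling arithmetic runs. A standalone userspace sketch of the same scaling, using assumed values HZ=100 and loops_per_jiffy=100000 purely for illustration:

	#include <stdio.h>

	#define HZ 100UL
	#define HZSCALE (268435456 / (1000000 / HZ))
	#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

	static unsigned long loops_per_jiffy = 100000UL;	/* assumed */

	static void __delay(unsigned long loops)
	{
		while (loops--)				/* busy-wait stand-in */
			__asm__ volatile ("");
	}

	static inline void ndelay(unsigned long nsec)	/* nsec converted here */
	{
		__delay(DIV_ROUND_UP(nsec *
				     ((((HZSCALE) >> 11) *
				       (loops_per_jiffy >> 11)) >> 6),
				     1000));
	}

	int main(void)
	{
		ndelay(500);	/* a 64-bit expression is narrowed safely here */
		printf("loops per 1000 ns: %lu\n",
		       (((HZSCALE) >> 11) * (loops_per_jiffy >> 11)) >> 6);
		return 0;
	}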
diff --git a/arch/m68k/include/asm/pci.h b/arch/m68k/include/asm/pci.h
index ef26fae8cf0b..5a4bc223743b 100644
--- a/arch/m68k/include/asm/pci.h
+++ b/arch/m68k/include/asm/pci.h
@@ -4,12 +4,6 @@
#include <asm-generic/pci.h>
-/* The PCI address space does equal the physical memory
- * address space. The networking and block device layers use
- * this boolean for bounce buffer decisions.
- */
-#define PCI_DMA_BUS_IS_PHYS (1)
-
#define pcibios_assign_all_busses() 1
#define PCIBIOS_MIN_IO 0x00000100
diff --git a/arch/m68k/include/asm/uaccess_mm.h b/arch/m68k/include/asm/uaccess_mm.h
index 75c172e909ac..c4cb889660aa 100644
--- a/arch/m68k/include/asm/uaccess_mm.h
+++ b/arch/m68k/include/asm/uaccess_mm.h
@@ -141,10 +141,12 @@ asm volatile ("\n" \
case 4: \
__get_user_asm(__gu_err, x, ptr, u32, l, r, -EFAULT); \
break; \
-/* case 8: disabled because gcc-4.1 has a broken typeof \
- { \
- const void *__gu_ptr = (ptr); \
- u64 __gu_val; \
+ case 8: { \
+ const void *__gu_ptr = (ptr); \
+ union { \
+ u64 l; \
+ __typeof__(*(ptr)) t; \
+ } __gu_val; \
asm volatile ("\n" \
"1: "MOVES".l (%2)+,%1\n" \
"2: "MOVES".l (%2),%R1\n" \
@@ -162,13 +164,13 @@ asm volatile ("\n" \
" .long 1b,10b\n" \
" .long 2b,10b\n" \
" .previous" \
- : "+d" (__gu_err), "=&r" (__gu_val), \
+ : "+d" (__gu_err), "=&r" (__gu_val.l), \
"+a" (__gu_ptr) \
: "i" (-EFAULT) \
: "memory"); \
- (x) = (__force typeof(*(ptr)))__gu_val; \
+ (x) = __gu_val.t; \
break; \
- } */ \
+ } \
default: \
__gu_err = __get_user_bad(); \
break; \
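The re-enabled 8-byte case reads the user value through a union of a u64 and __typeof__(*(ptr)): the two MOVES instructions write into a plain 64-bit quantity, and the final assignment reads it back with the pointer's own type, sidestepping the broken-typeof problem the old comment blamed on gcc-4.1. A userspace sketch of the union trick, with memcpy() standing in for the inline asm (all names here are illustrative):

	#include <stdint.h>
	#include <stdio.h>
	#include <string.h>

	#define get_u64(x, ptr)						\
	({								\
		union {							\
			uint64_t l;					\
			__typeof__(*(ptr)) t;				\
		} __gu_val;						\
		/* the asm writes the raw 64 bits... */			\
		memcpy(&__gu_val.l, (ptr), sizeof(__gu_val.l));		\
		(x) = __gu_val.t;	/* ...the union types them */	\
		0;							\
	})

	int main(void)
	{
		long long src = 0x1122334455667788LL, dst;

		get_u64(dst, &src);
		printf("0x%llx\n", dst);
		return 0;
	}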
diff --git a/arch/m68k/kernel/dma.c b/arch/m68k/kernel/dma.c
index c01b9b8f97bf..463572c4943f 100644
--- a/arch/m68k/kernel/dma.c
+++ b/arch/m68k/kernel/dma.c
@@ -9,6 +9,7 @@
#include <linux/dma-mapping.h>
#include <linux/device.h>
#include <linux/kernel.h>
+#include <linux/platform_device.h>
#include <linux/scatterlist.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
@@ -165,3 +166,12 @@ const struct dma_map_ops m68k_dma_ops = {
.sync_sg_for_device = m68k_dma_sync_sg_for_device,
};
EXPORT_SYMBOL(m68k_dma_ops);
+
+void arch_setup_pdev_archdata(struct platform_device *pdev)
+{
+ if (pdev->dev.coherent_dma_mask == DMA_MASK_NONE &&
+ pdev->dev.dma_mask == NULL) {
+ pdev->dev.coherent_dma_mask = DMA_BIT_MASK(32);
+ pdev->dev.dma_mask = &pdev->dev.coherent_dma_mask;
+ }
+}
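The new hook gives every m68k platform device a usable default DMA configuration at registration time: unless a mask was set explicitly, the coherent mask becomes 32-bit and dev.dma_mask points at it, which the generic DMA mapping code requires before dma_alloc_coherent() and the streaming API will work. A hedged kernel-side sketch of the effect for a hypothetical device:

	/* Hypothetical driver fragment, not from the patch. */
	struct platform_device *pdev =
		platform_device_register_simple("demo-dev", -1, NULL, 0);

	/* After arch_setup_pdev_archdata() has run during registration:
	 *   pdev->dev.coherent_dma_mask == DMA_BIT_MASK(32)
	 *   pdev->dev.dma_mask == &pdev->dev.coherent_dma_mask
	 * so the driver can call dma_alloc_coherent(&pdev->dev, ...)
	 * without any per-board mask setup.
	 */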
diff --git a/arch/m68k/kernel/setup_mm.c b/arch/m68k/kernel/setup_mm.c
index dd25bfc22fb4..f35e3ebd6331 100644
--- a/arch/m68k/kernel/setup_mm.c
+++ b/arch/m68k/kernel/setup_mm.c
@@ -527,21 +527,9 @@ static int hardware_proc_show(struct seq_file *m, void *v)
return 0;
}
-static int hardware_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, hardware_proc_show, NULL);
-}
-
-static const struct file_operations hardware_proc_fops = {
- .open = hardware_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
static int __init proc_hardware_init(void)
{
- proc_create("hardware", 0, NULL, &hardware_proc_fops);
+ proc_create_single("hardware", 0, NULL, hardware_proc_show);
return 0;
}
module_init(proc_hardware_init);
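proc_create_single(), added this development cycle, takes the show callback directly and supplies the single_open()/seq_read/single_release plumbing internally, which is what lets the file_operations block above disappear. A minimal, hypothetical module using the same pattern:

	#include <linux/init.h>
	#include <linux/module.h>
	#include <linux/proc_fs.h>
	#include <linux/seq_file.h>

	static int demo_show(struct seq_file *m, void *v)
	{
		seq_puts(m, "hello from /proc/demo\n");
		return 0;
	}

	static int __init demo_init(void)
	{
		proc_create_single("demo", 0, NULL, demo_show);
		return 0;
	}
	module_init(demo_init);

	static void __exit demo_exit(void)
	{
		remove_proc_entry("demo", NULL);
	}
	module_exit(demo_exit);

	MODULE_LICENSE("GPL");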
diff --git a/arch/m68k/kernel/signal.c b/arch/m68k/kernel/signal.c
index f7cd5ecfacd3..72850b85ecf8 100644
--- a/arch/m68k/kernel/signal.c
+++ b/arch/m68k/kernel/signal.c
@@ -576,41 +576,42 @@ static inline int rt_save_fpu_state(struct ucontext __user *uc, struct pt_regs *
static inline void siginfo_build_tests(void)
{
- /* This needs to be tested on m68k as it has a lesser
- * alignment requirment than x86 and that can cause surprises.
+ /*
+ * This needs to be tested on m68k as it has a lesser
+ * alignment requirement than x86 and that can cause surprises.
*/
/* This is part of the ABI and can never change in size: */
BUILD_BUG_ON(sizeof(siginfo_t) != 128);
- /* Ensure the know fields never change in location */
+ /* Ensure the known fields never change in location */
BUILD_BUG_ON(offsetof(siginfo_t, si_signo) != 0);
BUILD_BUG_ON(offsetof(siginfo_t, si_errno) != 4);
BUILD_BUG_ON(offsetof(siginfo_t, si_code) != 8);
/* _kill */
- BUILD_BUG_ON(offsetof(siginfo_t, si_pid) != 0x0C);
+ BUILD_BUG_ON(offsetof(siginfo_t, si_pid) != 0x0c);
BUILD_BUG_ON(offsetof(siginfo_t, si_uid) != 0x10);
/* _timer */
- BUILD_BUG_ON(offsetof(siginfo_t, si_tid) != 0x0C);
+ BUILD_BUG_ON(offsetof(siginfo_t, si_tid) != 0x0c);
BUILD_BUG_ON(offsetof(siginfo_t, si_overrun) != 0x10);
BUILD_BUG_ON(offsetof(siginfo_t, si_value) != 0x14);
/* _rt */
- BUILD_BUG_ON(offsetof(siginfo_t, si_pid) != 0x0C);
+ BUILD_BUG_ON(offsetof(siginfo_t, si_pid) != 0x0c);
BUILD_BUG_ON(offsetof(siginfo_t, si_uid) != 0x10);
BUILD_BUG_ON(offsetof(siginfo_t, si_value) != 0x14);
/* _sigchld */
- BUILD_BUG_ON(offsetof(siginfo_t, si_pid) != 0x0C);
+ BUILD_BUG_ON(offsetof(siginfo_t, si_pid) != 0x0c);
BUILD_BUG_ON(offsetof(siginfo_t, si_uid) != 0x10);
BUILD_BUG_ON(offsetof(siginfo_t, si_status) != 0x14);
BUILD_BUG_ON(offsetof(siginfo_t, si_utime) != 0x18);
- BUILD_BUG_ON(offsetof(siginfo_t, si_stime) != 0x1C);
+ BUILD_BUG_ON(offsetof(siginfo_t, si_stime) != 0x1c);
/* _sigfault */
- BUILD_BUG_ON(offsetof(siginfo_t, si_addr) != 0x0C);
+ BUILD_BUG_ON(offsetof(siginfo_t, si_addr) != 0x0c);
/* _sigfault._mcerr */
BUILD_BUG_ON(offsetof(siginfo_t, si_addr_lsb) != 0x10);
@@ -623,11 +624,11 @@ static inline void siginfo_build_tests(void)
BUILD_BUG_ON(offsetof(siginfo_t, si_pkey) != 0x12);
/* _sigpoll */
- BUILD_BUG_ON(offsetof(siginfo_t, si_band) != 0x0C);
+ BUILD_BUG_ON(offsetof(siginfo_t, si_band) != 0x0c);
BUILD_BUG_ON(offsetof(siginfo_t, si_fd) != 0x10);
/* _sigsys */
- BUILD_BUG_ON(offsetof(siginfo_t, si_call_addr) != 0x0C);
+ BUILD_BUG_ON(offsetof(siginfo_t, si_call_addr) != 0x0c);
BUILD_BUG_ON(offsetof(siginfo_t, si_syscall) != 0x10);
BUILD_BUG_ON(offsetof(siginfo_t, si_arch) != 0x14);
diff --git a/arch/m68k/kernel/time.c b/arch/m68k/kernel/time.c
index 97dd4e26f234..3a8b47f8f97b 100644
--- a/arch/m68k/kernel/time.c
+++ b/arch/m68k/kernel/time.c
@@ -71,23 +71,26 @@ static irqreturn_t timer_interrupt(int irq, void *dummy)
return IRQ_HANDLED;
}
-void read_persistent_clock(struct timespec *ts)
+#ifdef CONFIG_M68KCLASSIC
+#if !IS_BUILTIN(CONFIG_RTC_DRV_GENERIC)
+void read_persistent_clock64(struct timespec64 *ts)
{
struct rtc_time time;
+
ts->tv_sec = 0;
ts->tv_nsec = 0;
- if (mach_hwclk) {
- mach_hwclk(0, &time);
+ if (!mach_hwclk)
+ return;
- if ((time.tm_year += 1900) < 1970)
- time.tm_year += 100;
- ts->tv_sec = mktime(time.tm_year, time.tm_mon, time.tm_mday,
- time.tm_hour, time.tm_min, time.tm_sec);
- }
+ mach_hwclk(0, &time);
+
+ ts->tv_sec = mktime64(time.tm_year + 1900, time.tm_mon + 1, time.tm_mday,
+ time.tm_hour, time.tm_min, time.tm_sec);
}
+#endif
-#if defined(CONFIG_ARCH_USES_GETTIMEOFFSET) && IS_ENABLED(CONFIG_RTC_DRV_GENERIC)
+#if IS_ENABLED(CONFIG_RTC_DRV_GENERIC)
static int rtc_generic_get_time(struct device *dev, struct rtc_time *tm)
{
mach_hwclk(0, tm);
@@ -145,8 +148,8 @@ static int __init rtc_init(void)
}
module_init(rtc_init);
-
-#endif /* CONFIG_ARCH_USES_GETTIMEOFFSET */
+#endif /* CONFIG_RTC_DRV_GENERIC */
+#endif /* CONFIG_M68KCLASSIC */
void __init time_init(void)
{
diff --git a/arch/m68k/kernel/traps.c b/arch/m68k/kernel/traps.c
index c1cc4e99aa94..b2fd000b9285 100644
--- a/arch/m68k/kernel/traps.c
+++ b/arch/m68k/kernel/traps.c
@@ -1007,9 +1007,9 @@ void bad_super_trap (struct frame *fp)
asmlinkage void trap_c(struct frame *fp)
{
- int sig;
+ int sig, si_code;
+ void __user *addr;
int vector = (fp->ptregs.vector >> 2) & 0xff;
- siginfo_t info;
if (fp->ptregs.sr & PS_S) {
if (vector == VEC_TRACE) {
@@ -1029,21 +1029,21 @@ asmlinkage void trap_c(struct frame *fp)
/* send the appropriate signal to the user program */
switch (vector) {
case VEC_ADDRERR:
- info.si_code = BUS_ADRALN;
+ si_code = BUS_ADRALN;
sig = SIGBUS;
break;
case VEC_ILLEGAL:
case VEC_LINE10:
case VEC_LINE11:
- info.si_code = ILL_ILLOPC;
+ si_code = ILL_ILLOPC;
sig = SIGILL;
break;
case VEC_PRIV:
- info.si_code = ILL_PRVOPC;
+ si_code = ILL_PRVOPC;
sig = SIGILL;
break;
case VEC_COPROC:
- info.si_code = ILL_COPROC;
+ si_code = ILL_COPROC;
sig = SIGILL;
break;
case VEC_TRAP1:
@@ -1060,76 +1060,74 @@ asmlinkage void trap_c(struct frame *fp)
case VEC_TRAP12:
case VEC_TRAP13:
case VEC_TRAP14:
- info.si_code = ILL_ILLTRP;
+ si_code = ILL_ILLTRP;
sig = SIGILL;
break;
case VEC_FPBRUC:
case VEC_FPOE:
case VEC_FPNAN:
- info.si_code = FPE_FLTINV;
+ si_code = FPE_FLTINV;
sig = SIGFPE;
break;
case VEC_FPIR:
- info.si_code = FPE_FLTRES;
+ si_code = FPE_FLTRES;
sig = SIGFPE;
break;
case VEC_FPDIVZ:
- info.si_code = FPE_FLTDIV;
+ si_code = FPE_FLTDIV;
sig = SIGFPE;
break;
case VEC_FPUNDER:
- info.si_code = FPE_FLTUND;
+ si_code = FPE_FLTUND;
sig = SIGFPE;
break;
case VEC_FPOVER:
- info.si_code = FPE_FLTOVF;
+ si_code = FPE_FLTOVF;
sig = SIGFPE;
break;
case VEC_ZERODIV:
- info.si_code = FPE_INTDIV;
+ si_code = FPE_INTDIV;
sig = SIGFPE;
break;
case VEC_CHK:
case VEC_TRAP:
- info.si_code = FPE_INTOVF;
+ si_code = FPE_INTOVF;
sig = SIGFPE;
break;
case VEC_TRACE: /* ptrace single step */
- info.si_code = TRAP_TRACE;
+ si_code = TRAP_TRACE;
sig = SIGTRAP;
break;
case VEC_TRAP15: /* breakpoint */
- info.si_code = TRAP_BRKPT;
+ si_code = TRAP_BRKPT;
sig = SIGTRAP;
break;
default:
- info.si_code = ILL_ILLOPC;
+ si_code = ILL_ILLOPC;
sig = SIGILL;
break;
}
- info.si_signo = sig;
- info.si_errno = 0;
switch (fp->ptregs.format) {
default:
- info.si_addr = (void *) fp->ptregs.pc;
+ addr = (void __user *) fp->ptregs.pc;
break;
case 2:
- info.si_addr = (void *) fp->un.fmt2.iaddr;
+ addr = (void __user *) fp->un.fmt2.iaddr;
break;
case 7:
- info.si_addr = (void *) fp->un.fmt7.effaddr;
+ addr = (void __user *) fp->un.fmt7.effaddr;
break;
case 9:
- info.si_addr = (void *) fp->un.fmt9.iaddr;
+ addr = (void __user *) fp->un.fmt9.iaddr;
break;
case 10:
- info.si_addr = (void *) fp->un.fmta.daddr;
+ addr = (void __user *) fp->un.fmta.daddr;
break;
case 11:
- info.si_addr = (void *) fp->un.fmtb.daddr;
+ addr = (void __user *) fp->un.fmtb.daddr;
break;
}
- force_sig_info (sig, &info, current);
+ force_sig_fault(sig, si_code, addr, current);
}
void die_if_kernel (char *str, struct pt_regs *fp, int nr)
@@ -1161,12 +1159,6 @@ asmlinkage void fpsp040_die(void)
#ifdef CONFIG_M68KFPU_EMU
asmlinkage void fpemu_signal(int signal, int code, void *addr)
{
- siginfo_t info;
-
- info.si_signo = signal;
- info.si_errno = 0;
- info.si_code = code;
- info.si_addr = addr;
- force_sig_info(signal, &info, current);
+ force_sig_fault(signal, code, addr, current);
}
#endif
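Every conversion in this file funnels a (signal, si_code, address) triple into force_sig_fault(), which builds and fully initializes the siginfo internally instead of leaving a partially filled siginfo_t on the kernel stack. The call pattern as used here (the prototype in this series also carries arch-specific trap-number fields on a few other ports):

	/* new form, one call: */
	force_sig_fault(sig, si_code, addr, current);

	/* old open-coded form it replaces: */
	siginfo_t info;
	info.si_signo = sig;
	info.si_errno = 0;
	info.si_code  = si_code;
	info.si_addr  = addr;
	force_sig_info(sig, &info, current);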
diff --git a/arch/m68k/mac/config.c b/arch/m68k/mac/config.c
index 0c3275aa0197..e522307db47c 100644
--- a/arch/m68k/mac/config.c
+++ b/arch/m68k/mac/config.c
@@ -1005,7 +1005,7 @@ int __init mac_platform_init(void)
struct resource swim_rsrc = {
.flags = IORESOURCE_MEM,
.start = (resource_size_t)swim_base,
- .end = (resource_size_t)swim_base + 0x2000,
+ .end = (resource_size_t)swim_base + 0x1FFF,
};
platform_device_register_simple("swim", -1, &swim_rsrc, 1);
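struct resource ranges are inclusive at both ends, so an 8 KiB (0x2000-byte) window starting at swim_base must end at swim_base + 0x1fff; the old value spilled one byte into the following page. The DEFINE_RES_MEM() helper from <linux/ioport.h> encodes this convention by taking a size instead of an end address; an equivalent (hypothetical) declaration:

	struct resource swim_rsrc =
		DEFINE_RES_MEM((resource_size_t)swim_base, 0x2000);
	/* expands to .start = swim_base, .end = swim_base + 0x2000 - 1 */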
diff --git a/arch/m68k/mm/fault.c b/arch/m68k/mm/fault.c
index 03253c4f8e6a..f2ff3779875a 100644
--- a/arch/m68k/mm/fault.c
+++ b/arch/m68k/mm/fault.c
@@ -21,35 +21,32 @@ extern void die_if_kernel(char *, struct pt_regs *, long);
int send_fault_sig(struct pt_regs *regs)
{
- siginfo_t siginfo;
+ int signo, si_code;
+ void __user *addr;
- clear_siginfo(&siginfo);
- siginfo.si_signo = current->thread.signo;
- siginfo.si_code = current->thread.code;
- siginfo.si_addr = (void *)current->thread.faddr;
- pr_debug("send_fault_sig: %p,%d,%d\n", siginfo.si_addr,
- siginfo.si_signo, siginfo.si_code);
+ signo = current->thread.signo;
+ si_code = current->thread.code;
+ addr = (void __user *)current->thread.faddr;
+ pr_debug("send_fault_sig: %p,%d,%d\n", addr, signo, si_code);
if (user_mode(regs)) {
- force_sig_info(siginfo.si_signo,
- &siginfo, current);
+ force_sig_fault(signo, si_code, addr, current);
} else {
if (fixup_exception(regs))
return -1;
- //if (siginfo.si_signo == SIGBUS)
- // force_sig_info(siginfo.si_signo,
- // &siginfo, current);
+ //if (signo == SIGBUS)
+ // force_sig_fault(signo, si_code, addr, current);
/*
* Oops. The kernel tried to access some bad page. We'll have to
* terminate things with extreme prejudice.
*/
- if ((unsigned long)siginfo.si_addr < PAGE_SIZE)
+ if ((unsigned long)addr < PAGE_SIZE)
pr_alert("Unable to handle kernel NULL pointer dereference");
else
pr_alert("Unable to handle kernel access");
- pr_cont(" at virtual address %p\n", siginfo.si_addr);
+ pr_cont(" at virtual address %p\n", addr);
die_if_kernel("Oops", regs, 0 /*error_code*/);
do_exit(SIGKILL);
}
diff --git a/arch/m68k/mm/kmap.c b/arch/m68k/mm/kmap.c
index c2a38321c96d..3b420f6d8822 100644
--- a/arch/m68k/mm/kmap.c
+++ b/arch/m68k/mm/kmap.c
@@ -89,7 +89,8 @@ static inline void free_io_area(void *addr)
for (p = &iolist ; (tmp = *p) ; p = &tmp->next) {
if (tmp->addr == addr) {
*p = tmp->next;
- __iounmap(tmp->addr, tmp->size);
+ /* remove gap added in get_io_area() */
+ __iounmap(tmp->addr, tmp->size - IO_SIZE);
kfree(tmp);
return;
}
diff --git a/arch/m68k/mvme147/config.c b/arch/m68k/mvme147/config.c
index 8778612d1f31..f8a710fd84cd 100644
--- a/arch/m68k/mvme147/config.c
+++ b/arch/m68k/mvme147/config.c
@@ -153,12 +153,14 @@ int mvme147_hwclk(int op, struct rtc_time *t)
if (!op) {
m147_rtc->ctrl = RTC_READ;
t->tm_year = bcd2int (m147_rtc->bcd_year);
- t->tm_mon = bcd2int (m147_rtc->bcd_mth);
+ t->tm_mon = bcd2int(m147_rtc->bcd_mth) - 1;
t->tm_mday = bcd2int (m147_rtc->bcd_dom);
t->tm_hour = bcd2int (m147_rtc->bcd_hr);
t->tm_min = bcd2int (m147_rtc->bcd_min);
t->tm_sec = bcd2int (m147_rtc->bcd_sec);
m147_rtc->ctrl = 0;
+ if (t->tm_year < 70)
+ t->tm_year += 100;
}
return 0;
}
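The RTC stores only a two-digit BCD year, and struct rtc_time counts years from 1900, so once the clock wraps past 1999 the raw value alone would yield dates before the 1970 epoch; the added windowing pushes values below 70 into 2000-2069. A standalone sketch of the conversion:

	#include <stdio.h>

	static int bcd2int(unsigned char b)	/* same idea as the helper above */
	{
		return (b >> 4) * 10 + (b & 0x0f);
	}

	int main(void)
	{
		unsigned char raw[] = { 0x99, 0x05 };	/* BCD years 99 and 05 */

		for (int i = 0; i < 2; i++) {
			int tm_year = bcd2int(raw[i]);

			if (tm_year < 70)		/* the windowing above */
				tm_year += 100;
			printf("raw %02x -> tm_year %d -> year %d\n",
			       raw[i], tm_year, 1900 + tm_year);
		}
		return 0;
	}

	/* raw 99 -> tm_year 99  -> year 1999
	 * raw 05 -> tm_year 105 -> year 2005 */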
diff --git a/arch/m68k/mvme16x/config.c b/arch/m68k/mvme16x/config.c
index 6fa06d4d16bf..4ffd9ef98de4 100644
--- a/arch/m68k/mvme16x/config.c
+++ b/arch/m68k/mvme16x/config.c
@@ -400,12 +400,14 @@ int mvme16x_hwclk(int op, struct rtc_time *t)
if (!op) {
rtc->ctrl = RTC_READ;
t->tm_year = bcd2int (rtc->bcd_year);
- t->tm_mon = bcd2int (rtc->bcd_mth);
+ t->tm_mon = bcd2int(rtc->bcd_mth) - 1;
t->tm_mday = bcd2int (rtc->bcd_dom);
t->tm_hour = bcd2int (rtc->bcd_hr);
t->tm_min = bcd2int (rtc->bcd_min);
t->tm_sec = bcd2int (rtc->bcd_sec);
rtc->ctrl = 0;
+ if (t->tm_year < 70)
+ t->tm_year += 100;
}
return 0;
}
diff --git a/arch/m68k/sun3/intersil.c b/arch/m68k/sun3/intersil.c
index 2cd0bcbe6f30..d911070af02a 100644
--- a/arch/m68k/sun3/intersil.c
+++ b/arch/m68k/sun3/intersil.c
@@ -48,9 +48,9 @@ int sun3_hwclk(int set, struct rtc_time *t)
todintersil->hour = t->tm_hour;
todintersil->minute = t->tm_min;
todintersil->second = t->tm_sec;
- todintersil->month = t->tm_mon;
+ todintersil->month = t->tm_mon + 1;
todintersil->day = t->tm_mday;
- todintersil->year = t->tm_year - 68;
+ todintersil->year = (t->tm_year - 68) % 100;
todintersil->weekday = t->tm_wday;
} else {
/* read clock */
@@ -58,10 +58,12 @@ int sun3_hwclk(int set, struct rtc_time *t)
t->tm_hour = todintersil->hour;
t->tm_min = todintersil->minute;
t->tm_sec = todintersil->second;
- t->tm_mon = todintersil->month;
+ t->tm_mon = todintersil->month - 1;
t->tm_mday = todintersil->day;
t->tm_year = todintersil->year + 68;
t->tm_wday = todintersil->weekday;
+ if (t->tm_year < 70)
+ t->tm_year += 100;
}
intersil_clock->cmd_reg = START_VAL;
diff --git a/arch/m68k/sun3x/time.c b/arch/m68k/sun3x/time.c
index 7a2c53d9f779..047e2bcee3d7 100644
--- a/arch/m68k/sun3x/time.c
+++ b/arch/m68k/sun3x/time.c
@@ -52,8 +52,8 @@ int sun3x_hwclk(int set, struct rtc_time *t)
h->hour = bin2bcd(t->tm_hour);
h->wday = bin2bcd(t->tm_wday);
h->mday = bin2bcd(t->tm_mday);
- h->month = bin2bcd(t->tm_mon);
- h->year = bin2bcd(t->tm_year);
+ h->month = bin2bcd(t->tm_mon + 1);
+ h->year = bin2bcd(t->tm_year % 100);
h->csr &= ~C_WRITE;
} else {
h->csr |= C_READ;
@@ -62,9 +62,11 @@ int sun3x_hwclk(int set, struct rtc_time *t)
t->tm_hour = bcd2bin(h->hour);
t->tm_wday = bcd2bin(h->wday);
t->tm_mday = bcd2bin(h->mday);
- t->tm_mon = bcd2bin(h->month);
+ t->tm_mon = bcd2bin(h->month) - 1;
t->tm_year = bcd2bin(h->year);
h->csr &= ~C_READ;
+ if (t->tm_year < 70)
+ t->tm_year += 100;
}
local_irq_restore(flags);
diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig
index 3817a3e2146c..d14782100088 100644
--- a/arch/microblaze/Kconfig
+++ b/arch/microblaze/Kconfig
@@ -19,7 +19,6 @@ config MICROBLAZE
select HAVE_ARCH_HASH
select HAVE_ARCH_KGDB
select HAVE_DEBUG_KMEMLEAK
- select HAVE_DMA_API_DEBUG
select HAVE_DYNAMIC_FTRACE
select HAVE_FTRACE_MCOUNT_RECORD
select HAVE_FUNCTION_GRAPH_TRACER
diff --git a/arch/microblaze/include/asm/pci.h b/arch/microblaze/include/asm/pci.h
index 5de871eb4a59..00337861472e 100644
--- a/arch/microblaze/include/asm/pci.h
+++ b/arch/microblaze/include/asm/pci.h
@@ -62,12 +62,6 @@ extern int pci_mmap_legacy_page_range(struct pci_bus *bus,
#define HAVE_PCI_LEGACY 1
-/* The PCI address space does equal the physical memory
- * address space (no IOMMU). The IDE and SCSI device layers use
- * this boolean for bounce buffer decisions.
- */
-#define PCI_DMA_BUS_IS_PHYS (1)
-
extern void pcibios_claim_one_bus(struct pci_bus *b);
extern void pcibios_finish_adding_to_bus(struct pci_bus *bus);
diff --git a/arch/microblaze/kernel/dma.c b/arch/microblaze/kernel/dma.c
index c91e8cef98dd..3145e7dc8ab1 100644
--- a/arch/microblaze/kernel/dma.c
+++ b/arch/microblaze/kernel/dma.c
@@ -184,14 +184,3 @@ const struct dma_map_ops dma_nommu_ops = {
.sync_sg_for_device = dma_nommu_sync_sg_for_device,
};
EXPORT_SYMBOL(dma_nommu_ops);
-
-/* Number of entries preallocated for DMA-API debugging */
-#define PREALLOC_DMA_DEBUG_ENTRIES (1 << 16)
-
-static int __init dma_init(void)
-{
- dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES);
-
- return 0;
-}
-fs_initcall(dma_init);
diff --git a/arch/microblaze/kernel/exceptions.c b/arch/microblaze/kernel/exceptions.c
index e6f338d0496b..eafff21fcb0e 100644
--- a/arch/microblaze/kernel/exceptions.c
+++ b/arch/microblaze/kernel/exceptions.c
@@ -60,16 +60,10 @@ asmlinkage void sw_exception(struct pt_regs *regs)
void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr)
{
- siginfo_t info;
-
if (kernel_mode(regs))
die("Exception in kernel mode", regs, signr);
- info.si_signo = signr;
- info.si_errno = 0;
- info.si_code = code;
- info.si_addr = (void __user *) addr;
- force_sig_info(signr, &info, current);
+ force_sig_fault(signr, code, (void __user *)addr, current);
}
asmlinkage void full_exception(struct pt_regs *regs, unsigned int type,
diff --git a/arch/microblaze/mm/fault.c b/arch/microblaze/mm/fault.c
index f91b30f8aaa8..af607447c683 100644
--- a/arch/microblaze/mm/fault.c
+++ b/arch/microblaze/mm/fault.c
@@ -88,7 +88,6 @@ void do_page_fault(struct pt_regs *regs, unsigned long address,
{
struct vm_area_struct *vma;
struct mm_struct *mm = current->mm;
- siginfo_t info;
int code = SEGV_MAPERR;
int is_write = error_code & ESR_S;
int fault;
@@ -269,11 +268,6 @@ bad_area_nosemaphore:
/* User mode accesses cause a SIGSEGV */
if (user_mode(regs)) {
_exception(SIGSEGV, regs, code, address);
-/* info.si_signo = SIGSEGV;
- info.si_errno = 0;
- info.si_code = code;
- info.si_addr = (void *) address;
- force_sig_info(SIGSEGV, &info, current);*/
return;
}
@@ -295,11 +289,7 @@ out_of_memory:
do_sigbus:
up_read(&mm->mmap_sem);
if (user_mode(regs)) {
- info.si_signo = SIGBUS;
- info.si_errno = 0;
- info.si_code = BUS_ADRERR;
- info.si_addr = (void __user *)address;
- force_sig_info(SIGBUS, &info, current);
+ force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address, current);
return;
}
bad_page_fault(regs, address, SIGBUS);
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 225c95da23ce..7074b2215f36 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -42,7 +42,6 @@ config MIPS
select HAVE_C_RECORDMCOUNT
select HAVE_DEBUG_KMEMLEAK
select HAVE_DEBUG_STACKOVERFLOW
- select HAVE_DMA_API_DEBUG
select HAVE_DMA_CONTIGUOUS
select HAVE_DYNAMIC_FTRACE
select HAVE_EXIT_THREAD
@@ -132,7 +131,7 @@ config MIPS_GENERIC
config MIPS_ALCHEMY
bool "Alchemy processor based machines"
- select ARCH_PHYS_ADDR_T_64BIT
+ select PHYS_ADDR_T_64BIT
select CEVT_R4K
select CSRC_R4K
select IRQ_MIPS_CPU
@@ -890,7 +889,7 @@ config CAVIUM_OCTEON_SOC
bool "Cavium Networks Octeon SoC based boards"
select CEVT_R4K
select ARCH_HAS_PHYS_TO_DMA
- select ARCH_PHYS_ADDR_T_64BIT
+ select PHYS_ADDR_T_64BIT
select DMA_COHERENT
select SYS_SUPPORTS_64BIT_KERNEL
select SYS_SUPPORTS_BIG_ENDIAN
@@ -912,6 +911,7 @@ config CAVIUM_OCTEON_SOC
select MIPS_NR_CPU_NR_MAP_1024
select BUILTIN_DTB
select MTD_COMPLEX_MAPPINGS
+ select SWIOTLB
select SYS_SUPPORTS_RELOCATABLE
help
This option supports all of the Octeon reference boards from Cavium
@@ -936,7 +936,7 @@ config NLM_XLR_BOARD
select SWAP_IO_SPACE
select SYS_SUPPORTS_32BIT_KERNEL
select SYS_SUPPORTS_64BIT_KERNEL
- select ARCH_PHYS_ADDR_T_64BIT
+ select PHYS_ADDR_T_64BIT
select SYS_SUPPORTS_BIG_ENDIAN
select SYS_SUPPORTS_HIGHMEM
select DMA_COHERENT
@@ -962,7 +962,7 @@ config NLM_XLP_BOARD
select HW_HAS_PCI
select SYS_SUPPORTS_32BIT_KERNEL
select SYS_SUPPORTS_64BIT_KERNEL
- select ARCH_PHYS_ADDR_T_64BIT
+ select PHYS_ADDR_T_64BIT
select GPIOLIB
select SYS_SUPPORTS_BIG_ENDIAN
select SYS_SUPPORTS_LITTLE_ENDIAN
@@ -1101,9 +1101,6 @@ config GPIO_TXX9
config FW_CFE
bool
-config ARCH_DMA_ADDR_T_64BIT
- def_bool (HIGHMEM && ARCH_PHYS_ADDR_T_64BIT) || 64BIT
-
config ARCH_SUPPORTS_UPROBES
bool
@@ -1122,9 +1119,6 @@ config DMA_NONCOHERENT
bool
select NEED_DMA_MAP_STATE
-config NEED_DMA_MAP_STATE
- bool
-
config SYS_HAS_EARLY_PRINTK
bool
@@ -1373,6 +1367,7 @@ config CPU_LOONGSON3
select MIPS_PGD_C0_CONTEXT
select MIPS_L1_CACHE_SHIFT_6
select GPIOLIB
+ select SWIOTLB
help
The Loongson 3 processor implements the MIPS64R2 instruction
set with many extensions.
@@ -1770,7 +1765,7 @@ config CPU_MIPS32_R5_XPA
depends on SYS_SUPPORTS_HIGHMEM
select XPA
select HIGHMEM
- select ARCH_PHYS_ADDR_T_64BIT
+ select PHYS_ADDR_T_64BIT
default n
help
Choose this option if you want to enable the Extended Physical
@@ -2402,9 +2397,6 @@ config SB1_PASS_2_1_WORKAROUNDS
default y
-config ARCH_PHYS_ADDR_T_64BIT
- bool
-
choice
prompt "SmartMIPS or microMIPS ASE support"
@@ -2556,7 +2548,7 @@ config ARCH_DISCONTIGMEM_ENABLE
Say Y to support efficient handling of discontiguous physical memory,
for architectures which are either NUMA (Non-Uniform Memory Access)
or have huge holes in the physical address space for other reasons.
- See <file:Documentation/vm/numa> for more.
+ See <file:Documentation/vm/numa.rst> for more.
config ARCH_SPARSEMEM_ENABLE
bool
diff --git a/arch/mips/boot/compressed/uart-16550.c b/arch/mips/boot/compressed/uart-16550.c
index b3043c08f769..aee8d7b8f091 100644
--- a/arch/mips/boot/compressed/uart-16550.c
+++ b/arch/mips/boot/compressed/uart-16550.c
@@ -18,9 +18,9 @@
#define PORT(offset) (CKSEG1ADDR(AR7_REGS_UART0) + (4 * offset))
#endif
-#if defined(CONFIG_MACH_JZ4740) || defined(CONFIG_MACH_JZ4780)
-#include <asm/mach-jz4740/base.h>
-#define PORT(offset) (CKSEG1ADDR(JZ4740_UART0_BASE_ADDR) + (4 * offset))
+#ifdef CONFIG_MACH_INGENIC
+#define INGENIC_UART0_BASE_ADDR 0x10030000
+#define PORT(offset) (CKSEG1ADDR(INGENIC_UART0_BASE_ADDR) + (4 * offset))
#endif
#ifdef CONFIG_CPU_XLR
diff --git a/arch/mips/boot/dts/xilfpga/Makefile b/arch/mips/boot/dts/xilfpga/Makefile
index 9987e0e378c5..69ca00590b8d 100644
--- a/arch/mips/boot/dts/xilfpga/Makefile
+++ b/arch/mips/boot/dts/xilfpga/Makefile
@@ -1,4 +1,2 @@
# SPDX-License-Identifier: GPL-2.0
dtb-$(CONFIG_FIT_IMAGE_FDT_XILFPGA) += nexys4ddr.dtb
-
-obj-y += $(patsubst %.dtb, %.dtb.o, $(dtb-y))
diff --git a/arch/mips/cavium-octeon/Kconfig b/arch/mips/cavium-octeon/Kconfig
index b5eee1a57d6c..4984e462be30 100644
--- a/arch/mips/cavium-octeon/Kconfig
+++ b/arch/mips/cavium-octeon/Kconfig
@@ -67,18 +67,6 @@ config CAVIUM_OCTEON_LOCK_L2_MEMCPY
help
Lock the kernel's implementation of memcpy() into L2.
-config IOMMU_HELPER
- bool
-
-config NEED_SG_DMA_LENGTH
- bool
-
-config SWIOTLB
- def_bool y
- select DMA_DIRECT_OPS
- select IOMMU_HELPER
- select NEED_SG_DMA_LENGTH
-
config OCTEON_ILM
tristate "Module to measure interrupt latency using Octeon CIU Timer"
help
diff --git a/arch/mips/generic/Platform b/arch/mips/generic/Platform
index b51432dd10b6..0dd0d5d460a5 100644
--- a/arch/mips/generic/Platform
+++ b/arch/mips/generic/Platform
@@ -16,3 +16,4 @@ all-$(CONFIG_MIPS_GENERIC) := vmlinux.gz.itb
its-y := vmlinux.its.S
its-$(CONFIG_FIT_IMAGE_FDT_BOSTON) += board-boston.its.S
its-$(CONFIG_FIT_IMAGE_FDT_NI169445) += board-ni169445.its.S
+its-$(CONFIG_FIT_IMAGE_FDT_XILFPGA) += board-xilfpga.its.S
diff --git a/arch/mips/include/asm/pci.h b/arch/mips/include/asm/pci.h
index 2339f42f047a..436099883022 100644
--- a/arch/mips/include/asm/pci.h
+++ b/arch/mips/include/asm/pci.h
@@ -121,13 +121,6 @@ extern unsigned long PCIBIOS_MIN_MEM;
#include <linux/string.h>
#include <asm/io.h>
-/*
- * The PCI address space does equal the physical memory address space.
- * The networking and block device layers use this boolean for bounce
- * buffer decisions.
- */
-#define PCI_DMA_BUS_IS_PHYS (1)
-
#ifdef CONFIG_PCI_DOMAINS_GENERIC
static inline int pci_proc_domain(struct pci_bus *bus)
{
diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c
index b9e9bf628849..3775a8d694fb 100644
--- a/arch/mips/kernel/process.c
+++ b/arch/mips/kernel/process.c
@@ -721,6 +721,10 @@ int mips_set_process_fp_mode(struct task_struct *task, unsigned int value)
if (value & ~known_bits)
return -EOPNOTSUPP;
+ /* Setting FRE without FR is not supported. */
+ if ((value & (PR_FP_MODE_FR | PR_FP_MODE_FRE)) == PR_FP_MODE_FRE)
+ return -EOPNOTSUPP;
+
/* Avoid inadvertently triggering emulation */
if ((value & PR_FP_MODE_FR) && raw_cpu_has_fpu &&
!(raw_current_cpu_data.fpu_id & MIPS_FPIR_F64))
diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c
index 0b23b1ad99e6..0c0c23c9c9f5 100644
--- a/arch/mips/kernel/ptrace.c
+++ b/arch/mips/kernel/ptrace.c
@@ -463,7 +463,7 @@ static int fpr_get_msa(struct task_struct *target,
/*
* Copy the floating-point context to the supplied NT_PRFPREG buffer.
* Choose the appropriate helper for general registers, and then copy
- * the FCSR register separately.
+ * the FCSR and FIR registers separately.
*/
static int fpr_get(struct task_struct *target,
const struct user_regset *regset,
@@ -471,6 +471,7 @@ static int fpr_get(struct task_struct *target,
void *kbuf, void __user *ubuf)
{
const int fcr31_pos = NUM_FPU_REGS * sizeof(elf_fpreg_t);
+ const int fir_pos = fcr31_pos + sizeof(u32);
int err;
if (sizeof(target->thread.fpu.fpr[0]) == sizeof(elf_fpreg_t))
@@ -483,6 +484,12 @@ static int fpr_get(struct task_struct *target,
err = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
&target->thread.fpu.fcr31,
fcr31_pos, fcr31_pos + sizeof(u32));
+ if (err)
+ return err;
+
+ err = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+ &boot_cpu_data.fpu_id,
+ fir_pos, fir_pos + sizeof(u32));
return err;
}
@@ -531,7 +538,8 @@ static int fpr_set_msa(struct task_struct *target,
/*
* Copy the supplied NT_PRFPREG buffer to the floating-point context.
* Choose the appropriate helper for general registers, and then copy
- * the FCSR register separately.
+ * the FCSR register separately. Ignore the incoming FIR register
+ * contents though, as the register is read-only.
*
* We optimize for the case where `count % sizeof(elf_fpreg_t) == 0',
* which is supposed to have been guaranteed by the kernel before
@@ -545,6 +553,7 @@ static int fpr_set(struct task_struct *target,
const void *kbuf, const void __user *ubuf)
{
const int fcr31_pos = NUM_FPU_REGS * sizeof(elf_fpreg_t);
+ const int fir_pos = fcr31_pos + sizeof(u32);
u32 fcr31;
int err;
@@ -572,6 +581,11 @@ static int fpr_set(struct task_struct *target,
ptrace_setfcr31(target, fcr31);
}
+ if (count > 0)
+ err = user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf,
+ fir_pos,
+ fir_pos + sizeof(u32));
+
return err;
}
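Taken together, the offsets used by fpr_get() and fpr_set() (fcr31_pos = NUM_FPU_REGS * sizeof(elf_fpreg_t), fir_pos = fcr31_pos + 4) imply the following NT_PRFPREG regset layout; the struct below is purely illustrative, not something the patch defines:

	struct mips_prfpreg_layout {	/* illustrative only */
		u64 fp_r[32];		/* NUM_FPU_REGS doubleword FP registers */
		u32 fcr31;		/* FP control/status, read-write */
		u32 fir;		/* FP implementation register, read-only:
					 * fpr_set() now explicitly ignores
					 * incoming data for this word */
	};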
@@ -793,7 +807,7 @@ long arch_ptrace(struct task_struct *child, long request,
fregs = get_fpu_regs(child);
#ifdef CONFIG_32BIT
- if (test_thread_flag(TIF_32BIT_FPREGS)) {
+ if (test_tsk_thread_flag(child, TIF_32BIT_FPREGS)) {
/*
* The odd registers are actually the high
* order bits of the values stored in the even
@@ -804,7 +818,7 @@ long arch_ptrace(struct task_struct *child, long request,
break;
}
#endif
- tmp = get_fpr32(&fregs[addr - FPR_BASE], 0);
+ tmp = get_fpr64(&fregs[addr - FPR_BASE], 0);
break;
case PC:
tmp = regs->cp0_epc;
@@ -888,7 +902,7 @@ long arch_ptrace(struct task_struct *child, long request,
init_fp_ctx(child);
#ifdef CONFIG_32BIT
- if (test_thread_flag(TIF_32BIT_FPREGS)) {
+ if (test_tsk_thread_flag(child, TIF_32BIT_FPREGS)) {
/*
* The odd registers are actually the high
* order bits of the values stored in the even
diff --git a/arch/mips/kernel/ptrace32.c b/arch/mips/kernel/ptrace32.c
index 2b9260f92ccd..f30c381d3e1c 100644
--- a/arch/mips/kernel/ptrace32.c
+++ b/arch/mips/kernel/ptrace32.c
@@ -99,7 +99,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
break;
}
fregs = get_fpu_regs(child);
- if (test_thread_flag(TIF_32BIT_FPREGS)) {
+ if (test_tsk_thread_flag(child, TIF_32BIT_FPREGS)) {
/*
* The odd registers are actually the high
* order bits of the values stored in the even
@@ -109,7 +109,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
addr & 1);
break;
}
- tmp = get_fpr32(&fregs[addr - FPR_BASE], 0);
+ tmp = get_fpr64(&fregs[addr - FPR_BASE], 0);
break;
case PC:
tmp = regs->cp0_epc;
@@ -212,7 +212,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
sizeof(child->thread.fpu));
child->thread.fpu.fcr31 = 0;
}
- if (test_thread_flag(TIF_32BIT_FPREGS)) {
+ if (test_tsk_thread_flag(child, TIF_32BIT_FPREGS)) {
/*
* The odd registers are actually the high
* order bits of the values stored in the even
diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
index 967e9e4e795e..d67fa74622ee 100644
--- a/arch/mips/kernel/traps.c
+++ b/arch/mips/kernel/traps.c
@@ -699,17 +699,11 @@ static int simulate_sync(struct pt_regs *regs, unsigned int opcode)
asmlinkage void do_ov(struct pt_regs *regs)
{
enum ctx_state prev_state;
- siginfo_t info;
-
- clear_siginfo(&info);
- info.si_signo = SIGFPE;
- info.si_code = FPE_INTOVF;
- info.si_addr = (void __user *)regs->cp0_epc;
prev_state = exception_enter();
die_if_kernel("Integer overflow", regs);
- force_sig_info(SIGFPE, &info, current);
+ force_sig_fault(SIGFPE, FPE_INTOVF, (void __user *)regs->cp0_epc, current);
exception_exit(prev_state);
}
@@ -722,32 +716,27 @@ asmlinkage void do_ov(struct pt_regs *regs)
void force_fcr31_sig(unsigned long fcr31, void __user *fault_addr,
struct task_struct *tsk)
{
- struct siginfo si;
-
- clear_siginfo(&si);
- si.si_addr = fault_addr;
- si.si_signo = SIGFPE;
+ int si_code = FPE_FLTUNK;
if (fcr31 & FPU_CSR_INV_X)
- si.si_code = FPE_FLTINV;
+ si_code = FPE_FLTINV;
else if (fcr31 & FPU_CSR_DIV_X)
- si.si_code = FPE_FLTDIV;
+ si_code = FPE_FLTDIV;
else if (fcr31 & FPU_CSR_OVF_X)
- si.si_code = FPE_FLTOVF;
+ si_code = FPE_FLTOVF;
else if (fcr31 & FPU_CSR_UDF_X)
- si.si_code = FPE_FLTUND;
+ si_code = FPE_FLTUND;
else if (fcr31 & FPU_CSR_INE_X)
- si.si_code = FPE_FLTRES;
+ si_code = FPE_FLTRES;
- force_sig_info(SIGFPE, &si, tsk);
+ force_sig_fault(SIGFPE, si_code, fault_addr, tsk);
}
int process_fpemu_return(int sig, void __user *fault_addr, unsigned long fcr31)
{
- struct siginfo si;
+ int si_code;
struct vm_area_struct *vma;
- clear_siginfo(&si);
switch (sig) {
case 0:
return 0;
@@ -757,23 +746,18 @@ int process_fpemu_return(int sig, void __user *fault_addr, unsigned long fcr31)
return 1;
case SIGBUS:
- si.si_addr = fault_addr;
- si.si_signo = sig;
- si.si_code = BUS_ADRERR;
- force_sig_info(sig, &si, current);
+ force_sig_fault(SIGBUS, BUS_ADRERR, fault_addr, current);
return 1;
case SIGSEGV:
- si.si_addr = fault_addr;
- si.si_signo = sig;
down_read(&current->mm->mmap_sem);
vma = find_vma(current->mm, (unsigned long)fault_addr);
if (vma && (vma->vm_start <= (unsigned long)fault_addr))
- si.si_code = SEGV_ACCERR;
+ si_code = SEGV_ACCERR;
else
- si.si_code = SEGV_MAPERR;
+ si_code = SEGV_MAPERR;
up_read(&current->mm->mmap_sem);
- force_sig_info(sig, &si, current);
+ force_sig_fault(SIGSEGV, si_code, fault_addr, current);
return 1;
default:
@@ -896,10 +880,8 @@ out:
void do_trap_or_bp(struct pt_regs *regs, unsigned int code, int si_code,
const char *str)
{
- siginfo_t info;
char b[40];
- clear_siginfo(&info);
#ifdef CONFIG_KGDB_LOW_LEVEL_TRAP
if (kgdb_ll_trap(DIE_TRAP, str, regs, code, current->thread.trap_nr,
SIGTRAP) == NOTIFY_STOP)
@@ -921,13 +903,9 @@ void do_trap_or_bp(struct pt_regs *regs, unsigned int code, int si_code,
case BRK_DIVZERO:
scnprintf(b, sizeof(b), "%s instruction in kernel code", str);
die_if_kernel(b, regs);
- if (code == BRK_DIVZERO)
- info.si_code = FPE_INTDIV;
- else
- info.si_code = FPE_INTOVF;
- info.si_signo = SIGFPE;
- info.si_addr = (void __user *) regs->cp0_epc;
- force_sig_info(SIGFPE, &info, current);
+ force_sig_fault(SIGFPE,
+ code == BRK_DIVZERO ? FPE_INTDIV : FPE_INTOVF,
+ (void __user *) regs->cp0_epc, current);
break;
case BRK_BUG:
die_if_kernel("Kernel bug detected", regs);
@@ -952,9 +930,7 @@ void do_trap_or_bp(struct pt_regs *regs, unsigned int code, int si_code,
scnprintf(b, sizeof(b), "%s instruction in kernel code", str);
die_if_kernel(b, regs);
if (si_code) {
- info.si_signo = SIGTRAP;
- info.si_code = si_code;
- force_sig_info(SIGTRAP, &info, current);
+ force_sig_fault(SIGTRAP, si_code, NULL, current);
} else {
force_sig(SIGTRAP, current);
}
@@ -1506,13 +1482,8 @@ asmlinkage void do_mdmx(struct pt_regs *regs)
*/
asmlinkage void do_watch(struct pt_regs *regs)
{
- siginfo_t info;
enum ctx_state prev_state;
- clear_siginfo(&info);
- info.si_signo = SIGTRAP;
- info.si_code = TRAP_HWBKPT;
-
prev_state = exception_enter();
/*
* Clear WP (bit 22) bit of cause register so we don't loop
@@ -1528,7 +1499,7 @@ asmlinkage void do_watch(struct pt_regs *regs)
if (test_tsk_thread_flag(current, TIF_LOAD_WATCH)) {
mips_read_watch_registers();
local_irq_enable();
- force_sig_info(SIGTRAP, &info, current);
+ force_sig_fault(SIGTRAP, TRAP_HWBKPT, NULL, current);
} else {
mips_clear_watch_registers();
local_irq_enable();
diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c
index 2549fdd27ee1..0f725e9cee8f 100644
--- a/arch/mips/kvm/mips.c
+++ b/arch/mips/kvm/mips.c
@@ -45,7 +45,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
{ "cache", VCPU_STAT(cache_exits), KVM_STAT_VCPU },
{ "signal", VCPU_STAT(signal_exits), KVM_STAT_VCPU },
{ "interrupt", VCPU_STAT(int_exits), KVM_STAT_VCPU },
- { "cop_unsuable", VCPU_STAT(cop_unusable_exits), KVM_STAT_VCPU },
+ { "cop_unusable", VCPU_STAT(cop_unusable_exits), KVM_STAT_VCPU },
{ "tlbmod", VCPU_STAT(tlbmod_exits), KVM_STAT_VCPU },
{ "tlbmiss_ld", VCPU_STAT(tlbmiss_ld_exits), KVM_STAT_VCPU },
{ "tlbmiss_st", VCPU_STAT(tlbmiss_st_exits), KVM_STAT_VCPU },
diff --git a/arch/mips/loongson64/Kconfig b/arch/mips/loongson64/Kconfig
index 72af0c183969..c79e6a565572 100644
--- a/arch/mips/loongson64/Kconfig
+++ b/arch/mips/loongson64/Kconfig
@@ -130,21 +130,6 @@ config LOONGSON_UART_BASE
default y
depends on EARLY_PRINTK || SERIAL_8250
-config IOMMU_HELPER
- bool
-
-config NEED_SG_DMA_LENGTH
- bool
-
-config SWIOTLB
- bool "Soft IOMMU Support for All-Memory DMA"
- default y
- depends on CPU_LOONGSON3
- select DMA_DIRECT_OPS
- select IOMMU_HELPER
- select NEED_SG_DMA_LENGTH
- select NEED_DMA_MAP_STATE
-
config PHYS48_TO_HT40
bool
default y if CPU_LOONGSON3
diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c
index 6f534b209971..e12dfa48b478 100644
--- a/arch/mips/mm/c-r4k.c
+++ b/arch/mips/mm/c-r4k.c
@@ -851,9 +851,12 @@ static void r4k_dma_cache_wback_inv(unsigned long addr, unsigned long size)
/*
* Either no secondary cache or the available caches don't have the
* subset property so we have to flush the primary caches
- * explicitly
+ * explicitly.
+ * If an INDEX-type operation would require an IPI, use the HIT-type
+ * alternative instead, as IPIs cannot be used here because interrupts
+ * may be disabled.
*/
- if (size >= dcache_size) {
+ if (!r4k_op_needs_ipi(R4K_INDEX) && size >= dcache_size) {
r4k_blast_dcache();
} else {
R4600_HIT_CACHEOP_WAR_IMPL;
@@ -890,7 +893,7 @@ static void r4k_dma_cache_inv(unsigned long addr, unsigned long size)
return;
}
- if (size >= dcache_size) {
+ if (!r4k_op_needs_ipi(R4K_INDEX) && size >= dcache_size) {
r4k_blast_dcache();
} else {
R4600_HIT_CACHEOP_WAR_IMPL;
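
The guard added here encodes the comment's rule: an index-type whole-cache op
may need to run on every CPU, which cannot be arranged via IPI with interrupts
disabled, so the code falls back to hit-type ops on just the affected range. A
sketch of the decision, assuming the helpers named in this hunk
(r4k_op_needs_ipi, r4k_blast_dcache, blast_dcache_range):

    static void dma_cache_wback_inv(unsigned long addr, unsigned long size)
    {
            /* Index-type whole-cache blasts are only safe without an IPI. */
            if (!r4k_op_needs_ipi(R4K_INDEX) && size >= dcache_size)
                    r4k_blast_dcache();                     /* index-type */
            else
                    blast_dcache_range(addr, addr + size);  /* hit-type */
    }
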
diff --git a/arch/mips/mm/dma-default.c b/arch/mips/mm/dma-default.c
index dcafa43613b6..f9fef0028ca2 100644
--- a/arch/mips/mm/dma-default.c
+++ b/arch/mips/mm/dma-default.c
@@ -402,13 +402,3 @@ static const struct dma_map_ops mips_default_dma_map_ops = {
const struct dma_map_ops *mips_dma_map_ops = &mips_default_dma_map_ops;
EXPORT_SYMBOL(mips_dma_map_ops);
-
-#define PREALLOC_DMA_DEBUG_ENTRIES (1 << 16)
-
-static int __init mips_dma_init(void)
-{
- dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES);
-
- return 0;
-}
-fs_initcall(mips_dma_init);
diff --git a/arch/mips/mm/fault.c b/arch/mips/mm/fault.c
index 4f8f5bf46977..5f71f2b903b7 100644
--- a/arch/mips/mm/fault.c
+++ b/arch/mips/mm/fault.c
@@ -42,7 +42,7 @@ static void __kprobes __do_page_fault(struct pt_regs *regs, unsigned long write,
struct task_struct *tsk = current;
struct mm_struct *mm = tsk->mm;
const int field = sizeof(unsigned long) * 2;
- siginfo_t info;
+ int si_code;
int fault;
unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
@@ -63,7 +63,7 @@ static void __kprobes __do_page_fault(struct pt_regs *regs, unsigned long write,
return;
#endif
- info.si_code = SEGV_MAPERR;
+ si_code = SEGV_MAPERR;
/*
* We fault-in kernel-space virtual memory on-demand. The
@@ -112,7 +112,7 @@ retry:
* we can handle it..
*/
good_area:
- info.si_code = SEGV_ACCERR;
+ si_code = SEGV_ACCERR;
if (write) {
if (!(vma->vm_flags & VM_WRITE))
@@ -223,11 +223,7 @@ bad_area_nosemaphore:
pr_cont("\n");
}
current->thread.trap_nr = (regs->cp0_cause >> 2) & 0x1f;
- info.si_signo = SIGSEGV;
- info.si_errno = 0;
- /* info.si_code has been set above */
- info.si_addr = (void __user *) address;
- force_sig_info(SIGSEGV, &info, tsk);
+ force_sig_fault(SIGSEGV, si_code, (void __user *)address, tsk);
return;
}
@@ -283,11 +279,7 @@ do_sigbus:
#endif
current->thread.trap_nr = (regs->cp0_cause >> 2) & 0x1f;
tsk->thread.cp0_badvaddr = address;
- info.si_signo = SIGBUS;
- info.si_errno = 0;
- info.si_code = BUS_ADRERR;
- info.si_addr = (void __user *) address;
- force_sig_info(SIGBUS, &info, tsk);
+ force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address, tsk);
return;
#ifndef CONFIG_64BIT
diff --git a/arch/mips/netlogic/Kconfig b/arch/mips/netlogic/Kconfig
index 7fcfc7fe9f14..412351c5acc6 100644
--- a/arch/mips/netlogic/Kconfig
+++ b/arch/mips/netlogic/Kconfig
@@ -83,10 +83,4 @@ endif
config NLM_COMMON
bool
-config IOMMU_HELPER
- bool
-
-config NEED_SG_DMA_LENGTH
- bool
-
endif
diff --git a/arch/mips/pci/ops-pmcmsp.c b/arch/mips/pci/ops-pmcmsp.c
index dd2d9f7e9412..7649372103af 100644
--- a/arch/mips/pci/ops-pmcmsp.c
+++ b/arch/mips/pci/ops-pmcmsp.c
@@ -83,18 +83,6 @@ static int show_msp_pci_counts(struct seq_file *m, void *v)
return 0;
}
-static int msp_pci_rd_cnt_open(struct inode *inode, struct file *file)
-{
- return single_open(file, show_msp_pci_counts, NULL);
-}
-
-static const struct file_operations msp_pci_rd_cnt_fops = {
- .open = msp_pci_rd_cnt_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
/*****************************************************************************
*
* FUNCTION: gen_pci_cfg_wr_show
@@ -160,18 +148,6 @@ static int gen_pci_cfg_wr_show(struct seq_file *m, void *v)
return 0;
}
-static int gen_pci_cfg_wr_open(struct inode *inode, struct file *file)
-{
- return single_open(file, gen_pci_cfg_wr_show, NULL);
-}
-
-static const struct file_operations gen_pci_cfg_wr_fops = {
- .open = gen_pci_cfg_wr_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
/*****************************************************************************
*
* FUNCTION: pci_proc_init
@@ -188,8 +164,8 @@ static const struct file_operations gen_pci_cfg_wr_fops = {
****************************************************************************/
static void pci_proc_init(void)
{
- proc_create("pmc_msp_pci_rd_cnt", 0, NULL, &msp_pci_rd_cnt_fops);
- proc_create("pmc_msp_pci_cfg_wr", 0, NULL, &gen_pci_cfg_wr_fops);
+ proc_create_single("pmc_msp_pci_rd_cnt", 0, NULL, show_msp_pci_counts);
+ proc_create_single("pmc_msp_pci_cfg_wr", 0, NULL, gen_pci_cfg_wr_show);
}
#endif /* CONFIG_PROC_FS && PCI_COUNTERS */
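
proc_create_single() (and proc_create_single_data(), used for bus_watcher
below) absorbs the single_open() boilerplate: the caller passes only the
seq_file show routine and the proc core supplies the open/read/llseek/release
plumbing. A minimal sketch of the new-style usage ("demo" and demo_show are
made up for illustration):

    #include <linux/proc_fs.h>
    #include <linux/seq_file.h>

    static int demo_show(struct seq_file *m, void *v)
    {
            seq_puts(m, "hello from /proc/demo\n");
            return 0;
    }

    static int __init demo_proc_init(void)
    {
            /* One call replaces the open callback and file_operations table. */
            if (!proc_create_single("demo", 0444, NULL, demo_show))
                    return -ENOMEM;
            return 0;
    }
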
diff --git a/arch/mips/sibyte/common/bus_watcher.c b/arch/mips/sibyte/common/bus_watcher.c
index a4e55999ecb4..4bb85de9229b 100644
--- a/arch/mips/sibyte/common/bus_watcher.c
+++ b/arch/mips/sibyte/common/bus_watcher.c
@@ -142,24 +142,12 @@ static int bw_proc_show(struct seq_file *m, void *v)
return 0;
}
-static int bw_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, bw_proc_show, PDE_DATA(inode));
-}
-
-static const struct file_operations bw_proc_fops = {
- .open = bw_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
static void create_proc_decoder(struct bw_stats_struct *stats)
{
struct proc_dir_entry *ent;
- ent = proc_create_data("bus_watcher", S_IWUSR | S_IRUGO, NULL,
- &bw_proc_fops, stats);
+ ent = proc_create_single_data("bus_watcher", S_IWUSR | S_IRUGO, NULL,
+ bw_proc_show, stats);
if (!ent) {
printk(KERN_INFO "Unable to initialize bus_watcher /proc entry\n");
return;
diff --git a/arch/nds32/Kconfig b/arch/nds32/Kconfig
index 249f38d3388f..6aed974276d8 100644
--- a/arch/nds32/Kconfig
+++ b/arch/nds32/Kconfig
@@ -5,10 +5,19 @@
config NDS32
def_bool y
+ select ARCH_HAS_SYNC_DMA_FOR_CPU
+ select ARCH_HAS_SYNC_DMA_FOR_DEVICE
select ARCH_WANT_FRAME_POINTERS if FTRACE
select CLKSRC_MMIO
select CLONE_BACKWARDS
select COMMON_CLK
+ select DMA_NONCOHERENT_OPS
+ select GENERIC_ASHLDI3
+ select GENERIC_ASHRDI3
+ select GENERIC_LSHRDI3
+ select GENERIC_CMPDI2
+ select GENERIC_MULDI3
+ select GENERIC_UCMPDI2
select GENERIC_ATOMIC64
select GENERIC_CPU_DEVICES
select GENERIC_CLOCKEVENTS
@@ -82,6 +91,7 @@ endmenu
menu "Kernel Features"
source "kernel/Kconfig.preempt"
+source "kernel/Kconfig.freezer"
source "mm/Kconfig"
source "kernel/Kconfig.hz"
endmenu
diff --git a/arch/nds32/Kconfig.cpu b/arch/nds32/Kconfig.cpu
index ba44cc539da9..b8c8984d1456 100644
--- a/arch/nds32/Kconfig.cpu
+++ b/arch/nds32/Kconfig.cpu
@@ -1,10 +1,11 @@
comment "Processor Features"
config CPU_BIG_ENDIAN
- bool "Big endian"
+ def_bool !CPU_LITTLE_ENDIAN
config CPU_LITTLE_ENDIAN
- def_bool !CPU_BIG_ENDIAN
+ bool "Little endian"
+ default y
config HWZOL
bool "hardware zero overhead loop support"
diff --git a/arch/nds32/Makefile b/arch/nds32/Makefile
index 91f933d5a962..513bb2e9baf9 100644
--- a/arch/nds32/Makefile
+++ b/arch/nds32/Makefile
@@ -23,9 +23,6 @@ export TEXTADDR
# If we have a machine-specific directory, then include it in the build.
core-y += arch/nds32/kernel/ arch/nds32/mm/
libs-y += arch/nds32/lib/
-LIBGCC_PATH := \
- $(shell $(CC) $(KBUILD_CFLAGS) $(KCFLAGS) -print-libgcc-file-name)
-libs-y += $(LIBGCC_PATH)
ifneq '$(CONFIG_NDS32_BUILTIN_DTB)' '""'
BUILTIN_DTB := y
@@ -35,8 +32,12 @@ endif
ifdef CONFIG_CPU_LITTLE_ENDIAN
KBUILD_CFLAGS += $(call cc-option, -EL)
+KBUILD_AFLAGS += $(call cc-option, -EL)
+LDFLAGS += $(call cc-option, -EL)
else
KBUILD_CFLAGS += $(call cc-option, -EB)
+KBUILD_AFLAGS += $(call cc-option, -EB)
+LDFLAGS += $(call cc-option, -EB)
endif
boot := arch/nds32/boot
diff --git a/arch/nds32/include/asm/Kbuild b/arch/nds32/include/asm/Kbuild
index d1b1f89d44f6..dbc4e5422550 100644
--- a/arch/nds32/include/asm/Kbuild
+++ b/arch/nds32/include/asm/Kbuild
@@ -14,9 +14,11 @@ generic-y += cputime.h
generic-y += device.h
generic-y += div64.h
generic-y += dma.h
+generic-y += dma-mapping.h
generic-y += emergency-restart.h
generic-y += errno.h
generic-y += exec.h
+generic-y += export.h
generic-y += fb.h
generic-y += fcntl.h
generic-y += ftrace.h
@@ -50,6 +52,7 @@ generic-y += switch_to.h
generic-y += timex.h
generic-y += topology.h
generic-y += trace_clock.h
+generic-y += xor.h
generic-y += unaligned.h
generic-y += user.h
generic-y += vga.h
diff --git a/arch/nds32/include/asm/bitfield.h b/arch/nds32/include/asm/bitfield.h
index c73f71d67744..8e84fc385b94 100644
--- a/arch/nds32/include/asm/bitfield.h
+++ b/arch/nds32/include/asm/bitfield.h
@@ -336,7 +336,7 @@
#define INT_MASK_mskIDIVZE ( 0x1 << INT_MASK_offIDIVZE )
#define INT_MASK_mskDSSIM ( 0x1 << INT_MASK_offDSSIM )
-#define INT_MASK_INITAIAL_VAL 0x10003
+#define INT_MASK_INITAIAL_VAL (INT_MASK_mskDSSIM|INT_MASK_mskIDIVZE)
/******************************************************************************
* ir15: INT_PEND (Interrupt Pending Register)
@@ -396,6 +396,7 @@
#define MMU_CTL_D8KB 1
#define MMU_CTL_UNA ( 0x1 << MMU_CTL_offUNA )
+#define MMU_CTL_CACHEABLE_NON 0
#define MMU_CTL_CACHEABLE_WB 2
#define MMU_CTL_CACHEABLE_WT 3
diff --git a/arch/nds32/include/asm/cacheflush.h b/arch/nds32/include/asm/cacheflush.h
index 1240f148ec0f..10b48f0d8e85 100644
--- a/arch/nds32/include/asm/cacheflush.h
+++ b/arch/nds32/include/asm/cacheflush.h
@@ -32,6 +32,8 @@ void flush_anon_page(struct vm_area_struct *vma,
#define ARCH_HAS_FLUSH_KERNEL_DCACHE_PAGE
void flush_kernel_dcache_page(struct page *page);
+void flush_kernel_vmap_range(void *addr, int size);
+void invalidate_kernel_vmap_range(void *addr, int size);
void flush_icache_range(unsigned long start, unsigned long end);
void flush_icache_page(struct vm_area_struct *vma, struct page *page);
#define flush_dcache_mmap_lock(mapping) xa_lock_irq(&(mapping)->i_pages)
diff --git a/arch/nds32/include/asm/dma-mapping.h b/arch/nds32/include/asm/dma-mapping.h
deleted file mode 100644
index 2dd47d245c25..000000000000
--- a/arch/nds32/include/asm/dma-mapping.h
+++ /dev/null
@@ -1,14 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (C) 2005-2017 Andes Technology Corporation
-
-#ifndef ASMNDS32_DMA_MAPPING_H
-#define ASMNDS32_DMA_MAPPING_H
-
-extern struct dma_map_ops nds32_dma_ops;
-
-static inline struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus)
-{
- return &nds32_dma_ops;
-}
-
-#endif
diff --git a/arch/nds32/include/asm/io.h b/arch/nds32/include/asm/io.h
index 966e71b3c960..71cd226d6863 100644
--- a/arch/nds32/include/asm/io.h
+++ b/arch/nds32/include/asm/io.h
@@ -4,6 +4,8 @@
#ifndef __ASM_NDS32_IO_H
#define __ASM_NDS32_IO_H
+#include <linux/types.h>
+
extern void iounmap(volatile void __iomem *addr);
#define __raw_writeb __raw_writeb
static inline void __raw_writeb(u8 val, volatile void __iomem *addr)
diff --git a/arch/nds32/include/asm/page.h b/arch/nds32/include/asm/page.h
index e27365c097b6..947f0491c9a7 100644
--- a/arch/nds32/include/asm/page.h
+++ b/arch/nds32/include/asm/page.h
@@ -27,6 +27,9 @@ extern void copy_user_highpage(struct page *to, struct page *from,
unsigned long vaddr, struct vm_area_struct *vma);
extern void clear_user_highpage(struct page *page, unsigned long vaddr);
+void copy_user_page(void *vto, void *vfrom, unsigned long vaddr,
+ struct page *to);
+void clear_user_page(void *addr, unsigned long vaddr, struct page *page);
#define __HAVE_ARCH_COPY_USER_HIGHPAGE
#define clear_user_highpage clear_user_highpage
#else
diff --git a/arch/nds32/include/asm/pgtable.h b/arch/nds32/include/asm/pgtable.h
index 6783937edbeb..d3e19a55cf53 100644
--- a/arch/nds32/include/asm/pgtable.h
+++ b/arch/nds32/include/asm/pgtable.h
@@ -152,6 +152,7 @@ extern void __pgd_error(const char *file, int line, unsigned long val);
#define PAGE_CACHE_L1 __pgprot(_HAVE_PAGE_L | _PAGE_V | _PAGE_M_KRW | _PAGE_D | _PAGE_E | _PAGE_G | _PAGE_CACHE)
#define PAGE_MEMORY __pgprot(_HAVE_PAGE_L | _PAGE_V | _PAGE_M_KRW | _PAGE_D | _PAGE_E | _PAGE_G | _PAGE_CACHE_SHRD)
#define PAGE_KERNEL __pgprot(_PAGE_V | _PAGE_M_KRW | _PAGE_D | _PAGE_E | _PAGE_G | _PAGE_CACHE_SHRD)
+#define PAGE_SHARED __pgprot(_PAGE_V | _PAGE_M_URW_KRW | _PAGE_D | _PAGE_CACHE_SHRD)
#define PAGE_DEVICE __pgprot(_PAGE_V | _PAGE_M_KRW | _PAGE_D | _PAGE_G | _PAGE_C_DEV)
#endif /* __ASSEMBLY__ */
diff --git a/arch/nds32/kernel/dma.c b/arch/nds32/kernel/dma.c
index d291800fc621..d0dbd4fe9645 100644
--- a/arch/nds32/kernel/dma.c
+++ b/arch/nds32/kernel/dma.c
@@ -3,17 +3,14 @@
#include <linux/types.h>
#include <linux/mm.h>
-#include <linux/export.h>
#include <linux/string.h>
-#include <linux/scatterlist.h>
-#include <linux/dma-mapping.h>
+#include <linux/dma-noncoherent.h>
#include <linux/io.h>
#include <linux/cache.h>
#include <linux/highmem.h>
#include <linux/slab.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
-#include <asm/dma-mapping.h>
#include <asm/proc-fns.h>
/*
@@ -22,11 +19,6 @@
static pte_t *consistent_pte;
static DEFINE_RAW_SPINLOCK(consistent_lock);
-enum master_type {
- FOR_CPU = 0,
- FOR_DEVICE = 1,
-};
-
/*
* VM region handling support.
*
@@ -124,10 +116,8 @@ out:
return c;
}
-/* FIXME: attrs is not used. */
-static void *nds32_dma_alloc_coherent(struct device *dev, size_t size,
- dma_addr_t * handle, gfp_t gfp,
- unsigned long attrs)
+void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
+ gfp_t gfp, unsigned long attrs)
{
struct page *page;
struct arch_vm_region *c;
@@ -232,8 +222,8 @@ no_page:
return NULL;
}
-static void nds32_dma_free(struct device *dev, size_t size, void *cpu_addr,
- dma_addr_t handle, unsigned long attrs)
+void arch_dma_free(struct device *dev, size_t size, void *cpu_addr,
+ dma_addr_t handle, unsigned long attrs)
{
struct arch_vm_region *c;
unsigned long flags, addr;
@@ -333,145 +323,69 @@ static int __init consistent_init(void)
}
core_initcall(consistent_init);
-static void consistent_sync(void *vaddr, size_t size, int direction, int master_type);
-static dma_addr_t nds32_dma_map_page(struct device *dev, struct page *page,
- unsigned long offset, size_t size,
- enum dma_data_direction dir,
- unsigned long attrs)
-{
- if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
- consistent_sync((void *)(page_address(page) + offset), size, dir, FOR_DEVICE);
- return page_to_phys(page) + offset;
-}
-
-static void nds32_dma_unmap_page(struct device *dev, dma_addr_t handle,
- size_t size, enum dma_data_direction dir,
- unsigned long attrs)
-{
- if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
- consistent_sync(phys_to_virt(handle), size, dir, FOR_CPU);
-}
-
-/*
- * Make an area consistent for devices.
- */
-static void consistent_sync(void *vaddr, size_t size, int direction, int master_type)
-{
- unsigned long start = (unsigned long)vaddr;
- unsigned long end = start + size;
-
- if (master_type == FOR_CPU) {
- switch (direction) {
- case DMA_TO_DEVICE:
- break;
- case DMA_FROM_DEVICE:
- case DMA_BIDIRECTIONAL:
- cpu_dma_inval_range(start, end);
- break;
- default:
- BUG();
- }
- } else {
- /* FOR_DEVICE */
- switch (direction) {
- case DMA_FROM_DEVICE:
- break;
- case DMA_TO_DEVICE:
- case DMA_BIDIRECTIONAL:
- cpu_dma_wb_range(start, end);
- break;
- default:
- BUG();
- }
- }
-}
-static int nds32_dma_map_sg(struct device *dev, struct scatterlist *sg,
- int nents, enum dma_data_direction dir,
- unsigned long attrs)
+static inline void cache_op(phys_addr_t paddr, size_t size,
+ void (*fn)(unsigned long start, unsigned long end))
{
- int i;
+ struct page *page = pfn_to_page(paddr >> PAGE_SHIFT);
+ unsigned offset = paddr & ~PAGE_MASK;
+ size_t left = size;
+ unsigned long start;
- for (i = 0; i < nents; i++, sg++) {
- void *virt;
- unsigned long pfn;
- struct page *page = sg_page(sg);
+ do {
+ size_t len = left;
- sg->dma_address = sg_phys(sg);
- pfn = page_to_pfn(page) + sg->offset / PAGE_SIZE;
- page = pfn_to_page(pfn);
if (PageHighMem(page)) {
- virt = kmap_atomic(page);
- consistent_sync(virt, sg->length, dir, FOR_CPU);
- kunmap_atomic(virt);
+ void *addr;
+
+ if (offset + len > PAGE_SIZE) {
+ if (offset >= PAGE_SIZE) {
+ page += offset >> PAGE_SHIFT;
+ offset &= ~PAGE_MASK;
+ }
+ len = PAGE_SIZE - offset;
+ }
+
+ addr = kmap_atomic(page);
+ start = (unsigned long)(addr + offset);
+ fn(start, start + len);
+ kunmap_atomic(addr);
} else {
- if (sg->offset > PAGE_SIZE)
- panic("sg->offset:%08x > PAGE_SIZE\n",
- sg->offset);
- virt = page_address(page) + sg->offset;
- consistent_sync(virt, sg->length, dir, FOR_CPU);
+ start = (unsigned long)phys_to_virt(paddr);
+ fn(start, start + size);
}
- }
- return nents;
+ offset = 0;
+ page++;
+ left -= len;
+ } while (left);
}
-static void nds32_dma_unmap_sg(struct device *dev, struct scatterlist *sg,
- int nhwentries, enum dma_data_direction dir,
- unsigned long attrs)
+void arch_sync_dma_for_device(struct device *dev, phys_addr_t paddr,
+ size_t size, enum dma_data_direction dir)
{
-}
-
-static void
-nds32_dma_sync_single_for_cpu(struct device *dev, dma_addr_t handle,
- size_t size, enum dma_data_direction dir)
-{
- consistent_sync((void *)phys_to_virt(handle), size, dir, FOR_CPU);
-}
-
-static void
-nds32_dma_sync_single_for_device(struct device *dev, dma_addr_t handle,
- size_t size, enum dma_data_direction dir)
-{
- consistent_sync((void *)phys_to_virt(handle), size, dir, FOR_DEVICE);
-}
-
-static void
-nds32_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nents,
- enum dma_data_direction dir)
-{
- int i;
-
- for (i = 0; i < nents; i++, sg++) {
- char *virt =
- page_address((struct page *)sg->page_link) + sg->offset;
- consistent_sync(virt, sg->length, dir, FOR_CPU);
+ switch (dir) {
+ case DMA_FROM_DEVICE:
+ break;
+ case DMA_TO_DEVICE:
+ case DMA_BIDIRECTIONAL:
+ cache_op(paddr, size, cpu_dma_wb_range);
+ break;
+ default:
+ BUG();
}
}
-static void
-nds32_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
- int nents, enum dma_data_direction dir)
+void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr,
+ size_t size, enum dma_data_direction dir)
{
- int i;
-
- for (i = 0; i < nents; i++, sg++) {
- char *virt =
- page_address((struct page *)sg->page_link) + sg->offset;
- consistent_sync(virt, sg->length, dir, FOR_DEVICE);
+ switch (dir) {
+ case DMA_TO_DEVICE:
+ break;
+ case DMA_FROM_DEVICE:
+ case DMA_BIDIRECTIONAL:
+ cache_op(paddr, size, cpu_dma_inval_range);
+ break;
+ default:
+ BUG();
}
}
-
-struct dma_map_ops nds32_dma_ops = {
- .alloc = nds32_dma_alloc_coherent,
- .free = nds32_dma_free,
- .map_page = nds32_dma_map_page,
- .unmap_page = nds32_dma_unmap_page,
- .map_sg = nds32_dma_map_sg,
- .unmap_sg = nds32_dma_unmap_sg,
- .sync_single_for_device = nds32_dma_sync_single_for_device,
- .sync_single_for_cpu = nds32_dma_sync_single_for_cpu,
- .sync_sg_for_cpu = nds32_dma_sync_sg_for_cpu,
- .sync_sg_for_device = nds32_dma_sync_sg_for_device,
-};
-
-EXPORT_SYMBOL(nds32_dma_ops);
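
With DMA_NONCOHERENT_OPS selected in the Kconfig hunk above, the generic
dma-noncoherent layer provides the map/unmap and scatterlist entry points and
calls back into just these two arch hooks; per-direction cache maintenance is
the only architecture-specific piece left. The contract, as a lowmem-only
sketch (cpu_dma_wb_range/cpu_dma_inval_range are the nds32 primitives used
above; the highmem page walk in cache_op() and the BUG() on bad directions
are omitted):

    #include <linux/dma-noncoherent.h>

    void arch_sync_dma_for_device(struct device *dev, phys_addr_t paddr,
                                  size_t size, enum dma_data_direction dir)
    {
            unsigned long start = (unsigned long)phys_to_virt(paddr);

            /* CPU -> device: write dirty lines back so the device sees them. */
            if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)
                    cpu_dma_wb_range(start, start + size);
    }

    void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr,
                               size_t size, enum dma_data_direction dir)
    {
            unsigned long start = (unsigned long)phys_to_virt(paddr);

            /* Device -> CPU: invalidate stale lines before the CPU reads. */
            if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
                    cpu_dma_inval_range(start, start + size);
    }
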
diff --git a/arch/nds32/kernel/ex-entry.S b/arch/nds32/kernel/ex-entry.S
index a72e83d804f5..b8ae4e9a6b93 100644
--- a/arch/nds32/kernel/ex-entry.S
+++ b/arch/nds32/kernel/ex-entry.S
@@ -118,7 +118,7 @@ common_exception_handler:
/* interrupt */
2:
#ifdef CONFIG_TRACE_IRQFLAGS
- jal arch_trace_hardirqs_off
+ jal trace_hardirqs_off
#endif
move $r0, $sp
sethi $lp, hi20(ret_from_intr)
diff --git a/arch/nds32/kernel/head.S b/arch/nds32/kernel/head.S
index 71f57bd70f3b..c5fdae174ced 100644
--- a/arch/nds32/kernel/head.S
+++ b/arch/nds32/kernel/head.S
@@ -57,14 +57,32 @@ _nodtb:
isb
mtsr $r4, $L1_PPTB ! load page table pointer\n"
-/* set NTC0 cacheable/writeback, mutliple page size in use */
+#ifdef CONFIG_CPU_DCACHE_DISABLE
+ #define MMU_CTL_NTCC MMU_CTL_CACHEABLE_NON
+#else
+ #ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
+ #define MMU_CTL_NTCC MMU_CTL_CACHEABLE_WT
+ #else
+ #define MMU_CTL_NTCC MMU_CTL_CACHEABLE_WB
+ #endif
+#endif
+
+/* set NTC cacheability, multiple page sizes in use */
mfsr $r3, $MMU_CTL
- li $r0, #~MMU_CTL_mskNTC0
- and $r3, $r3, $r0
+#if CONFIG_MEMORY_START >= 0xc0000000
+ ori $r3, $r3, (MMU_CTL_NTCC << MMU_CTL_offNTC3)
+#elif CONFIG_MEMORY_START >= 0x80000000
+ ori $r3, $r3, (MMU_CTL_NTCC << MMU_CTL_offNTC2)
+#elif CONFIG_MEMORY_START >= 0x40000000
+ ori $r3, $r3, (MMU_CTL_NTCC << MMU_CTL_offNTC1)
+#else
+ ori $r3, $r3, (MMU_CTL_NTCC << MMU_CTL_offNTC0)
+#endif
+
#ifdef CONFIG_ANDES_PAGE_SIZE_4KB
- ori $r3, $r3, #(MMU_CTL_mskMPZIU|(MMU_CTL_CACHEABLE_WB << MMU_CTL_offNTC0))
+ ori $r3, $r3, #(MMU_CTL_mskMPZIU)
#else
- ori $r3, $r3, #(MMU_CTL_mskMPZIU|(MMU_CTL_CACHEABLE_WB << MMU_CTL_offNTC0)|MMU_CTL_D8KB)
+ ori $r3, $r3, #(MMU_CTL_mskMPZIU|MMU_CTL_D8KB)
#endif
#ifdef CONFIG_HW_SUPPORT_UNALIGNMENT_ACCESS
li $r0, #MMU_CTL_UNA
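
The #if ladder above picks which NTC field to program from CONFIG_MEMORY_START:
judging by the 0x40000000 steps, each NTCn field controls the cacheability of
one 1 GiB window of the address space. The same selection expressed in C (a
sketch; ntc_index is hypothetical):

    /* Map a physical memory base to its NTC field index (0..3). */
    static inline int ntc_index(unsigned long memory_start)
    {
            return (memory_start >> 30) & 0x3;      /* one NTC per 1 GiB */
    }
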
diff --git a/arch/nds32/kernel/setup.c b/arch/nds32/kernel/setup.c
index ba910e9e4ecb..2f5b2ccebe47 100644
--- a/arch/nds32/kernel/setup.c
+++ b/arch/nds32/kernel/setup.c
@@ -293,6 +293,9 @@ void __init setup_arch(char **cmdline_p)
/* paging_init() sets up the MMU and marks all pages as reserved */
paging_init();
+	/* invalidate all TLB entries because a new mapping has been created */
+ __nds32__tlbop_flua();
+
/* use generic way to parse */
parse_early_param();
diff --git a/arch/nds32/kernel/stacktrace.c b/arch/nds32/kernel/stacktrace.c
index bc70113c0e84..8b231e910ea6 100644
--- a/arch/nds32/kernel/stacktrace.c
+++ b/arch/nds32/kernel/stacktrace.c
@@ -9,6 +9,7 @@ void save_stack_trace(struct stack_trace *trace)
{
save_stack_trace_tsk(current, trace);
}
+EXPORT_SYMBOL_GPL(save_stack_trace);
void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
{
@@ -45,3 +46,4 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
fpn = (unsigned long *)fpp;
}
}
+EXPORT_SYMBOL_GPL(save_stack_trace_tsk);
diff --git a/arch/nds32/kernel/traps.c b/arch/nds32/kernel/traps.c
index 6e34eb9824a4..a6205fd4db52 100644
--- a/arch/nds32/kernel/traps.c
+++ b/arch/nds32/kernel/traps.c
@@ -222,19 +222,13 @@ void die_if_kernel(const char *str, struct pt_regs *regs, int err)
int bad_syscall(int n, struct pt_regs *regs)
{
- siginfo_t info;
-
if (current->personality != PER_LINUX) {
send_sig(SIGSEGV, current, 1);
return regs->uregs[0];
}
- info.si_signo = SIGILL;
- info.si_errno = 0;
- info.si_code = ILL_ILLTRP;
- info.si_addr = (void __user *)instruction_pointer(regs) - 4;
-
- force_sig_info(SIGILL, &info, current);
+ force_sig_fault(SIGILL, ILL_ILLTRP,
+ (void __user *)instruction_pointer(regs) - 4, current);
die_if_kernel("Oops - bad syscall", regs, n);
return regs->uregs[0];
}
@@ -287,16 +281,11 @@ void __init early_trap_init(void)
void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs,
int error_code, int si_code)
{
- struct siginfo info;
-
tsk->thread.trap_no = ENTRY_DEBUG_RELATED;
tsk->thread.error_code = error_code;
- memset(&info, 0, sizeof(info));
- info.si_signo = SIGTRAP;
- info.si_code = si_code;
- info.si_addr = (void __user *)instruction_pointer(regs);
- force_sig_info(SIGTRAP, &info, tsk);
+ force_sig_fault(SIGTRAP, si_code,
+ (void __user *)instruction_pointer(regs), tsk);
}
void do_debug_trap(unsigned long entry, unsigned long addr,
@@ -318,29 +307,22 @@ void do_debug_trap(unsigned long entry, unsigned long addr,
void unhandled_interruption(struct pt_regs *regs)
{
- siginfo_t si;
pr_emerg("unhandled_interruption\n");
show_regs(regs);
if (!user_mode(regs))
do_exit(SIGKILL);
- si.si_signo = SIGKILL;
- si.si_errno = 0;
- force_sig_info(SIGKILL, &si, current);
+ force_sig(SIGKILL, current);
}
void unhandled_exceptions(unsigned long entry, unsigned long addr,
unsigned long type, struct pt_regs *regs)
{
- siginfo_t si;
pr_emerg("Unhandled Exception: entry: %lx addr:%lx itype:%lx\n", entry,
addr, type);
show_regs(regs);
if (!user_mode(regs))
do_exit(SIGKILL);
- si.si_signo = SIGKILL;
- si.si_errno = 0;
- si.si_addr = (void *)addr;
- force_sig_info(SIGKILL, &si, current);
+ force_sig(SIGKILL, current);
}
extern int do_page_fault(unsigned long entry, unsigned long addr,
@@ -363,14 +345,11 @@ void do_dispatch_tlb_misc(unsigned long entry, unsigned long addr,
void do_revinsn(struct pt_regs *regs)
{
- siginfo_t si;
pr_emerg("Reserved Instruction\n");
show_regs(regs);
if (!user_mode(regs))
do_exit(SIGILL);
- si.si_signo = SIGILL;
- si.si_errno = 0;
- force_sig_info(SIGILL, &si, current);
+ force_sig(SIGILL, current);
}
#ifdef CONFIG_ALIGNMENT_TRAP
diff --git a/arch/nds32/kernel/vdso.c b/arch/nds32/kernel/vdso.c
index f1198d7a5654..016f15891f6d 100644
--- a/arch/nds32/kernel/vdso.c
+++ b/arch/nds32/kernel/vdso.c
@@ -23,7 +23,7 @@
#include <asm/vdso_timer_info.h>
#include <asm/cache_info.h>
extern struct cache_info L1_cache_info[2];
-extern char vdso_start, vdso_end;
+extern char vdso_start[], vdso_end[];
static unsigned long vdso_pages __ro_after_init;
static unsigned long timer_mapping_base;
@@ -66,16 +66,16 @@ static int __init vdso_init(void)
int i;
struct page **vdso_pagelist;
- if (memcmp(&vdso_start, "\177ELF", 4)) {
+ if (memcmp(vdso_start, "\177ELF", 4)) {
pr_err("vDSO is not a valid ELF object!\n");
return -EINVAL;
}
/* Create a timer io mapping to get the clock cycle counter */
get_timer_node_info();
- vdso_pages = (&vdso_end - &vdso_start) >> PAGE_SHIFT;
+ vdso_pages = (vdso_end - vdso_start) >> PAGE_SHIFT;
pr_info("vdso: %ld pages (%ld code @ %p, %ld data @ %p)\n",
- vdso_pages + 1, vdso_pages, &vdso_start, 1L, vdso_data);
+ vdso_pages + 1, vdso_pages, vdso_start, 1L, vdso_data);
/* Allocate the vDSO pagelist */
vdso_pagelist = kcalloc(vdso_pages, sizeof(struct page *), GFP_KERNEL);
@@ -83,7 +83,7 @@ static int __init vdso_init(void)
return -ENOMEM;
for (i = 0; i < vdso_pages; i++)
- vdso_pagelist[i] = virt_to_page(&vdso_start + i * PAGE_SIZE);
+ vdso_pagelist[i] = virt_to_page(vdso_start + i * PAGE_SIZE);
vdso_spec[1].pages = &vdso_pagelist[0];
return 0;
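
Declaring the linker-provided bounds as incomplete arrays rather than single
chars avoids undefined behaviour: with extern char vdso_start;, an expression
like &vdso_end - &vdso_start subtracts pointers to two distinct one-byte
objects, which the compiler may assume never happens. A sketch of the safer
idiom (vdso_size is hypothetical; the symbols come from the linker script):

    /* Symbols placed by the linker script mark the vDSO blob's bounds. */
    extern char vdso_start[], vdso_end[];

    static unsigned long vdso_size(void)
    {
            /* Array-typed externs decay to pointers into one notional
             * object, so the subtraction is well defined. */
            return (unsigned long)(vdso_end - vdso_start);
    }
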
diff --git a/arch/nds32/lib/copy_page.S b/arch/nds32/lib/copy_page.S
index 4a2ff85f17ee..f8701ed161a8 100644
--- a/arch/nds32/lib/copy_page.S
+++ b/arch/nds32/lib/copy_page.S
@@ -2,6 +2,7 @@
// Copyright (C) 2005-2017 Andes Technology Corporation
#include <linux/linkage.h>
+#include <asm/export.h>
#include <asm/page.h>
.text
@@ -16,6 +17,7 @@ ENTRY(copy_page)
popm $r2, $r10
ret
ENDPROC(copy_page)
+EXPORT_SYMBOL(copy_page)
ENTRY(clear_page)
pushm $r1, $r9
@@ -35,3 +37,4 @@ ENTRY(clear_page)
popm $r1, $r9
ret
ENDPROC(clear_page)
+EXPORT_SYMBOL(clear_page)
diff --git a/arch/nds32/mm/alignment.c b/arch/nds32/mm/alignment.c
index b96a01b10ca7..e1aed9dc692d 100644
--- a/arch/nds32/mm/alignment.c
+++ b/arch/nds32/mm/alignment.c
@@ -19,7 +19,7 @@
#define RA(inst) (((inst) >> 15) & 0x1FUL)
#define RB(inst) (((inst) >> 10) & 0x1FUL)
#define SV(inst) (((inst) >> 8) & 0x3UL)
-#define IMM(inst) (((inst) >> 0) & 0x3FFFUL)
+#define IMM(inst) (((inst) >> 0) & 0x7FFFUL)
#define RA3(inst) (((inst) >> 3) & 0x7UL)
#define RT3(inst) (((inst) >> 6) & 0x7UL)
@@ -28,6 +28,9 @@
#define RA5(inst) (((inst) >> 0) & 0x1FUL)
#define RT4(inst) (((inst) >> 5) & 0xFUL)
+#define GET_IMMSVAL(imm_value) \
+ (((imm_value >> 14) & 0x1) ? (imm_value - 0x8000) : imm_value)
+
#define __get8_data(val,addr,err) \
__asm__( \
"1: lbi.bi %1, [%2], #1\n" \
@@ -467,7 +470,7 @@ static inline int do_32(unsigned long inst, struct pt_regs *regs)
}
if (imm)
- shift = IMM(inst) * len;
+ shift = GET_IMMSVAL(IMM(inst)) * len;
else
shift = *idx_to_addr(regs, RB(inst)) << SV(inst);
@@ -552,7 +555,7 @@ static struct ctl_table alignment_tbl[3] = {
static struct ctl_table nds32_sysctl_table[2] = {
{
- .procname = "unaligned_acess",
+ .procname = "unaligned_access",
.mode = 0555,
.child = alignment_tbl},
{}
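
The alignment-handler change widens IMM() from 14 to 15 bits and adds
GET_IMMSVAL() to treat bit 14 as the sign bit, so negative displacements no
longer produce a bogus shift. The arithmetic, checked as a standalone C program
(the cast to long keeps the demo well defined; the kernel macro operates on
unsigned long):

    #include <assert.h>

    #define IMM(inst)       (((inst) >> 0) & 0x7FFFUL)      /* 15-bit field */
    #define GET_IMMSVAL(v)  ((((v) >> 14) & 0x1) ? ((long)(v) - 0x8000) \
                                                 : (long)(v))

    int main(void)
    {
            assert(GET_IMMSVAL(0x0001UL) == 1);      /* bit 14 clear */
            assert(GET_IMMSVAL(0x7FFFUL) == -1);     /* 0x7FFF - 0x8000 */
            assert(GET_IMMSVAL(0x4000UL) == -16384); /* most negative */
            return 0;
    }
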
diff --git a/arch/nds32/mm/cacheflush.c b/arch/nds32/mm/cacheflush.c
index 6eb786a399a2..ce8fd34497bf 100644
--- a/arch/nds32/mm/cacheflush.c
+++ b/arch/nds32/mm/cacheflush.c
@@ -147,6 +147,25 @@ void flush_cache_vunmap(unsigned long start, unsigned long end)
cpu_icache_inval_all();
}
+void copy_user_page(void *vto, void *vfrom, unsigned long vaddr,
+ struct page *to)
+{
+ cpu_dcache_wbinval_page((unsigned long)vaddr);
+ cpu_icache_inval_page((unsigned long)vaddr);
+ copy_page(vto, vfrom);
+ cpu_dcache_wbinval_page((unsigned long)vto);
+ cpu_icache_inval_page((unsigned long)vto);
+}
+
+void clear_user_page(void *addr, unsigned long vaddr, struct page *page)
+{
+ cpu_dcache_wbinval_page((unsigned long)vaddr);
+ cpu_icache_inval_page((unsigned long)vaddr);
+ clear_page(addr);
+ cpu_dcache_wbinval_page((unsigned long)addr);
+ cpu_icache_inval_page((unsigned long)addr);
+}
+
void copy_user_highpage(struct page *to, struct page *from,
unsigned long vaddr, struct vm_area_struct *vma)
{
@@ -156,11 +175,9 @@ void copy_user_highpage(struct page *to, struct page *from,
pto = page_to_phys(to);
pfrom = page_to_phys(from);
+ local_irq_save(flags);
if (aliasing(vaddr, (unsigned long)kfrom))
cpu_dcache_wb_page((unsigned long)kfrom);
- if (aliasing(vaddr, (unsigned long)kto))
- cpu_dcache_inval_page((unsigned long)kto);
- local_irq_save(flags);
vto = kremap0(vaddr, pto);
vfrom = kremap1(vaddr, pfrom);
copy_page((void *)vto, (void *)vfrom);
@@ -198,21 +215,25 @@ void flush_dcache_page(struct page *page)
if (mapping && !mapping_mapped(mapping))
set_bit(PG_dcache_dirty, &page->flags);
else {
- int i, pc;
- unsigned long vto, kaddr, flags;
+ unsigned long kaddr, flags;
+
kaddr = (unsigned long)page_address(page);
- cpu_dcache_wbinval_page(kaddr);
- pc = CACHE_SET(DCACHE) * CACHE_LINE_SIZE(DCACHE) / PAGE_SIZE;
local_irq_save(flags);
- for (i = 0; i < pc; i++) {
- vto =
- kremap0(kaddr + i * PAGE_SIZE, page_to_phys(page));
- cpu_dcache_wbinval_page(vto);
- kunmap01(vto);
+ cpu_dcache_wbinval_page(kaddr);
+ if (mapping) {
+ unsigned long vaddr, kto;
+
+ vaddr = page->index << PAGE_SHIFT;
+ if (aliasing(vaddr, kaddr)) {
+ kto = kremap0(vaddr, page_to_phys(page));
+ cpu_dcache_wbinval_page(kto);
+ kunmap01(kto);
+ }
}
local_irq_restore(flags);
}
}
+EXPORT_SYMBOL(flush_dcache_page);
void copy_to_user_page(struct vm_area_struct *vma, struct page *page,
unsigned long vaddr, void *dst, void *src, int len)
@@ -251,7 +272,7 @@ void copy_from_user_page(struct vm_area_struct *vma, struct page *page,
void flush_anon_page(struct vm_area_struct *vma,
struct page *page, unsigned long vaddr)
{
- unsigned long flags;
+ unsigned long kaddr, flags, ktmp;
if (!PageAnon(page))
return;
@@ -261,7 +282,12 @@ void flush_anon_page(struct vm_area_struct *vma,
local_irq_save(flags);
if (vma->vm_flags & VM_EXEC)
cpu_icache_inval_page(vaddr & PAGE_MASK);
- cpu_dcache_wbinval_page((unsigned long)page_address(page));
+ kaddr = (unsigned long)page_address(page);
+ if (aliasing(vaddr, kaddr)) {
+ ktmp = kremap0(vaddr, page_to_phys(page));
+ cpu_dcache_wbinval_page(ktmp);
+ kunmap01(ktmp);
+ }
local_irq_restore(flags);
}
@@ -272,6 +298,25 @@ void flush_kernel_dcache_page(struct page *page)
cpu_dcache_wbinval_page((unsigned long)page_address(page));
local_irq_restore(flags);
}
+EXPORT_SYMBOL(flush_kernel_dcache_page);
+
+void flush_kernel_vmap_range(void *addr, int size)
+{
+ unsigned long flags;
+ local_irq_save(flags);
+ cpu_dcache_wb_range((unsigned long)addr, (unsigned long)addr + size);
+ local_irq_restore(flags);
+}
+EXPORT_SYMBOL(flush_kernel_vmap_range);
+
+void invalidate_kernel_vmap_range(void *addr, int size)
+{
+ unsigned long flags;
+ local_irq_save(flags);
+ cpu_dcache_inval_range((unsigned long)addr, (unsigned long)addr + size);
+ local_irq_restore(flags);
+}
+EXPORT_SYMBOL(invalidate_kernel_vmap_range);
void flush_icache_range(unsigned long start, unsigned long end)
{
@@ -283,6 +328,7 @@ void flush_icache_range(unsigned long start, unsigned long end)
cpu_cache_wbinval_range(start, end, 1);
local_irq_restore(flags);
}
+EXPORT_SYMBOL(flush_icache_range);
void flush_icache_page(struct vm_area_struct *vma, struct page *page)
{
diff --git a/arch/nds32/mm/fault.c b/arch/nds32/mm/fault.c
index 3a246fb8098c..9bdb7c3ecbb6 100644
--- a/arch/nds32/mm/fault.c
+++ b/arch/nds32/mm/fault.c
@@ -72,7 +72,7 @@ void do_page_fault(unsigned long entry, unsigned long addr,
struct task_struct *tsk;
struct mm_struct *mm;
struct vm_area_struct *vma;
- siginfo_t info;
+ int si_code;
int fault;
unsigned int mask = VM_READ | VM_WRITE | VM_EXEC;
unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
@@ -80,7 +80,7 @@ void do_page_fault(unsigned long entry, unsigned long addr,
error_code = error_code & (ITYPE_mskINST | ITYPE_mskETYPE);
tsk = current;
mm = tsk->mm;
- info.si_code = SEGV_MAPERR;
+ si_code = SEGV_MAPERR;
/*
* We fault-in kernel-space virtual memory on-demand. The
* 'reference' page table is init_mm.pgd.
@@ -161,7 +161,7 @@ retry:
*/
good_area:
- info.si_code = SEGV_ACCERR;
+ si_code = SEGV_ACCERR;
/* first do some preliminary protection checks */
if (entry == ENTRY_PTE_NOT_PRESENT) {
@@ -266,11 +266,7 @@ bad_area_nosemaphore:
tsk->thread.address = addr;
tsk->thread.error_code = error_code;
tsk->thread.trap_no = entry;
- info.si_signo = SIGSEGV;
- info.si_errno = 0;
- /* info.si_code has been set above */
- info.si_addr = (void *)addr;
- force_sig_info(SIGSEGV, &info, tsk);
+ force_sig_fault(SIGSEGV, si_code, (void __user *)addr, tsk);
return;
}
@@ -339,11 +335,7 @@ do_sigbus:
tsk->thread.address = addr;
tsk->thread.error_code = error_code;
tsk->thread.trap_no = entry;
- info.si_signo = SIGBUS;
- info.si_errno = 0;
- info.si_code = BUS_ADRERR;
- info.si_addr = (void *)addr;
- force_sig_info(SIGBUS, &info, tsk);
+ force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)addr, tsk);
return;
diff --git a/arch/nds32/mm/init.c b/arch/nds32/mm/init.c
index 93ee0160720b..c713d2ad55dc 100644
--- a/arch/nds32/mm/init.c
+++ b/arch/nds32/mm/init.c
@@ -30,6 +30,7 @@ extern unsigned long phys_initrd_size;
* zero-initialized data and COW.
*/
struct page *empty_zero_page;
+EXPORT_SYMBOL(empty_zero_page);
static void __init zone_sizes_init(void)
{
diff --git a/arch/nios2/kernel/traps.c b/arch/nios2/kernel/traps.c
index 8184e7d6b385..3bc3cd22b750 100644
--- a/arch/nios2/kernel/traps.c
+++ b/arch/nios2/kernel/traps.c
@@ -26,13 +26,7 @@ static DEFINE_SPINLOCK(die_lock);
static void _send_sig(int signo, int code, unsigned long addr)
{
- siginfo_t info;
-
- info.si_signo = signo;
- info.si_errno = 0;
- info.si_code = code;
- info.si_addr = (void __user *) addr;
- force_sig_info(signo, &info, current);
+ force_sig_fault(signo, code, (void __user *) addr, current);
}
void die(const char *str, struct pt_regs *regs, long err)
diff --git a/arch/openrisc/kernel/dma.c b/arch/openrisc/kernel/dma.c
index a945f00011b4..ec7fd45704d2 100644
--- a/arch/openrisc/kernel/dma.c
+++ b/arch/openrisc/kernel/dma.c
@@ -247,14 +247,3 @@ const struct dma_map_ops or1k_dma_map_ops = {
.sync_single_for_device = or1k_sync_single_for_device,
};
EXPORT_SYMBOL(or1k_dma_map_ops);
-
-/* Number of entries preallocated for DMA-API debugging */
-#define PREALLOC_DMA_DEBUG_ENTRIES (1 << 16)
-
-static int __init dma_init(void)
-{
- dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES);
-
- return 0;
-}
-fs_initcall(dma_init);
diff --git a/arch/openrisc/kernel/traps.c b/arch/openrisc/kernel/traps.c
index 113c175fe469..fac246e6f37a 100644
--- a/arch/openrisc/kernel/traps.c
+++ b/arch/openrisc/kernel/traps.c
@@ -250,27 +250,16 @@ void __init trap_init(void)
asmlinkage void do_trap(struct pt_regs *regs, unsigned long address)
{
- siginfo_t info;
- memset(&info, 0, sizeof(info));
- info.si_signo = SIGTRAP;
- info.si_code = TRAP_TRACE;
- info.si_addr = (void *)address;
- force_sig_info(SIGTRAP, &info, current);
+ force_sig_fault(SIGTRAP, TRAP_TRACE, (void __user *)address, current);
regs->pc += 4;
}
asmlinkage void do_unaligned_access(struct pt_regs *regs, unsigned long address)
{
- siginfo_t info;
-
if (user_mode(regs)) {
/* Send a SIGBUS */
- info.si_signo = SIGBUS;
- info.si_errno = 0;
- info.si_code = BUS_ADRALN;
- info.si_addr = (void __user *)address;
- force_sig_info(SIGBUS, &info, current);
+ force_sig_fault(SIGBUS, BUS_ADRALN, (void __user *)address, current);
} else {
printk("KERNEL: Unaligned Access 0x%.8lx\n", address);
show_registers(regs);
@@ -281,15 +270,9 @@ asmlinkage void do_unaligned_access(struct pt_regs *regs, unsigned long address)
asmlinkage void do_bus_fault(struct pt_regs *regs, unsigned long address)
{
- siginfo_t info;
-
if (user_mode(regs)) {
/* Send a SIGBUS */
- info.si_signo = SIGBUS;
- info.si_errno = 0;
- info.si_code = BUS_ADRERR;
- info.si_addr = (void *)address;
- force_sig_info(SIGBUS, &info, current);
+ force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address, current);
} else { /* Kernel mode */
printk("KERNEL: Bus error (SIGBUS) 0x%.8lx\n", address);
show_registers(regs);
@@ -464,7 +447,6 @@ static inline void simulate_swa(struct pt_regs *regs, unsigned long address,
asmlinkage void do_illegal_instruction(struct pt_regs *regs,
unsigned long address)
{
- siginfo_t info;
unsigned int op;
unsigned int insn = *((unsigned int *)address);
@@ -485,11 +467,7 @@ asmlinkage void do_illegal_instruction(struct pt_regs *regs,
if (user_mode(regs)) {
/* Send a SIGILL */
- info.si_signo = SIGILL;
- info.si_errno = 0;
- info.si_code = ILL_ILLOPC;
- info.si_addr = (void *)address;
- force_sig_info(SIGBUS, &info, current);
+ force_sig_fault(SIGILL, ILL_ILLOPC, (void __user *)address, current);
} else { /* Kernel mode */
printk("KERNEL: Illegal instruction (SIGILL) 0x%.8lx\n",
address);
diff --git a/arch/openrisc/mm/fault.c b/arch/openrisc/mm/fault.c
index d0021dfae20a..9f011d16cc46 100644
--- a/arch/openrisc/mm/fault.c
+++ b/arch/openrisc/mm/fault.c
@@ -52,7 +52,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long address,
struct task_struct *tsk;
struct mm_struct *mm;
struct vm_area_struct *vma;
- siginfo_t info;
+ int si_code;
int fault;
unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
@@ -97,7 +97,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long address,
}
mm = tsk->mm;
- info.si_code = SEGV_MAPERR;
+ si_code = SEGV_MAPERR;
/*
* If we're in an interrupt or have no user
@@ -139,7 +139,7 @@ retry:
*/
good_area:
- info.si_code = SEGV_ACCERR;
+ si_code = SEGV_ACCERR;
/* first do some preliminary protection checks */
@@ -213,11 +213,7 @@ bad_area_nosemaphore:
/* User mode accesses just cause a SIGSEGV */
if (user_mode(regs)) {
- info.si_signo = SIGSEGV;
- info.si_errno = 0;
- /* info.si_code has been set above */
- info.si_addr = (void *)address;
- force_sig_info(SIGSEGV, &info, tsk);
+ force_sig_fault(SIGSEGV, si_code, (void __user *)address, tsk);
return;
}
@@ -282,11 +278,7 @@ do_sigbus:
* Send a sigbus, regardless of whether we were in kernel
* or user mode.
*/
- info.si_signo = SIGBUS;
- info.si_errno = 0;
- info.si_code = BUS_ADRERR;
- info.si_addr = (void *)address;
- force_sig_info(SIGBUS, &info, tsk);
+ force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address, tsk);
/* Kernel mode? Handle exceptions or die */
if (!user_mode(regs))
diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig
index fc5a574c3482..4d8f64d48597 100644
--- a/arch/parisc/Kconfig
+++ b/arch/parisc/Kconfig
@@ -51,6 +51,8 @@ config PARISC
select GENERIC_CLOCKEVENTS
select ARCH_NO_COHERENT_DMA_MMAP
select CPU_NO_EFFICIENT_FFS
+ select NEED_DMA_MAP_STATE
+ select NEED_SG_DMA_LENGTH
help
The PA-RISC microprocessor is designed by Hewlett-Packard and used
@@ -111,12 +113,6 @@ config PM
config STACKTRACE_SUPPORT
def_bool y
-config NEED_DMA_MAP_STATE
- def_bool y
-
-config NEED_SG_DMA_LENGTH
- def_bool y
-
config ISA_DMA_API
bool
diff --git a/arch/parisc/Makefile b/arch/parisc/Makefile
index e2364ff59180..34ac503e28ad 100644
--- a/arch/parisc/Makefile
+++ b/arch/parisc/Makefile
@@ -123,6 +123,9 @@ INSTALL_TARGETS = zinstall install
PHONY += bzImage $(BOOT_TARGETS) $(INSTALL_TARGETS)
+# Default kernel to build
+all: bzImage
+
zImage: vmlinuz
Image: vmlinux
diff --git a/arch/parisc/include/asm/hardirq.h b/arch/parisc/include/asm/hardirq.h
index 077815169258..1a1235a9d533 100644
--- a/arch/parisc/include/asm/hardirq.h
+++ b/arch/parisc/include/asm/hardirq.h
@@ -34,14 +34,6 @@ DECLARE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
#define __IRQ_STAT(cpu, member) (irq_stat[cpu].member)
#define inc_irq_stat(member) this_cpu_inc(irq_stat.member)
#define __inc_irq_stat(member) __this_cpu_inc(irq_stat.member)
-#define local_softirq_pending() this_cpu_read(irq_stat.__softirq_pending)
-
-#define __ARCH_SET_SOFTIRQ_PENDING
-
-#define set_softirq_pending(x) \
- this_cpu_write(irq_stat.__softirq_pending, (x))
-#define or_softirq_pending(x) this_cpu_or(irq_stat.__softirq_pending, (x))
-
#define ack_bad_irq(irq) WARN(1, "unexpected IRQ trap at vector %02x\n", irq)
#endif /* _PARISC_HARDIRQ_H */
diff --git a/arch/parisc/include/asm/pci.h b/arch/parisc/include/asm/pci.h
index 96b7deec512d..3328fd17c19d 100644
--- a/arch/parisc/include/asm/pci.h
+++ b/arch/parisc/include/asm/pci.h
@@ -88,29 +88,6 @@ struct pci_hba_data {
#endif /* !CONFIG_64BIT */
/*
- * If the PCI device's view of memory is the same as the CPU's view of memory,
- * PCI_DMA_BUS_IS_PHYS is true. The networking and block device layers use
- * this boolean for bounce buffer decisions.
- */
-#ifdef CONFIG_PA20
-/* All PA-2.0 machines have an IOMMU. */
-#define PCI_DMA_BUS_IS_PHYS 0
-#define parisc_has_iommu() do { } while (0)
-#else
-
-#if defined(CONFIG_IOMMU_CCIO) || defined(CONFIG_IOMMU_SBA)
-extern int parisc_bus_is_phys; /* in arch/parisc/kernel/setup.c */
-#define PCI_DMA_BUS_IS_PHYS parisc_bus_is_phys
-#define parisc_has_iommu() do { parisc_bus_is_phys = 0; } while (0)
-#else
-#define PCI_DMA_BUS_IS_PHYS 1
-#define parisc_has_iommu() do { } while (0)
-#endif
-
-#endif /* !CONFIG_PA20 */
-
-
-/*
** Most PCI devices (eg Tulip, NCR720) also export the same registers
** to both MMIO and I/O port space. Due to poor performance of I/O Port
** access under HP PCI bus adapters, strongly recommend the use of MMIO
diff --git a/arch/parisc/kernel/drivers.c b/arch/parisc/kernel/drivers.c
index 3b8507f71050..e0e1c9775c32 100644
--- a/arch/parisc/kernel/drivers.c
+++ b/arch/parisc/kernel/drivers.c
@@ -268,7 +268,7 @@ static struct parisc_device *find_device_by_addr(unsigned long hpa)
* Walks up the device tree looking for a device of the specified type.
* If it finds it, it returns it. If not, it returns NULL.
*/
-const struct parisc_device * __init
+const struct parisc_device *
find_pa_parent_type(const struct parisc_device *padev, int type)
{
const struct device *dev = &padev->dev;
@@ -448,7 +448,8 @@ static int match_by_id(struct device * dev, void * data)
* Checks all the children of @parent for a matching @id. If none
* found, it allocates a new device and returns it.
*/
-static struct parisc_device * alloc_tree_node(struct device *parent, char id)
+static struct parisc_device * __init alloc_tree_node(
+ struct device *parent, char id)
{
struct match_id_data d = {
.id = id,
@@ -825,8 +826,8 @@ static void walk_lower_bus(struct parisc_device *dev)
* devices which are not physically connected (such as extra serial &
* keyboard ports). This problem is not yet solved.
*/
-static void walk_native_bus(unsigned long io_io_low, unsigned long io_io_high,
- struct device *parent)
+static void __init walk_native_bus(unsigned long io_io_low,
+ unsigned long io_io_high, struct device *parent)
{
int i, devices_found = 0;
unsigned long hpa = io_io_low;
diff --git a/arch/parisc/kernel/pci-dma.c b/arch/parisc/kernel/pci-dma.c
index 91bc0cac03a1..6df07ce4f3c2 100644
--- a/arch/parisc/kernel/pci-dma.c
+++ b/arch/parisc/kernel/pci-dma.c
@@ -367,19 +367,6 @@ static int proc_pcxl_dma_show(struct seq_file *m, void *v)
return 0;
}
-static int proc_pcxl_dma_open(struct inode *inode, struct file *file)
-{
- return single_open(file, proc_pcxl_dma_show, NULL);
-}
-
-static const struct file_operations proc_pcxl_dma_ops = {
- .owner = THIS_MODULE,
- .open = proc_pcxl_dma_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
static int __init
pcxl_dma_init(void)
{
@@ -397,8 +384,8 @@ pcxl_dma_init(void)
"pcxl_dma_init: Unable to create gsc /proc dir entry\n");
else {
struct proc_dir_entry* ent;
- ent = proc_create("pcxl_dma", 0, proc_gsc_root,
- &proc_pcxl_dma_ops);
+ ent = proc_create_single("pcxl_dma", 0, proc_gsc_root,
+ proc_pcxl_dma_show);
if (!ent)
printk(KERN_WARNING
"pci-dma.c: Unable to create pcxl_dma /proc entry.\n");
diff --git a/arch/parisc/kernel/pci.c b/arch/parisc/kernel/pci.c
index 13ee3569959a..ae684ac6efb6 100644
--- a/arch/parisc/kernel/pci.c
+++ b/arch/parisc/kernel/pci.c
@@ -174,7 +174,7 @@ void pcibios_set_master(struct pci_dev *dev)
* pcibios_init_bridge() initializes cache line and default latency
* for pci controllers and pci-pci bridges
*/
-void __init pcibios_init_bridge(struct pci_dev *dev)
+void __ref pcibios_init_bridge(struct pci_dev *dev)
{
unsigned short bridge_ctl, bridge_ctl_new;
diff --git a/arch/parisc/kernel/pdc_chassis.c b/arch/parisc/kernel/pdc_chassis.c
index 3e04242de5a7..28e07482b0f1 100644
--- a/arch/parisc/kernel/pdc_chassis.c
+++ b/arch/parisc/kernel/pdc_chassis.c
@@ -266,18 +266,6 @@ static int pdc_chassis_warn_show(struct seq_file *m, void *v)
return 0;
}
-static int pdc_chassis_warn_open(struct inode *inode, struct file *file)
-{
- return single_open(file, pdc_chassis_warn_show, NULL);
-}
-
-static const struct file_operations pdc_chassis_warn_fops = {
- .open = pdc_chassis_warn_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
static int __init pdc_chassis_create_procfs(void)
{
unsigned long test;
@@ -292,7 +280,7 @@ static int __init pdc_chassis_create_procfs(void)
printk(KERN_INFO "Enabling PDC chassis warnings support v%s\n",
PDC_CHASSIS_VER);
- proc_create("chassis", 0400, NULL, &pdc_chassis_warn_fops);
+ proc_create_single("chassis", 0400, NULL, pdc_chassis_warn_show);
return 0;
}
diff --git a/arch/parisc/kernel/ptrace.c b/arch/parisc/kernel/ptrace.c
index 1a2be6e639b5..7aa1d4d0d444 100644
--- a/arch/parisc/kernel/ptrace.c
+++ b/arch/parisc/kernel/ptrace.c
@@ -76,8 +76,6 @@ void user_enable_single_step(struct task_struct *task)
set_tsk_thread_flag(task, TIF_SINGLESTEP);
if (pa_psw(task)->n) {
- struct siginfo si;
-
/* Nullified, just crank over the queue. */
task_regs(task)->iaoq[0] = task_regs(task)->iaoq[1];
task_regs(task)->iasq[0] = task_regs(task)->iasq[1];
@@ -90,11 +88,9 @@ void user_enable_single_step(struct task_struct *task)
ptrace_disable(task);
/* Don't wake up the task, but let the
parent know something happened. */
- si.si_code = TRAP_TRACE;
- si.si_addr = (void __user *) (task_regs(task)->iaoq[0] & ~3);
- si.si_signo = SIGTRAP;
- si.si_errno = 0;
- force_sig_info(SIGTRAP, &si, task);
+ force_sig_fault(SIGTRAP, TRAP_TRACE,
+ (void __user *) (task_regs(task)->iaoq[0] & ~3),
+ task);
/* notify_parent(task, SIGCHLD); */
return;
}
diff --git a/arch/parisc/kernel/setup.c b/arch/parisc/kernel/setup.c
index 0e9675f857a5..8d3a7b80ac42 100644
--- a/arch/parisc/kernel/setup.c
+++ b/arch/parisc/kernel/setup.c
@@ -58,11 +58,6 @@ struct proc_dir_entry * proc_runway_root __read_mostly = NULL;
struct proc_dir_entry * proc_gsc_root __read_mostly = NULL;
struct proc_dir_entry * proc_mckinley_root __read_mostly = NULL;
-#if !defined(CONFIG_PA20) && (defined(CONFIG_IOMMU_CCIO) || defined(CONFIG_IOMMU_SBA))
-int parisc_bus_is_phys __read_mostly = 1; /* Assume no IOMMU is present */
-EXPORT_SYMBOL(parisc_bus_is_phys);
-#endif
-
void __init setup_cmdline(char **cmdline_p)
{
extern unsigned int boot_args[];
diff --git a/arch/parisc/kernel/smp.c b/arch/parisc/kernel/smp.c
index 4065b5e48c9d..5e26dbede5fc 100644
--- a/arch/parisc/kernel/smp.c
+++ b/arch/parisc/kernel/smp.c
@@ -423,8 +423,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle)
}
#ifdef CONFIG_PROC_FS
-int __init
-setup_profiling_timer(unsigned int multiplier)
+int setup_profiling_timer(unsigned int multiplier)
{
return -EINVAL;
}
diff --git a/arch/parisc/kernel/time.c b/arch/parisc/kernel/time.c
index c3830400ca28..a1e772f909cb 100644
--- a/arch/parisc/kernel/time.c
+++ b/arch/parisc/kernel/time.c
@@ -205,7 +205,7 @@ static int __init rtc_init(void)
device_initcall(rtc_init);
#endif
-void read_persistent_clock(struct timespec *ts)
+void read_persistent_clock64(struct timespec64 *ts)
{
static struct pdc_tod tod_data;
if (pdc_tod_read(&tod_data) == 0) {
diff --git a/arch/parisc/kernel/traps.c b/arch/parisc/kernel/traps.c
index 68e671a11987..4309ad31a874 100644
--- a/arch/parisc/kernel/traps.c
+++ b/arch/parisc/kernel/traps.c
@@ -297,13 +297,8 @@ void die_if_kernel(char *str, struct pt_regs *regs, long err)
#define GDB_BREAK_INSN 0x10004
static void handle_gdb_break(struct pt_regs *regs, int wot)
{
- struct siginfo si;
-
- si.si_signo = SIGTRAP;
- si.si_errno = 0;
- si.si_code = wot;
- si.si_addr = (void __user *) (regs->iaoq[0] & ~3);
- force_sig_info(SIGTRAP, &si, current);
+ force_sig_fault(SIGTRAP, wot,
+ (void __user *) (regs->iaoq[0] & ~3), current);
}
static void handle_break(struct pt_regs *regs)
@@ -487,7 +482,7 @@ void notrace handle_interruption(int code, struct pt_regs *regs)
{
unsigned long fault_address = 0;
unsigned long fault_space = 0;
- struct siginfo si;
+ int si_code;
if (code == 1)
pdc_console_restart(); /* switch back to pdc if HPMC */
@@ -571,7 +566,7 @@ void notrace handle_interruption(int code, struct pt_regs *regs)
case 8:
/* Illegal instruction trap */
die_if_kernel("Illegal instruction", regs, code);
- si.si_code = ILL_ILLOPC;
+ si_code = ILL_ILLOPC;
goto give_sigill;
case 9:
@@ -582,7 +577,7 @@ void notrace handle_interruption(int code, struct pt_regs *regs)
case 10:
/* Privileged operation trap */
die_if_kernel("Privileged operation", regs, code);
- si.si_code = ILL_PRVOPC;
+ si_code = ILL_PRVOPC;
goto give_sigill;
case 11:
@@ -605,20 +600,16 @@ void notrace handle_interruption(int code, struct pt_regs *regs)
}
die_if_kernel("Privileged register usage", regs, code);
- si.si_code = ILL_PRVREG;
+ si_code = ILL_PRVREG;
give_sigill:
- si.si_signo = SIGILL;
- si.si_errno = 0;
- si.si_addr = (void __user *) regs->iaoq[0];
- force_sig_info(SIGILL, &si, current);
+ force_sig_fault(SIGILL, si_code,
+ (void __user *) regs->iaoq[0], current);
return;
case 12:
/* Overflow Trap, let the userland signal handler do the cleanup */
- si.si_signo = SIGFPE;
- si.si_code = FPE_INTOVF;
- si.si_addr = (void __user *) regs->iaoq[0];
- force_sig_info(SIGFPE, &si, current);
+ force_sig_fault(SIGFPE, FPE_INTOVF,
+ (void __user *) regs->iaoq[0], current);
return;
case 13:
@@ -626,13 +617,11 @@ void notrace handle_interruption(int code, struct pt_regs *regs)
The condition succeeds in an instruction which traps
on condition */
if(user_mode(regs)){
- si.si_signo = SIGFPE;
/* Let userspace app figure it out from the insn pointed
* to by si_addr.
*/
- si.si_code = FPE_CONDTRAP;
- si.si_addr = (void __user *) regs->iaoq[0];
- force_sig_info(SIGFPE, &si, current);
+ force_sig_fault(SIGFPE, FPE_CONDTRAP,
+ (void __user *) regs->iaoq[0], current);
return;
}
/* The kernel doesn't want to handle condition codes */
@@ -741,14 +730,10 @@ void notrace handle_interruption(int code, struct pt_regs *regs)
return;
die_if_kernel("Protection id trap", regs, code);
- si.si_code = SEGV_MAPERR;
- si.si_signo = SIGSEGV;
- si.si_errno = 0;
- if (code == 7)
- si.si_addr = (void __user *) regs->iaoq[0];
- else
- si.si_addr = (void __user *) regs->ior;
- force_sig_info(SIGSEGV, &si, current);
+ force_sig_fault(SIGSEGV, SEGV_MAPERR,
+ (code == 7)?
+ ((void __user *) regs->iaoq[0]) :
+ ((void __user *) regs->ior), current);
return;
case 28:
@@ -762,11 +747,8 @@ void notrace handle_interruption(int code, struct pt_regs *regs)
"handle_interruption() pid=%d command='%s'\n",
task_pid_nr(current), current->comm);
/* SIGBUS, for lack of a better one. */
- si.si_signo = SIGBUS;
- si.si_code = BUS_OBJERR;
- si.si_errno = 0;
- si.si_addr = (void __user *) regs->ior;
- force_sig_info(SIGBUS, &si, current);
+ force_sig_fault(SIGBUS, BUS_OBJERR,
+ (void __user *)regs->ior, current);
return;
}
pdc_chassis_send_status(PDC_CHASSIS_DIRECT_PANIC);
@@ -781,11 +763,8 @@ void notrace handle_interruption(int code, struct pt_regs *regs)
"User fault %d on space 0x%08lx, pid=%d command='%s'\n",
code, fault_space,
task_pid_nr(current), current->comm);
- si.si_signo = SIGSEGV;
- si.si_errno = 0;
- si.si_code = SEGV_MAPERR;
- si.si_addr = (void __user *) regs->ior;
- force_sig_info(SIGSEGV, &si, current);
+ force_sig_fault(SIGSEGV, SEGV_MAPERR,
+ (void __user *)regs->ior, current);
return;
}
}
@@ -837,6 +816,17 @@ void __init initialize_ivt(const void *iva)
if (pdc_instr(&instr) == PDC_OK)
ivap[0] = instr;
+ /*
+ * Rules for the checksum of the HPMC handler:
+ * 1. The IVA does not point to PDC/PDH space (ie: the OS has installed
+ * its own IVA).
+ * 2. The word at IVA + 32 is nonzero.
+ * 3. If Length (IVA + 60) is not zero, then Length (IVA + 60) and
+ * Address (IVA + 56) are word-aligned.
+ * 4. The checksum of the 8 words starting at IVA + 32 plus the sum of
+ * the Length/4 words starting at Address is zero.
+ */
+
/* Compute Checksum for HPMC handler */
length = os_hpmc_size;
ivap[7] = length;
diff --git a/arch/parisc/kernel/unaligned.c b/arch/parisc/kernel/unaligned.c
index e36f7b75ab07..932bfc0b7cd8 100644
--- a/arch/parisc/kernel/unaligned.c
+++ b/arch/parisc/kernel/unaligned.c
@@ -452,7 +452,6 @@ void handle_unaligned(struct pt_regs *regs)
unsigned long newbase = R1(regs->iir)?regs->gr[R1(regs->iir)]:0;
int modify = 0;
int ret = ERR_NOTHANDLED;
- struct siginfo si;
register int flop=0; /* true if this is a flop */
__inc_irq_stat(irq_unaligned_count);
@@ -690,21 +689,15 @@ void handle_unaligned(struct pt_regs *regs)
if (ret == ERR_PAGEFAULT)
{
- si.si_signo = SIGSEGV;
- si.si_errno = 0;
- si.si_code = SEGV_MAPERR;
- si.si_addr = (void __user *)regs->ior;
- force_sig_info(SIGSEGV, &si, current);
+ force_sig_fault(SIGSEGV, SEGV_MAPERR,
+ (void __user *)regs->ior, current);
}
else
{
force_sigbus:
/* couldn't handle it ... */
- si.si_signo = SIGBUS;
- si.si_errno = 0;
- si.si_code = BUS_ADRALN;
- si.si_addr = (void __user *)regs->ior;
- force_sig_info(SIGBUS, &si, current);
+ force_sig_fault(SIGBUS, BUS_ADRALN,
+ (void __user *)regs->ior, current);
}
return;
diff --git a/arch/parisc/math-emu/driver.c b/arch/parisc/math-emu/driver.c
index 2fb59d2e2b29..0590e05571d1 100644
--- a/arch/parisc/math-emu/driver.c
+++ b/arch/parisc/math-emu/driver.c
@@ -81,7 +81,6 @@ int
handle_fpe(struct pt_regs *regs)
{
extern void printbinary(unsigned long x, int nbits);
- struct siginfo si;
unsigned int orig_sw, sw;
int signalcode;
/* need an intermediate copy of float regs because FPU emulation
@@ -117,11 +116,8 @@ handle_fpe(struct pt_regs *regs)
memcpy(regs->fr, frcopy, sizeof regs->fr);
if (signalcode != 0) {
- si.si_signo = signalcode >> 24;
- si.si_errno = 0;
- si.si_code = signalcode & 0xffffff;
- si.si_addr = (void __user *) regs->iaoq[0];
- force_sig_info(si.si_signo, &si, current);
+ force_sig_fault(signalcode >> 24, signalcode & 0xffffff,
+ (void __user *) regs->iaoq[0], current);
return -1;
}
diff --git a/arch/parisc/mm/fault.c b/arch/parisc/mm/fault.c
index e247edbca68e..a80117980fc2 100644
--- a/arch/parisc/mm/fault.c
+++ b/arch/parisc/mm/fault.c
@@ -353,23 +353,22 @@ bad_area:
up_read(&mm->mmap_sem);
if (user_mode(regs)) {
- struct siginfo si;
- unsigned int lsb = 0;
+ int signo, si_code;
switch (code) {
case 15: /* Data TLB miss fault/Data page fault */
/* send SIGSEGV when outside of vma */
if (!vma ||
address < vma->vm_start || address >= vma->vm_end) {
- si.si_signo = SIGSEGV;
- si.si_code = SEGV_MAPERR;
+ signo = SIGSEGV;
+ si_code = SEGV_MAPERR;
break;
}
/* send SIGSEGV for wrong permissions */
if ((vma->vm_flags & acc_type) != acc_type) {
- si.si_signo = SIGSEGV;
- si.si_code = SEGV_ACCERR;
+ signo = SIGSEGV;
+ si_code = SEGV_ACCERR;
break;
}
@@ -377,43 +376,40 @@ bad_area:
/* fall through */
case 17: /* NA data TLB miss / page fault */
case 18: /* Unaligned access - PCXS only */
- si.si_signo = SIGBUS;
- si.si_code = (code == 18) ? BUS_ADRALN : BUS_ADRERR;
+ signo = SIGBUS;
+ si_code = (code == 18) ? BUS_ADRALN : BUS_ADRERR;
break;
case 16: /* Non-access instruction TLB miss fault */
case 26: /* PCXL: Data memory access rights trap */
default:
- si.si_signo = SIGSEGV;
- si.si_code = (code == 26) ? SEGV_ACCERR : SEGV_MAPERR;
+ signo = SIGSEGV;
+ si_code = (code == 26) ? SEGV_ACCERR : SEGV_MAPERR;
break;
}
-
#ifdef CONFIG_MEMORY_FAILURE
if (fault & (VM_FAULT_HWPOISON|VM_FAULT_HWPOISON_LARGE)) {
+ unsigned int lsb = 0;
printk(KERN_ERR
"MCE: Killing %s:%d due to hardware memory corruption fault at %08lx\n",
tsk->comm, tsk->pid, address);
- si.si_signo = SIGBUS;
- si.si_code = BUS_MCEERR_AR;
+ /*
+ * Either small page or large page may be poisoned.
+ * In other words, VM_FAULT_HWPOISON_LARGE and
+ * VM_FAULT_HWPOISON are mutually exclusive.
+ */
+ if (fault & VM_FAULT_HWPOISON_LARGE)
+ lsb = hstate_index_to_shift(VM_FAULT_GET_HINDEX(fault));
+ else if (fault & VM_FAULT_HWPOISON)
+ lsb = PAGE_SHIFT;
+
+ force_sig_mceerr(BUS_MCEERR_AR, (void __user *) address,
+ lsb, current);
+ return;
}
#endif
+ show_signal_msg(regs, code, address, tsk, vma);
- /*
- * Either small page or large page may be poisoned.
- * In other words, VM_FAULT_HWPOISON_LARGE and
- * VM_FAULT_HWPOISON are mutually exclusive.
- */
- if (fault & VM_FAULT_HWPOISON_LARGE)
- lsb = hstate_index_to_shift(VM_FAULT_GET_HINDEX(fault));
- else if (fault & VM_FAULT_HWPOISON)
- lsb = PAGE_SHIFT;
- else
- show_signal_msg(regs, code, address, tsk, vma);
- si.si_addr_lsb = lsb;
-
- si.si_errno = 0;
- si.si_addr = (void __user *) address;
- force_sig_info(si.si_signo, &si, current);
+ force_sig_fault(signo, si_code, (void __user *) address, current);
return;
}
diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c
index cab32ee824d2..2607d2d33405 100644
--- a/arch/parisc/mm/init.c
+++ b/arch/parisc/mm/init.c
@@ -516,7 +516,7 @@ static void __init map_pages(unsigned long start_vaddr,
}
}
-void free_initmem(void)
+void __ref free_initmem(void)
{
unsigned long init_begin = (unsigned long)__init_begin;
unsigned long init_end = (unsigned long)__init_end;
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index c32a181a7cbb..f674006dea2f 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -13,12 +13,6 @@ config 64BIT
bool
default y if PPC64
-config ARCH_PHYS_ADDR_T_64BIT
- def_bool PPC64 || PHYS_64BIT
-
-config ARCH_DMA_ADDR_T_64BIT
- def_bool ARCH_PHYS_ADDR_T_64BIT
-
config MMU
bool
default y
@@ -187,7 +181,6 @@ config PPC
select HAVE_CONTEXT_TRACKING if PPC64
select HAVE_DEBUG_KMEMLEAK
select HAVE_DEBUG_STACKOVERFLOW
- select HAVE_DMA_API_DEBUG
select HAVE_DYNAMIC_FTRACE
select HAVE_DYNAMIC_FTRACE_WITH_REGS if MPROFILE_KERNEL
select HAVE_EBPF_JIT if PPC64
@@ -223,9 +216,11 @@ config PPC
select HAVE_SYSCALL_TRACEPOINTS
select HAVE_VIRT_CPU_ACCOUNTING
select HAVE_IRQ_TIME_ACCOUNTING
+ select IOMMU_HELPER if PPC64
select IRQ_DOMAIN
select IRQ_FORCED_THREADING
select MODULES_USE_ELF_RELA
+ select NEED_SG_DMA_LENGTH
select NO_BOOTMEM
select OF
select OF_EARLY_FLATTREE
@@ -478,19 +473,6 @@ config MPROFILE_KERNEL
depends on PPC64 && CPU_LITTLE_ENDIAN
def_bool !DISABLE_MPROFILE_KERNEL
-config IOMMU_HELPER
- def_bool PPC64
-
-config SWIOTLB
- bool "SWIOTLB support"
- default n
- select IOMMU_HELPER
- ---help---
- Support for IO bounce buffering for systems without an IOMMU.
- This allows us to DMA to the full physical address space on
- platforms where the size of a physical address is larger
- than the bus address. Not all platforms support this.
-
config HOTPLUG_CPU
bool "Support for enabling/disabling CPUs"
depends on SMP && (PPC_PSERIES || \
@@ -883,7 +865,7 @@ config PPC_MEM_KEYS
page-based protections, but without requiring modification of the
page tables when an application changes protection domains.
- For details, see Documentation/vm/protection-keys.txt
+ For details, see Documentation/vm/protection-keys.rst
If unsure, say y.
@@ -913,9 +895,6 @@ config ZONE_DMA
config NEED_DMA_MAP_STATE
def_bool (PPC64 || NOT_COHERENT_CACHE)
-config NEED_SG_DMA_LENGTH
- def_bool y
-
config GENERIC_ISA_DMA
bool
depends on ISA_DMA_API
diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h
index 471b2274fbeb..c40b4380951c 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -74,6 +74,27 @@
*/
#define EX_R3 EX_DAR
+#define STF_ENTRY_BARRIER_SLOT \
+ STF_ENTRY_BARRIER_FIXUP_SECTION; \
+ nop; \
+ nop; \
+ nop
+
+#define STF_EXIT_BARRIER_SLOT \
+ STF_EXIT_BARRIER_FIXUP_SECTION; \
+ nop; \
+ nop; \
+ nop; \
+ nop; \
+ nop; \
+ nop
+
+/*
+ * r10 must be free to use, r13 must be paca
+ */
+#define INTERRUPT_TO_KERNEL \
+ STF_ENTRY_BARRIER_SLOT
+
/*
* Macros for annotating the expected destination of (h)rfid
*
@@ -90,16 +111,19 @@
rfid
#define RFI_TO_USER \
+ STF_EXIT_BARRIER_SLOT; \
RFI_FLUSH_SLOT; \
rfid; \
b rfi_flush_fallback
#define RFI_TO_USER_OR_KERNEL \
+ STF_EXIT_BARRIER_SLOT; \
RFI_FLUSH_SLOT; \
rfid; \
b rfi_flush_fallback
#define RFI_TO_GUEST \
+ STF_EXIT_BARRIER_SLOT; \
RFI_FLUSH_SLOT; \
rfid; \
b rfi_flush_fallback
@@ -108,21 +132,25 @@
hrfid
#define HRFI_TO_USER \
+ STF_EXIT_BARRIER_SLOT; \
RFI_FLUSH_SLOT; \
hrfid; \
b hrfi_flush_fallback
#define HRFI_TO_USER_OR_KERNEL \
+ STF_EXIT_BARRIER_SLOT; \
RFI_FLUSH_SLOT; \
hrfid; \
b hrfi_flush_fallback
#define HRFI_TO_GUEST \
+ STF_EXIT_BARRIER_SLOT; \
RFI_FLUSH_SLOT; \
hrfid; \
b hrfi_flush_fallback
#define HRFI_TO_UNKNOWN \
+ STF_EXIT_BARRIER_SLOT; \
RFI_FLUSH_SLOT; \
hrfid; \
b hrfi_flush_fallback
@@ -254,6 +282,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
#define __EXCEPTION_PROLOG_1_PRE(area) \
OPT_SAVE_REG_TO_PACA(area+EX_PPR, r9, CPU_FTR_HAS_PPR); \
OPT_SAVE_REG_TO_PACA(area+EX_CFAR, r10, CPU_FTR_CFAR); \
+ INTERRUPT_TO_KERNEL; \
SAVE_CTR(r10, area); \
mfcr r9;
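
Each STF_ENTRY_BARRIER_SLOT therefore costs three nops in the hot path until boot-time patching replaces them. A compressed C view of that lifecycle (hypothetical stand-in; the opcodes are the ones the fixup code later in this series actually writes):

/* a slot starts life as nops and is patched in place at boot */
static unsigned int slot[3] = {
	0x60000000, 0x60000000, 0x60000000	/* nop; nop; nop */
};

static void patch_eieio(unsigned int *dest)
{
	dest[0] = 0x7e0006ac;	/* eieio + bit 6 hint */
	/* dest[1] and dest[2] stay as nops */
}
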
diff --git a/arch/powerpc/include/asm/feature-fixups.h b/arch/powerpc/include/asm/feature-fixups.h
index 1e82eb3caabd..a9b64df34e2a 100644
--- a/arch/powerpc/include/asm/feature-fixups.h
+++ b/arch/powerpc/include/asm/feature-fixups.h
@@ -187,6 +187,22 @@ label##3: \
FTR_ENTRY_OFFSET label##1b-label##3b; \
.popsection;
+#define STF_ENTRY_BARRIER_FIXUP_SECTION \
+953: \
+ .pushsection __stf_entry_barrier_fixup,"a"; \
+ .align 2; \
+954: \
+ FTR_ENTRY_OFFSET 953b-954b; \
+ .popsection;
+
+#define STF_EXIT_BARRIER_FIXUP_SECTION \
+955: \
+ .pushsection __stf_exit_barrier_fixup,"a"; \
+ .align 2; \
+956: \
+ FTR_ENTRY_OFFSET 955b-956b; \
+ .popsection;
+
#define RFI_FLUSH_FIXUP_SECTION \
951: \
.pushsection __rfi_flush_fixup,"a"; \
@@ -199,6 +215,9 @@ label##3: \
#ifndef __ASSEMBLY__
#include <linux/types.h>
+extern long stf_barrier_fallback;
+extern long __start___stf_entry_barrier_fixup, __stop___stf_entry_barrier_fixup;
+extern long __start___stf_exit_barrier_fixup, __stop___stf_exit_barrier_fixup;
extern long __start___rfi_flush_fixup, __stop___rfi_flush_fixup;
void apply_feature_fixups(void);
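
The FTR_ENTRY_OFFSET expressions above store each patch site's address relative to the table entry itself, which keeps the tables position-independent. A standalone C model of how a consumer recovers the destination, mirroring the "dest = (void *)start + *start" idiom in feature-fixups.c below:

#include <stdio.h>

static int patch_site;		/* stands in for a nop slot */
static long fixup_entry;	/* stands in for one table entry */

int main(void)
{
	/* entry holds (site - entry), as FTR_ENTRY_OFFSET 953b-954b does */
	fixup_entry = (char *)&patch_site - (char *)&fixup_entry;

	/* consumer: add the entry's own address back to find the site */
	int *dest = (int *)((char *)&fixup_entry + fixup_entry);

	printf("%s\n", dest == &patch_site ? "found patch site" : "bug");
	return 0;
}
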
diff --git a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h
index 9abddde372ab..b2dabd06659d 100644
--- a/arch/powerpc/include/asm/ftrace.h
+++ b/arch/powerpc/include/asm/ftrace.h
@@ -69,17 +69,30 @@ struct dyn_arch_ftrace {
#endif
#if defined(CONFIG_FTRACE_SYSCALLS) && !defined(__ASSEMBLY__)
-#ifdef PPC64_ELF_ABI_v1
+/*
+ * Some syscall entry functions on powerpc start with "ppc_" (fork and clone,
+ * for instance) or with ppc32_/ppc64_. Their sys_ variants should match
+ * those symbols as well.
+ */
#define ARCH_HAS_SYSCALL_MATCH_SYM_NAME
+#ifdef PPC64_ELF_ABI_v1
+static inline bool arch_syscall_match_sym_name(const char *sym, const char *name)
+{
+ /* We need to skip past the initial dot, and the __se_sys alias */
+ return !strcmp(sym + 1, name) ||
+ (!strncmp(sym, ".__se_sys", 9) && !strcmp(sym + 6, name)) ||
+ (!strncmp(sym, ".ppc_", 5) && !strcmp(sym + 5, name + 4)) ||
+ (!strncmp(sym, ".ppc32_", 7) && !strcmp(sym + 7, name + 4)) ||
+ (!strncmp(sym, ".ppc64_", 7) && !strcmp(sym + 7, name + 4));
+}
+#else
static inline bool arch_syscall_match_sym_name(const char *sym, const char *name)
{
- /*
- * Compare the symbol name with the system call name. Skip the .sys or .SyS
- * prefix from the symbol name and the sys prefix from the system call name and
- * just match the rest. This is only needed on ppc64 since symbol names on
- * 32bit do not start with a period so the generic function will work.
- */
- return !strcmp(sym + 4, name + 3);
+ return !strcmp(sym, name) ||
+ (!strncmp(sym, "__se_sys", 8) && !strcmp(sym + 5, name)) ||
+ (!strncmp(sym, "ppc_", 4) && !strcmp(sym + 4, name + 4)) ||
+ (!strncmp(sym, "ppc32_", 6) && !strcmp(sym + 6, name + 4)) ||
+ (!strncmp(sym, "ppc64_", 6) && !strcmp(sym + 6, name + 4));
}
#endif
#endif /* CONFIG_FTRACE_SYSCALLS && !__ASSEMBLY__ */
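
A toy userspace check of the rules above, abridged to the exact-match and ppc_ cases (hypothetical demo; in the kernel this runs inside the ftrace syscall-tracepoint setup):

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

static bool match(const char *sym, const char *name)
{
	return !strcmp(sym, name) ||
	       (!strncmp(sym, "ppc_", 4) && !strcmp(sym + 4, name + 4));
}

int main(void)
{
	printf("%d\n", match("sys_read", "sys_read"));	/* 1: exact */
	printf("%d\n", match("ppc_fork", "sys_fork"));	/* 1: ppc_ variant */
	printf("%d\n", match("ppc_fork", "sys_clone"));	/* 0 */
	return 0;
}
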
diff --git a/arch/powerpc/include/asm/hardirq.h b/arch/powerpc/include/asm/hardirq.h
index 5986d473722b..383f628acbf8 100644
--- a/arch/powerpc/include/asm/hardirq.h
+++ b/arch/powerpc/include/asm/hardirq.h
@@ -25,15 +25,8 @@ typedef struct {
DECLARE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
#define __ARCH_IRQ_STAT
-
-#define local_softirq_pending() __this_cpu_read(irq_stat.__softirq_pending)
-
-#define __ARCH_SET_SOFTIRQ_PENDING
#define __ARCH_IRQ_EXIT_IRQS_DISABLED
-#define set_softirq_pending(x) __this_cpu_write(irq_stat.__softirq_pending, (x))
-#define or_softirq_pending(x) __this_cpu_or(irq_stat.__softirq_pending, (x))
-
static inline void ack_bad_irq(unsigned int irq)
{
printk(KERN_CRIT "unexpected IRQ trap at vector %02x\n", irq);
diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index 4c02a7378d06..e7377b73cfec 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -96,6 +96,7 @@ struct kvmppc_vcore {
struct kvm_vcpu *runner;
struct kvm *kvm;
u64 tb_offset; /* guest timebase - host timebase */
+ u64 tb_offset_applied; /* timebase offset currently in force */
ulong lpcr;
u32 arch_compat;
ulong pcr;
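
tb_offset_applied records what has actually been added to the timebase at this moment, as opposed to tb_offset, which is what should be in force while the guest runs. The conversions the assembly performs later in this series reduce to the following (self-contained sketch; the field name comes from the struct above):

#include <stdint.h>

typedef uint64_t u64;

struct vcore { u64 tb_offset_applied; };

static u64 guest_tb_to_host(u64 guest_tb, const struct vcore *vc)
{
	return guest_tb - vc->tb_offset_applied;
}

static u64 host_tb_to_guest(u64 host_tb, const struct vcore *vc)
{
	return host_tb + vc->tb_offset_applied;
}
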
diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index 4185f1c96125..3f109a3e3edb 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -165,7 +165,6 @@ struct paca_struct {
u64 saved_msr; /* MSR saved here by enter_rtas */
u16 trap_save; /* Used when bad stack is encountered */
u8 irq_soft_mask; /* mask for irq soft masking */
- u8 soft_enabled; /* irq soft-enable flag */
u8 irq_happened; /* irq happened while soft-disabled */
u8 io_sync; /* writel() needs spin_unlock sync */
u8 irq_work_pending; /* IRQ_WORK interrupt while soft-disable */
diff --git a/arch/powerpc/include/asm/pci.h b/arch/powerpc/include/asm/pci.h
index 401c62aad5e4..2af9ded80540 100644
--- a/arch/powerpc/include/asm/pci.h
+++ b/arch/powerpc/include/asm/pci.h
@@ -92,24 +92,6 @@ extern int pci_mmap_legacy_page_range(struct pci_bus *bus,
#define HAVE_PCI_LEGACY 1
-#ifdef CONFIG_PPC64
-
-/* The PCI address space does not equal the physical memory address
- * space (we have an IOMMU). The IDE and SCSI device layers use
- * this boolean for bounce buffer decisions.
- */
-#define PCI_DMA_BUS_IS_PHYS (0)
-
-#else /* 32-bit */
-
-/* The PCI address space does equal the physical memory
- * address space (no IOMMU). The IDE and SCSI device layers use
- * this boolean for bounce buffer decisions.
- */
-#define PCI_DMA_BUS_IS_PHYS (1)
-
-#endif /* CONFIG_PPC64 */
-
extern void pcibios_claim_one_bus(struct pci_bus *b);
extern void pcibios_finish_adding_to_bus(struct pci_bus *bus);
diff --git a/arch/powerpc/include/asm/security_features.h b/arch/powerpc/include/asm/security_features.h
index fa4d2e1cf772..44989b22383c 100644
--- a/arch/powerpc/include/asm/security_features.h
+++ b/arch/powerpc/include/asm/security_features.h
@@ -12,6 +12,17 @@
extern unsigned long powerpc_security_features;
extern bool rfi_flush;
+/* These are bit flags */
+enum stf_barrier_type {
+ STF_BARRIER_NONE = 0x1,
+ STF_BARRIER_FALLBACK = 0x2,
+ STF_BARRIER_EIEIO = 0x4,
+ STF_BARRIER_SYNC_ORI = 0x8,
+};
+
+void setup_stf_barrier(void);
+void do_stf_barrier_fixups(enum stf_barrier_type types);
+
static inline void security_ftr_set(unsigned long feature)
{
powerpc_security_features |= feature;
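
Since the stf_barrier_type values are distinct bits rather than sequential states, a caller can test several candidates with one mask. Self-contained sketch of the test that do_stf_exit_barrier_fixups() performs later in this series:

#include <stdbool.h>

enum stf_barrier_type {		/* mirrors the enum above */
	STF_BARRIER_NONE	= 0x1,
	STF_BARRIER_FALLBACK	= 0x2,
	STF_BARRIER_EIEIO	= 0x4,
	STF_BARRIER_SYNC_ORI	= 0x8,
};

/* both of these variants need the register save/restore scaffolding */
static bool needs_scaffolding(enum stf_barrier_type types)
{
	return types & (STF_BARRIER_FALLBACK | STF_BARRIER_SYNC_ORI);
}
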
diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h
index 9f421641a35c..16b077801a5f 100644
--- a/arch/powerpc/include/asm/topology.h
+++ b/arch/powerpc/include/asm/topology.h
@@ -91,6 +91,7 @@ extern int start_topology_update(void);
extern int stop_topology_update(void);
extern int prrn_is_enabled(void);
extern int find_and_online_cpu_nid(int cpu);
+extern int timed_topology_update(int nsecs);
#else
static inline int start_topology_update(void)
{
@@ -108,16 +109,12 @@ static inline int find_and_online_cpu_nid(int cpu)
{
return 0;
}
+static inline int timed_topology_update(int nsecs)
+{
+ return 0;
+}
#endif /* CONFIG_NUMA && CONFIG_PPC_SPLPAR */
-#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_NEED_MULTIPLE_NODES)
-#if defined(CONFIG_PPC_SPLPAR)
-extern int timed_topology_update(int nsecs);
-#else
-#define timed_topology_update(nsecs)
-#endif /* CONFIG_PPC_SPLPAR */
-#endif /* CONFIG_HOTPLUG_CPU || CONFIG_NEED_MULTIPLE_NODES */
-
#include <asm-generic/topology.h>
#ifdef CONFIG_SMP
diff --git a/arch/powerpc/include/uapi/asm/siginfo.h b/arch/powerpc/include/uapi/asm/siginfo.h
index 9f142451a01f..1d51d9b88221 100644
--- a/arch/powerpc/include/uapi/asm/siginfo.h
+++ b/arch/powerpc/include/uapi/asm/siginfo.h
@@ -15,19 +15,4 @@
#include <asm-generic/siginfo.h>
-/*
- * SIGFPE si_codes
- */
-#ifdef __KERNEL__
-#define FPE_FIXME 0 /* Broken dup of SI_USER */
-#endif /* __KERNEL__ */
-
-/*
- * SIGTRAP si_codes
- */
-#ifdef __KERNEL__
-#define TRAP_FIXME 0 /* Broken dup of SI_USER */
-#endif /* __KERNEL__ */
-
-
#endif /* _ASM_POWERPC_SIGINFO_H */
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index a77528db474c..8817c5a6bcc2 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -562,6 +562,7 @@ int main(void)
OFFSET(VCORE_NAPPING_THREADS, kvmppc_vcore, napping_threads);
OFFSET(VCORE_KVM, kvmppc_vcore, kvm);
OFFSET(VCORE_TB_OFFSET, kvmppc_vcore, tb_offset);
+ OFFSET(VCORE_TB_OFFSET_APPL, kvmppc_vcore, tb_offset_applied);
OFFSET(VCORE_LPCR, kvmppc_vcore, lpcr);
OFFSET(VCORE_PCR, kvmppc_vcore, pcr);
OFFSET(VCORE_DPDES, kvmppc_vcore, dpdes);
diff --git a/arch/powerpc/kernel/cpu_setup_power.S b/arch/powerpc/kernel/cpu_setup_power.S
index 3f30c994e931..458b928dbd84 100644
--- a/arch/powerpc/kernel/cpu_setup_power.S
+++ b/arch/powerpc/kernel/cpu_setup_power.S
@@ -28,6 +28,7 @@ _GLOBAL(__setup_cpu_power7)
beqlr
li r0,0
mtspr SPRN_LPID,r0
+ mtspr SPRN_PCR,r0
mfspr r3,SPRN_LPCR
li r4,(LPCR_LPES1 >> LPCR_LPES_SH)
bl __init_LPCR_ISA206
@@ -41,6 +42,7 @@ _GLOBAL(__restore_cpu_power7)
beqlr
li r0,0
mtspr SPRN_LPID,r0
+ mtspr SPRN_PCR,r0
mfspr r3,SPRN_LPCR
li r4,(LPCR_LPES1 >> LPCR_LPES_SH)
bl __init_LPCR_ISA206
@@ -57,6 +59,7 @@ _GLOBAL(__setup_cpu_power8)
beqlr
li r0,0
mtspr SPRN_LPID,r0
+ mtspr SPRN_PCR,r0
mfspr r3,SPRN_LPCR
ori r3, r3, LPCR_PECEDH
li r4,0 /* LPES = 0 */
@@ -78,6 +81,7 @@ _GLOBAL(__restore_cpu_power8)
beqlr
li r0,0
mtspr SPRN_LPID,r0
+ mtspr SPRN_PCR,r0
mfspr r3,SPRN_LPCR
ori r3, r3, LPCR_PECEDH
li r4,0 /* LPES = 0 */
@@ -99,6 +103,7 @@ _GLOBAL(__setup_cpu_power9)
mtspr SPRN_PSSCR,r0
mtspr SPRN_LPID,r0
mtspr SPRN_PID,r0
+ mtspr SPRN_PCR,r0
mfspr r3,SPRN_LPCR
LOAD_REG_IMMEDIATE(r4, LPCR_PECEDH | LPCR_PECE_HVEE | LPCR_HVICE | LPCR_HEIC)
or r3, r3, r4
@@ -123,6 +128,7 @@ _GLOBAL(__restore_cpu_power9)
mtspr SPRN_PSSCR,r0
mtspr SPRN_LPID,r0
mtspr SPRN_PID,r0
+ mtspr SPRN_PCR,r0
mfspr r3,SPRN_LPCR
LOAD_REG_IMMEDIATE(r4, LPCR_PECEDH | LPCR_PECE_HVEE | LPCR_HVICE | LPCR_HEIC)
or r3, r3, r4
diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c
index da20569de9d4..138157deeadf 100644
--- a/arch/powerpc/kernel/dma.c
+++ b/arch/powerpc/kernel/dma.c
@@ -309,8 +309,6 @@ int dma_set_coherent_mask(struct device *dev, u64 mask)
}
EXPORT_SYMBOL(dma_set_coherent_mask);
-#define PREALLOC_DMA_DEBUG_ENTRIES (1 << 16)
-
int dma_set_mask(struct device *dev, u64 dma_mask)
{
if (ppc_md.dma_set_mask)
@@ -361,7 +359,6 @@ EXPORT_SYMBOL_GPL(dma_get_required_mask);
static int __init dma_init(void)
{
- dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES);
#ifdef CONFIG_PCI
dma_debug_add_bus(&pci_bus_type);
#endif
diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c
index 8ab51f6ca03a..c904477abaf3 100644
--- a/arch/powerpc/kernel/dt_cpu_ftrs.c
+++ b/arch/powerpc/kernel/dt_cpu_ftrs.c
@@ -101,6 +101,7 @@ static void __restore_cpu_cpufeatures(void)
if (hv_mode) {
mtspr(SPRN_LPID, 0);
mtspr(SPRN_HFSCR, system_registers.hfscr);
+ mtspr(SPRN_PCR, 0);
}
mtspr(SPRN_FSCR, system_registers.fscr);
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index bc640e4c5ca5..90bb39b1a23c 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -1775,18 +1775,6 @@ static int proc_eeh_show(struct seq_file *m, void *v)
return 0;
}
-static int proc_eeh_open(struct inode *inode, struct file *file)
-{
- return single_open(file, proc_eeh_show, NULL);
-}
-
-static const struct file_operations proc_eeh_operations = {
- .open = proc_eeh_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
#ifdef CONFIG_DEBUG_FS
static int eeh_enable_dbgfs_set(void *data, u64 val)
{
@@ -1828,7 +1816,7 @@ DEFINE_SIMPLE_ATTRIBUTE(eeh_freeze_dbgfs_ops, eeh_freeze_dbgfs_get,
static int __init eeh_init_proc(void)
{
if (machine_is(pseries) || machine_is(powernv)) {
- proc_create("powerpc/eeh", 0, NULL, &proc_eeh_operations);
+ proc_create_single("powerpc/eeh", 0, NULL, proc_eeh_show);
#ifdef CONFIG_DEBUG_FS
debugfs_create_file("eeh_enable", 0600,
powerpc_debugfs_root, NULL,
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index ae6a849db60b..f283958129f2 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -885,7 +885,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_TM)
#endif
-EXC_REAL_MASKABLE(decrementer, 0x900, 0x80, IRQS_DISABLED)
+EXC_REAL_OOL_MASKABLE(decrementer, 0x900, 0x80, IRQS_DISABLED)
EXC_VIRT_MASKABLE(decrementer, 0x4900, 0x80, 0x900, IRQS_DISABLED)
TRAMP_KVM(PACA_EXGEN, 0x900)
EXC_COMMON_ASYNC(decrementer_common, 0x900, timer_interrupt)
@@ -961,6 +961,7 @@ EXC_COMMON(trap_0b_common, 0xb00, unknown_exception)
mtctr r13; \
GET_PACA(r13); \
std r10,PACA_EXGEN+EX_R10(r13); \
+ INTERRUPT_TO_KERNEL; \
KVMTEST_PR(0xc00); /* uses r10, branch to do_kvm_0xc00_system_call */ \
HMT_MEDIUM; \
mfctr r9;
@@ -969,7 +970,8 @@ EXC_COMMON(trap_0b_common, 0xb00, unknown_exception)
#define SYSCALL_KVMTEST \
HMT_MEDIUM; \
mr r9,r13; \
- GET_PACA(r13);
+ GET_PACA(r13); \
+ INTERRUPT_TO_KERNEL;
#endif
#define LOAD_SYSCALL_HANDLER(reg) \
@@ -1507,6 +1509,19 @@ masked_##_H##interrupt: \
b .; \
MASKED_DEC_HANDLER(_H)
+TRAMP_REAL_BEGIN(stf_barrier_fallback)
+ std r9,PACA_EXRFI+EX_R9(r13)
+ std r10,PACA_EXRFI+EX_R10(r13)
+ sync
+ ld r9,PACA_EXRFI+EX_R9(r13)
+ ld r10,PACA_EXRFI+EX_R10(r13)
+ ori 31,31,0
+ .rept 14
+ b 1f
+1:
+ .endr
+ blr
+
TRAMP_REAL_BEGIN(rfi_flush_fallback)
SET_SCRATCH0(r13);
GET_PACA(r13);
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 1237f13fed51..26ea9793d290 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -632,6 +632,7 @@ void do_break (struct pt_regs *regs, unsigned long address,
hw_breakpoint_disable();
/* Deliver the signal to userspace */
+ clear_siginfo(&info);
info.si_signo = SIGTRAP;
info.si_errno = 0;
info.si_code = TRAP_HWBKPT;
diff --git a/arch/powerpc/kernel/rtas-proc.c b/arch/powerpc/kernel/rtas-proc.c
index fb070d8cad07..d49063d0baa4 100644
--- a/arch/powerpc/kernel/rtas-proc.c
+++ b/arch/powerpc/kernel/rtas-proc.c
@@ -154,18 +154,6 @@ static ssize_t ppc_rtas_tone_volume_write(struct file *file,
static int ppc_rtas_tone_volume_show(struct seq_file *m, void *v);
static int ppc_rtas_rmo_buf_show(struct seq_file *m, void *v);
-static int sensors_open(struct inode *inode, struct file *file)
-{
- return single_open(file, ppc_rtas_sensors_show, NULL);
-}
-
-static const struct file_operations ppc_rtas_sensors_operations = {
- .open = sensors_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
static int poweron_open(struct inode *inode, struct file *file)
{
return single_open(file, ppc_rtas_poweron_show, NULL);
@@ -231,18 +219,6 @@ static const struct file_operations ppc_rtas_tone_volume_operations = {
.release = single_release,
};
-static int rmo_buf_open(struct inode *inode, struct file *file)
-{
- return single_open(file, ppc_rtas_rmo_buf_show, NULL);
-}
-
-static const struct file_operations ppc_rtas_rmo_buf_ops = {
- .open = rmo_buf_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
static int ppc_rtas_find_all_sensors(void);
static void ppc_rtas_process_sensor(struct seq_file *m,
struct individual_sensor *s, int state, int error, const char *loc);
@@ -267,14 +243,14 @@ static int __init proc_rtas_init(void)
&ppc_rtas_clock_operations);
proc_create("powerpc/rtas/poweron", 0644, NULL,
&ppc_rtas_poweron_operations);
- proc_create("powerpc/rtas/sensors", 0444, NULL,
- &ppc_rtas_sensors_operations);
+ proc_create_single("powerpc/rtas/sensors", 0444, NULL,
+ ppc_rtas_sensors_show);
proc_create("powerpc/rtas/frequency", 0644, NULL,
&ppc_rtas_tone_freq_operations);
proc_create("powerpc/rtas/volume", 0644, NULL,
&ppc_rtas_tone_volume_operations);
- proc_create("powerpc/rtas/rmo_buffer", 0400, NULL,
- &ppc_rtas_rmo_buf_ops);
+ proc_create_single("powerpc/rtas/rmo_buffer", 0400, NULL,
+ ppc_rtas_rmo_buf_show);
return 0;
}
diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c
index bab5a27ea805..b98a722da915 100644
--- a/arch/powerpc/kernel/security.c
+++ b/arch/powerpc/kernel/security.c
@@ -8,6 +8,7 @@
#include <linux/device.h>
#include <linux/seq_buf.h>
+#include <asm/debugfs.h>
#include <asm/security_features.h>
@@ -86,3 +87,151 @@ ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, c
return s.len;
}
+
+/*
+ * Store-forwarding barrier support.
+ */
+
+static enum stf_barrier_type stf_enabled_flush_types;
+static bool no_stf_barrier;
+bool stf_barrier;
+
+static int __init handle_no_stf_barrier(char *p)
+{
+ pr_info("stf-barrier: disabled on command line.\n");
+ no_stf_barrier = true;
+ return 0;
+}
+
+early_param("no_stf_barrier", handle_no_stf_barrier);
+
+/* This is the generic flag used by other architectures */
+static int __init handle_ssbd(char *p)
+{
+ if (!p || strncmp(p, "auto", 5) == 0 || strncmp(p, "on", 2) == 0) {
+ /* Until firmware tells us, we have the barrier with auto */
+ return 0;
+ } else if (strncmp(p, "off", 3) == 0) {
+ handle_no_stf_barrier(NULL);
+ return 0;
+ } else
+ return 1;
+}
+early_param("spec_store_bypass_disable", handle_ssbd);
+
+/* This is the generic flag used by other architectures */
+static int __init handle_no_ssbd(char *p)
+{
+ handle_no_stf_barrier(NULL);
+ return 0;
+}
+early_param("nospec_store_bypass_disable", handle_no_ssbd);
+
+static void stf_barrier_enable(bool enable)
+{
+ if (enable)
+ do_stf_barrier_fixups(stf_enabled_flush_types);
+ else
+ do_stf_barrier_fixups(STF_BARRIER_NONE);
+
+ stf_barrier = enable;
+}
+
+void setup_stf_barrier(void)
+{
+ enum stf_barrier_type type;
+ bool enable, hv;
+
+ hv = cpu_has_feature(CPU_FTR_HVMODE);
+
+ /* Default to fallback in case fw-features are not available */
+ if (cpu_has_feature(CPU_FTR_ARCH_300))
+ type = STF_BARRIER_EIEIO;
+ else if (cpu_has_feature(CPU_FTR_ARCH_207S))
+ type = STF_BARRIER_SYNC_ORI;
+ else if (cpu_has_feature(CPU_FTR_ARCH_206))
+ type = STF_BARRIER_FALLBACK;
+ else
+ type = STF_BARRIER_NONE;
+
+ enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) &&
+ (security_ftr_enabled(SEC_FTR_L1D_FLUSH_PR) ||
+ (security_ftr_enabled(SEC_FTR_L1D_FLUSH_HV) && hv));
+
+ if (type == STF_BARRIER_FALLBACK) {
+ pr_info("stf-barrier: fallback barrier available\n");
+ } else if (type == STF_BARRIER_SYNC_ORI) {
+ pr_info("stf-barrier: hwsync barrier available\n");
+ } else if (type == STF_BARRIER_EIEIO) {
+ pr_info("stf-barrier: eieio barrier available\n");
+ }
+
+ stf_enabled_flush_types = type;
+
+ if (!no_stf_barrier)
+ stf_barrier_enable(enable);
+}
+
+ssize_t cpu_show_spec_store_bypass(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ if (stf_barrier && stf_enabled_flush_types != STF_BARRIER_NONE) {
+ const char *type;
+ switch (stf_enabled_flush_types) {
+ case STF_BARRIER_EIEIO:
+ type = "eieio";
+ break;
+ case STF_BARRIER_SYNC_ORI:
+ type = "hwsync";
+ break;
+ case STF_BARRIER_FALLBACK:
+ type = "fallback";
+ break;
+ default:
+ type = "unknown";
+ }
+ return sprintf(buf, "Mitigation: Kernel entry/exit barrier (%s)\n", type);
+ }
+
+ if (!security_ftr_enabled(SEC_FTR_L1D_FLUSH_HV) &&
+ !security_ftr_enabled(SEC_FTR_L1D_FLUSH_PR))
+ return sprintf(buf, "Not affected\n");
+
+ return sprintf(buf, "Vulnerable\n");
+}
+
+#ifdef CONFIG_DEBUG_FS
+static int stf_barrier_set(void *data, u64 val)
+{
+ bool enable;
+
+ if (val == 1)
+ enable = true;
+ else if (val == 0)
+ enable = false;
+ else
+ return -EINVAL;
+
+ /* Only do anything if we're changing state */
+ if (enable != stf_barrier)
+ stf_barrier_enable(enable);
+
+ return 0;
+}
+
+static int stf_barrier_get(void *data, u64 *val)
+{
+ *val = stf_barrier ? 1 : 0;
+ return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_stf_barrier, stf_barrier_get, stf_barrier_set, "%llu\n");
+
+static __init int stf_barrier_debugfs_init(void)
+{
+ debugfs_create_file("stf_barrier", 0600, powerpc_debugfs_root, NULL, &fops_stf_barrier);
+ return 0;
+}
+device_initcall(stf_barrier_debugfs_init);
+#endif /* CONFIG_DEBUG_FS */
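
With debugfs mounted at the usual location, the barrier can be flipped at runtime through this file. A hedged userspace sketch (the path assumes the conventional /sys/kernel/debug mount point; powerpc_debugfs_root lives under powerpc/):

#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/sys/kernel/debug/powerpc/stf_barrier", "w");

	if (!f)
		return 1;	/* debugfs not mounted, or no permission */
	fputs("0\n", f);	/* 0 disables, 1 re-enables the barrier */
	return fclose(f) ? 1 : 0;
}
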
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 0904492e7032..0e17dcb48720 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -296,7 +296,6 @@ NOKPROBE_SYMBOL(die);
void user_single_step_siginfo(struct task_struct *tsk,
struct pt_regs *regs, siginfo_t *info)
{
- memset(info, 0, sizeof(*info));
info->si_signo = SIGTRAP;
info->si_code = TRAP_TRACE;
info->si_addr = (void __user *)regs->nip;
@@ -334,7 +333,7 @@ void _exception_pkey(int signr, struct pt_regs *regs, int code,
*/
thread_pkey_regs_save(&current->thread);
- memset(&info, 0, sizeof(info));
+ clear_siginfo(&info);
info.si_signo = signr;
info.si_code = code;
info.si_addr = (void __user *) addr;
@@ -970,7 +969,7 @@ void unknown_exception(struct pt_regs *regs)
printk("Bad trap at PC: %lx, SR: %lx, vector=%lx\n",
regs->nip, regs->msr, regs->trap);
- _exception(SIGTRAP, regs, TRAP_FIXME, 0);
+ _exception(SIGTRAP, regs, TRAP_UNK, 0);
exception_exit(prev_state);
}
@@ -992,7 +991,7 @@ bail:
void RunModeException(struct pt_regs *regs)
{
- _exception(SIGTRAP, regs, TRAP_FIXME, 0);
+ _exception(SIGTRAP, regs, TRAP_UNK, 0);
}
void single_step_exception(struct pt_regs *regs)
@@ -1032,7 +1031,7 @@ static void emulate_single_step(struct pt_regs *regs)
static inline int __parse_fpscr(unsigned long fpscr)
{
- int ret = FPE_FIXME;
+ int ret = FPE_FLTUNK;
/* Invalid operation */
if ((fpscr & FPSCR_VE) && (fpscr & FPSCR_VX))
@@ -1973,7 +1972,7 @@ void SPEFloatingPointException(struct pt_regs *regs)
extern int do_spe_mathemu(struct pt_regs *regs);
unsigned long spefscr;
int fpexc_mode;
- int code = FPE_FIXME;
+ int code = FPE_FLTUNK;
int err;
flush_spe_to_thread(current);
@@ -2042,7 +2041,7 @@ void SPEFloatingPointRoundException(struct pt_regs *regs)
printk(KERN_ERR "unrecognized spe instruction "
"in %s at %lx\n", current->comm, regs->nip);
} else {
- _exception(SIGFPE, regs, FPE_FIXME, regs->nip);
+ _exception(SIGFPE, regs, FPE_FLTUNK, regs->nip);
return;
}
}
diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S
index c8af90ff49f0..b8d82678f8b4 100644
--- a/arch/powerpc/kernel/vmlinux.lds.S
+++ b/arch/powerpc/kernel/vmlinux.lds.S
@@ -134,6 +134,20 @@ SECTIONS
#ifdef CONFIG_PPC64
. = ALIGN(8);
+ __stf_entry_barrier_fixup : AT(ADDR(__stf_entry_barrier_fixup) - LOAD_OFFSET) {
+ __start___stf_entry_barrier_fixup = .;
+ *(__stf_entry_barrier_fixup)
+ __stop___stf_entry_barrier_fixup = .;
+ }
+
+ . = ALIGN(8);
+ __stf_exit_barrier_fixup : AT(ADDR(__stf_exit_barrier_fixup) - LOAD_OFFSET) {
+ __start___stf_exit_barrier_fixup = .;
+ *(__stf_exit_barrier_fixup)
+ __stop___stf_exit_barrier_fixup = .;
+ }
+
+ . = ALIGN(8);
__rfi_flush_fixup : AT(ADDR(__rfi_flush_fixup) - LOAD_OFFSET) {
__start___rfi_flush_fixup = .;
*(__rfi_flush_fixup)
diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c
index a57eafec4dc2..361f42c8c73e 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
@@ -162,7 +162,7 @@ static void kvmppc_radix_tlbie_page(struct kvm *kvm, unsigned long addr,
if (cpu_has_feature(CPU_FTR_P9_TLBIE_BUG))
asm volatile(PPC_TLBIE_5(%0, %1, 0, 0, 1)
: : "r" (addr), "r" (kvm->arch.lpid) : "memory");
- asm volatile("ptesync": : :"memory");
+ asm volatile("eieio ; tlbsync ; ptesync": : :"memory");
}
static void kvmppc_radix_flush_pwc(struct kvm *kvm, unsigned long addr)
@@ -173,7 +173,7 @@ static void kvmppc_radix_flush_pwc(struct kvm *kvm, unsigned long addr)
/* RIC=1 PRS=0 R=1 IS=2 */
asm volatile(PPC_TLBIE_5(%0, %1, 1, 0, 1)
: : "r" (rb), "r" (kvm->arch.lpid) : "memory");
- asm volatile("ptesync": : :"memory");
+ asm volatile("eieio ; tlbsync ; ptesync": : :"memory");
}
unsigned long kvmppc_radix_update_pte(struct kvm *kvm, pte_t *ptep,
@@ -584,7 +584,7 @@ int kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift);
if (ptep && pte_present(*ptep)) {
- old = kvmppc_radix_update_pte(kvm, ptep, _PAGE_PRESENT, 0,
+ old = kvmppc_radix_update_pte(kvm, ptep, ~0UL, 0,
gpa, shift);
kvmppc_radix_tlbie_page(kvm, gpa, shift);
if ((old & _PAGE_DIRTY) && memslot->dirty_bitmap) {
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 4d07fca5121c..9963f65c212b 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -2441,6 +2441,7 @@ static void init_vcore_to_run(struct kvmppc_vcore *vc)
vc->in_guest = 0;
vc->napping_threads = 0;
vc->conferring_threads = 0;
+ vc->tb_offset_applied = 0;
}
static bool can_dynamic_split(struct kvmppc_vcore *vc, struct core_info *cip)
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index bd63fa8a08b5..07ca1b2a7966 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -692,6 +692,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
22: ld r8,VCORE_TB_OFFSET(r5)
cmpdi r8,0
beq 37f
+ std r8, VCORE_TB_OFFSET_APPL(r5)
mftb r6 /* current host timebase */
add r8,r8,r6
mtspr SPRN_TBU40,r8 /* update upper 40 bits */
@@ -940,18 +941,6 @@ FTR_SECTION_ELSE
ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
8:
- /*
- * Set the decrementer to the guest decrementer.
- */
- ld r8,VCPU_DEC_EXPIRES(r4)
- /* r8 is a host timebase value here, convert to guest TB */
- ld r5,HSTATE_KVM_VCORE(r13)
- ld r6,VCORE_TB_OFFSET(r5)
- add r8,r8,r6
- mftb r7
- subf r3,r7,r8
- mtspr SPRN_DEC,r3
-
ld r5, VCPU_SPRG0(r4)
ld r6, VCPU_SPRG1(r4)
ld r7, VCPU_SPRG2(r4)
@@ -1005,6 +994,18 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
mtspr SPRN_LPCR,r8
isync
+ /*
+ * Set the decrementer to the guest decrementer.
+ */
+ ld r8,VCPU_DEC_EXPIRES(r4)
+ /* r8 is a host timebase value here, convert to guest TB */
+ ld r5,HSTATE_KVM_VCORE(r13)
+ ld r6,VCORE_TB_OFFSET_APPL(r5)
+ add r8,r8,r6
+ mftb r7
+ subf r3,r7,r8
+ mtspr SPRN_DEC,r3
+
/* Check if HDEC expires soon */
mfspr r3, SPRN_HDEC
EXTEND_HDEC(r3)
@@ -1597,8 +1598,27 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX)
guest_bypass:
stw r12, STACK_SLOT_TRAP(r1)
- mr r3, r12
+
+ /* Save DEC */
+ /* Do this before kvmhv_commence_exit so we know TB is guest TB */
+ ld r3, HSTATE_KVM_VCORE(r13)
+ mfspr r5,SPRN_DEC
+ mftb r6
+ /* On P9, if the guest has large decr enabled, don't sign extend */
+BEGIN_FTR_SECTION
+ ld r4, VCORE_LPCR(r3)
+ andis. r4, r4, LPCR_LD@h
+ bne 16f
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
+ extsw r5,r5
+16: add r5,r5,r6
+ /* r5 is a guest timebase value here, convert to host TB */
+ ld r4,VCORE_TB_OFFSET_APPL(r3)
+ subf r5,r4,r5
+ std r5,VCPU_DEC_EXPIRES(r9)
+
/* Increment exit count, poke other threads to exit */
+ mr r3, r12
bl kvmhv_commence_exit
nop
ld r9, HSTATE_KVM_VCPU(r13)
@@ -1639,23 +1659,6 @@ guest_bypass:
mtspr SPRN_PURR,r3
mtspr SPRN_SPURR,r4
- /* Save DEC */
- ld r3, HSTATE_KVM_VCORE(r13)
- mfspr r5,SPRN_DEC
- mftb r6
- /* On P9, if the guest has large decr enabled, don't sign extend */
-BEGIN_FTR_SECTION
- ld r4, VCORE_LPCR(r3)
- andis. r4, r4, LPCR_LD@h
- bne 16f
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
- extsw r5,r5
-16: add r5,r5,r6
- /* r5 is a guest timebase value here, convert to host TB */
- ld r4,VCORE_TB_OFFSET(r3)
- subf r5,r4,r5
- std r5,VCPU_DEC_EXPIRES(r9)
-
BEGIN_FTR_SECTION
b 8f
END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
@@ -1905,6 +1908,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
cmpwi cr2, r0, 0
beq cr2, 4f
+ /*
+ * Radix: do eieio; tlbsync; ptesync sequence in case we
+ * interrupted the guest between a tlbie and a ptesync.
+ */
+ eieio
+ tlbsync
+ ptesync
+
/* Radix: Handle the case where the guest used an illegal PID */
LOAD_REG_ADDR(r4, mmu_base_pid)
lwz r3, VCPU_GUEST_PID(r9)
@@ -2017,9 +2028,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
27:
/* Subtract timebase offset from timebase */
- ld r8,VCORE_TB_OFFSET(r5)
+ ld r8, VCORE_TB_OFFSET_APPL(r5)
cmpdi r8,0
beq 17f
+ li r0, 0
+ std r0, VCORE_TB_OFFSET_APPL(r5)
mftb r6 /* current guest timebase */
subf r8,r8,r6
mtspr SPRN_TBU40,r8 /* update upper 40 bits */
@@ -2700,7 +2713,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
add r3, r3, r5
ld r4, HSTATE_KVM_VCPU(r13)
ld r5, HSTATE_KVM_VCORE(r13)
- ld r6, VCORE_TB_OFFSET(r5)
+ ld r6, VCORE_TB_OFFSET_APPL(r5)
subf r3, r6, r3 /* convert to host TB value */
std r3, VCPU_DEC_EXPIRES(r4)
@@ -2799,7 +2812,7 @@ END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0)
/* Restore guest decrementer */
ld r3, VCPU_DEC_EXPIRES(r4)
ld r5, HSTATE_KVM_VCORE(r13)
- ld r6, VCORE_TB_OFFSET(r5)
+ ld r6, VCORE_TB_OFFSET_APPL(r5)
add r3, r3, r6 /* convert host TB to guest TB value */
mftb r7
subf r3, r7, r3
@@ -3606,12 +3619,9 @@ kvmppc_fix_pmao:
*/
kvmhv_start_timing:
ld r5, HSTATE_KVM_VCORE(r13)
- lbz r6, VCORE_IN_GUEST(r5)
- cmpwi r6, 0
- beq 5f /* if in guest, need to */
- ld r6, VCORE_TB_OFFSET(r5) /* subtract timebase offset */
-5: mftb r5
- subf r5, r6, r5
+ ld r6, VCORE_TB_OFFSET_APPL(r5)
+ mftb r5
+ subf r5, r6, r5 /* subtract current timebase offset */
std r3, VCPU_CUR_ACTIVITY(r4)
std r5, VCPU_ACTIVITY_START(r4)
blr
@@ -3622,15 +3632,12 @@ kvmhv_start_timing:
*/
kvmhv_accumulate_time:
ld r5, HSTATE_KVM_VCORE(r13)
- lbz r8, VCORE_IN_GUEST(r5)
- cmpwi r8, 0
- beq 4f /* if in guest, need to */
- ld r8, VCORE_TB_OFFSET(r5) /* subtract timebase offset */
-4: ld r5, VCPU_CUR_ACTIVITY(r4)
+ ld r8, VCORE_TB_OFFSET_APPL(r5)
+ ld r5, VCPU_CUR_ACTIVITY(r4)
ld r6, VCPU_ACTIVITY_START(r4)
std r3, VCPU_CUR_ACTIVITY(r4)
mftb r7
- subf r7, r8, r7
+ subf r7, r8, r7 /* subtract current timebase offset */
std r7, VCPU_ACTIVITY_START(r4)
cmpdi r5, 0
beqlr
diff --git a/arch/powerpc/kvm/book3s_xive_template.c b/arch/powerpc/kvm/book3s_xive_template.c
index c7a5deadd1cc..99c3620b40d9 100644
--- a/arch/powerpc/kvm/book3s_xive_template.c
+++ b/arch/powerpc/kvm/book3s_xive_template.c
@@ -11,6 +11,9 @@
#define XGLUE(a,b) a##b
#define GLUE(a,b) XGLUE(a,b)
+/* Dummy interrupt used when taking interrupts out of a queue in H_CPPR */
+#define XICS_DUMMY 1
+
static void GLUE(X_PFX,ack_pending)(struct kvmppc_xive_vcpu *xc)
{
u8 cppr;
@@ -205,6 +208,10 @@ skip_ipi:
goto skip_ipi;
}
+ /* If it's the dummy interrupt, continue searching */
+ if (hirq == XICS_DUMMY)
+ goto skip_ipi;
+
/* If fetching, update queue pointers */
if (scan_type == scan_fetch) {
q->idx = idx;
@@ -385,9 +392,76 @@ static void GLUE(X_PFX,push_pending_to_hw)(struct kvmppc_xive_vcpu *xc)
__x_writeb(prio, __x_tima + TM_SPC_SET_OS_PENDING);
}
+static void GLUE(X_PFX,scan_for_rerouted_irqs)(struct kvmppc_xive *xive,
+ struct kvmppc_xive_vcpu *xc)
+{
+ unsigned int prio;
+
+ /* For each priority that is now masked */
+ for (prio = xc->cppr; prio < KVMPPC_XIVE_Q_COUNT; prio++) {
+ struct xive_q *q = &xc->queues[prio];
+ struct kvmppc_xive_irq_state *state;
+ struct kvmppc_xive_src_block *sb;
+ u32 idx, toggle, entry, irq, hw_num;
+ struct xive_irq_data *xd;
+ __be32 *qpage;
+ u16 src;
+
+ idx = q->idx;
+ toggle = q->toggle;
+ qpage = READ_ONCE(q->qpage);
+ if (!qpage)
+ continue;
+
+ /* For each interrupt in the queue */
+ for (;;) {
+ entry = be32_to_cpup(qpage + idx);
+
+ /* No more? */
+ if ((entry >> 31) == toggle)
+ break;
+ irq = entry & 0x7fffffff;
+
+ /* Skip dummies and IPIs */
+ if (irq == XICS_DUMMY || irq == XICS_IPI)
+ goto next;
+ sb = kvmppc_xive_find_source(xive, irq, &src);
+ if (!sb)
+ goto next;
+ state = &sb->irq_state[src];
+
+ /* Has it been rerouted? */
+ if (xc->server_num == state->act_server)
+ goto next;
+
+ /*
+ * All right, it *has* been re-routed, kill it from
+ * the queue.
+ */
+ qpage[idx] = cpu_to_be32((entry & 0x80000000) | XICS_DUMMY);
+
+ /* Find the HW interrupt */
+ kvmppc_xive_select_irq(state, &hw_num, &xd);
+
+ /* If it's not an LSI, set PQ to 11 so the EOI will force a resend */
+ if (!(xd->flags & XIVE_IRQ_FLAG_LSI))
+ GLUE(X_PFX,esb_load)(xd, XIVE_ESB_SET_PQ_11);
+
+ /* EOI the source */
+ GLUE(X_PFX,source_eoi)(hw_num, xd);
+
+ next:
+ idx = (idx + 1) & q->msk;
+ if (idx == 0)
+ toggle ^= 1;
+ }
+ }
+}
+
X_STATIC int GLUE(X_PFX,h_cppr)(struct kvm_vcpu *vcpu, unsigned long cppr)
{
struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+ struct kvmppc_xive *xive = vcpu->kvm->arch.xive;
u8 old_cppr;
pr_devel("H_CPPR(cppr=%ld)\n", cppr);
@@ -407,14 +481,34 @@ X_STATIC int GLUE(X_PFX,h_cppr)(struct kvm_vcpu *vcpu, unsigned long cppr)
*/
smp_mb();
- /*
- * We are masking less, we need to look for pending things
- * to deliver and set VP pending bits accordingly to trigger
- * a new interrupt otherwise we might miss MFRR changes for
- * which we have optimized out sending an IPI signal.
- */
- if (cppr > old_cppr)
+ if (cppr > old_cppr) {
+ /*
+ * We are masking less, we need to look for pending things
+ * to deliver and set VP pending bits accordingly to trigger
+ * a new interrupt otherwise we might miss MFRR changes for
+ * which we have optimized out sending an IPI signal.
+ */
GLUE(X_PFX,push_pending_to_hw)(xc);
+ } else {
+ /*
+ * We are masking more, we need to check the queue for any
+ * interrupt that has been routed to another CPU, take
+ * it out (replace it with the dummy) and retrigger it.
+ *
+ * This is necessary since those interrupts may otherwise
+ * never be processed, at least not until this CPU restores
+ * its CPPR.
+ *
+ * This is in theory racy vs. HW adding new interrupts to
+ * the queue. In practice this works because the interesting
+ * cases are when the guest has done a set_xive() to move the
+ * interrupt away, which flushes the xive, followed by the
+ * target CPU doing a H_CPPR. So any new interrupt coming into
+ * the queue must still be routed to us and isn't a source
+ * of concern.
+ */
+ GLUE(X_PFX,scan_for_rerouted_irqs)(xive, xc);
+ }
/* Apply new CPPR */
xc->hw_cppr = cppr;
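
The queue entries scanned above pack a generation ("toggle") bit into bit 31 and the IRQ number into the low 31 bits; killing an entry must keep the generation bit so later scans still terminate. Minimal sketch of the two manipulations used by scan_for_rerouted_irqs():

#include <stdint.h>

#define XICS_DUMMY	1

static uint32_t entry_irq(uint32_t entry)
{
	return entry & 0x7fffffff;	/* low 31 bits: IRQ number */
}

static uint32_t entry_kill(uint32_t entry)
{
	/* keep the generation bit, substitute the dummy IRQ */
	return (entry & 0x80000000u) | XICS_DUMMY;
}
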
diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c
index 288fe4f0db4e..e1bcdc32a851 100644
--- a/arch/powerpc/lib/feature-fixups.c
+++ b/arch/powerpc/lib/feature-fixups.c
@@ -23,6 +23,7 @@
#include <asm/page.h>
#include <asm/sections.h>
#include <asm/setup.h>
+#include <asm/security_features.h>
#include <asm/firmware.h>
struct fixup_entry {
@@ -117,6 +118,120 @@ void do_feature_fixups(unsigned long value, void *fixup_start, void *fixup_end)
}
#ifdef CONFIG_PPC_BOOK3S_64
+void do_stf_entry_barrier_fixups(enum stf_barrier_type types)
+{
+ unsigned int instrs[3], *dest;
+ long *start, *end;
+ int i;
+
+ start = PTRRELOC(&__start___stf_entry_barrier_fixup);
+ end = PTRRELOC(&__stop___stf_entry_barrier_fixup);
+
+ instrs[0] = 0x60000000; /* nop */
+ instrs[1] = 0x60000000; /* nop */
+ instrs[2] = 0x60000000; /* nop */
+
+ i = 0;
+ if (types & STF_BARRIER_FALLBACK) {
+ instrs[i++] = 0x7d4802a6; /* mflr r10 */
+ instrs[i++] = 0x60000000; /* branch patched below */
+ instrs[i++] = 0x7d4803a6; /* mtlr r10 */
+ } else if (types & STF_BARRIER_EIEIO) {
+ instrs[i++] = 0x7e0006ac; /* eieio + bit 6 hint */
+ } else if (types & STF_BARRIER_SYNC_ORI) {
+ instrs[i++] = 0x7c0004ac; /* hwsync */
+ instrs[i++] = 0xe94d0000; /* ld r10,0(r13) */
+ instrs[i++] = 0x63ff0000; /* ori 31,31,0 speculation barrier */
+ }
+
+ for (i = 0; start < end; start++, i++) {
+ dest = (void *)start + *start;
+
+ pr_devel("patching dest %lx\n", (unsigned long)dest);
+
+ patch_instruction(dest, instrs[0]);
+
+ if (types & STF_BARRIER_FALLBACK)
+ patch_branch(dest + 1, (unsigned long)&stf_barrier_fallback,
+ BRANCH_SET_LINK);
+ else
+ patch_instruction(dest + 1, instrs[1]);
+
+ patch_instruction(dest + 2, instrs[2]);
+ }
+
+ printk(KERN_DEBUG "stf-barrier: patched %d entry locations (%s barrier)\n", i,
+ (types == STF_BARRIER_NONE) ? "no" :
+ (types == STF_BARRIER_FALLBACK) ? "fallback" :
+ (types == STF_BARRIER_EIEIO) ? "eieio" :
+ (types == (STF_BARRIER_SYNC_ORI)) ? "hwsync"
+ : "unknown");
+}
+
+void do_stf_exit_barrier_fixups(enum stf_barrier_type types)
+{
+ unsigned int instrs[6], *dest;
+ long *start, *end;
+ int i;
+
+ start = PTRRELOC(&__start___stf_exit_barrier_fixup);
+ end = PTRRELOC(&__stop___stf_exit_barrier_fixup);
+
+ instrs[0] = 0x60000000; /* nop */
+ instrs[1] = 0x60000000; /* nop */
+ instrs[2] = 0x60000000; /* nop */
+ instrs[3] = 0x60000000; /* nop */
+ instrs[4] = 0x60000000; /* nop */
+ instrs[5] = 0x60000000; /* nop */
+
+ i = 0;
+ if (types & STF_BARRIER_FALLBACK || types & STF_BARRIER_SYNC_ORI) {
+ if (cpu_has_feature(CPU_FTR_HVMODE)) {
+ instrs[i++] = 0x7db14ba6; /* mtspr 0x131, r13 (HSPRG1) */
+ instrs[i++] = 0x7db04aa6; /* mfspr r13, 0x130 (HSPRG0) */
+ } else {
+ instrs[i++] = 0x7db243a6; /* mtsprg 2,r13 */
+ instrs[i++] = 0x7db142a6; /* mfsprg r13,1 */
+ }
+ instrs[i++] = 0x7c0004ac; /* hwsync */
+ instrs[i++] = 0xe9ad0000; /* ld r13,0(r13) */
+ instrs[i++] = 0x63ff0000; /* ori 31,31,0 speculation barrier */
+ if (cpu_has_feature(CPU_FTR_HVMODE)) {
+ instrs[i++] = 0x7db14aa6; /* mfspr r13, 0x131 (HSPRG1) */
+ } else {
+ instrs[i++] = 0x7db242a6; /* mfsprg r13,2 */
+ }
+ } else if (types & STF_BARRIER_EIEIO) {
+ instrs[i++] = 0x7e0006ac; /* eieio + bit 6 hint */
+ }
+
+ for (i = 0; start < end; start++, i++) {
+ dest = (void *)start + *start;
+
+ pr_devel("patching dest %lx\n", (unsigned long)dest);
+
+ patch_instruction(dest, instrs[0]);
+ patch_instruction(dest + 1, instrs[1]);
+ patch_instruction(dest + 2, instrs[2]);
+ patch_instruction(dest + 3, instrs[3]);
+ patch_instruction(dest + 4, instrs[4]);
+ patch_instruction(dest + 5, instrs[5]);
+ }
+ printk(KERN_DEBUG "stf-barrier: patched %d exit locations (%s barrier)\n", i,
+ (types == STF_BARRIER_NONE) ? "no" :
+ (types == STF_BARRIER_FALLBACK) ? "fallback" :
+ (types == STF_BARRIER_EIEIO) ? "eieio" :
+ (types == (STF_BARRIER_SYNC_ORI)) ? "hwsync"
+ : "unknown");
+}
+
+
+void do_stf_barrier_fixups(enum stf_barrier_type types)
+{
+ do_stf_entry_barrier_fixups(types);
+ do_stf_exit_barrier_fixups(types);
+}
+
void do_rfi_flush_fixups(enum l1d_flush_type types)
{
unsigned int instrs[3], *dest;
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index c01d627e687a..ef268d5d9db7 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -168,6 +168,7 @@ static int do_sigbus(struct pt_regs *regs, unsigned long address,
return SIGBUS;
current->thread.trap_nr = BUS_ADRERR;
+ clear_siginfo(&info);
info.si_signo = SIGBUS;
info.si_errno = 0;
info.si_code = BUS_ADRERR;
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index 67d3125d0610..84b58abc08ee 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -222,6 +222,7 @@ config PTE_64BIT
config PHYS_64BIT
bool 'Large physical address support' if E500 || PPC_86xx
depends on (44x || E500 || PPC_86xx) && !PPC_83xx && !PPC_82xx
+ select PHYS_ADDR_T_64BIT
---help---
This option enables kernel support for larger than 32-bit physical
addresses. This feature may not be available on all cores.
diff --git a/arch/powerpc/platforms/cell/spufs/fault.c b/arch/powerpc/platforms/cell/spufs/fault.c
index 870c0a82d560..1e002e94d0f6 100644
--- a/arch/powerpc/platforms/cell/spufs/fault.c
+++ b/arch/powerpc/platforms/cell/spufs/fault.c
@@ -44,7 +44,7 @@ static void spufs_handle_event(struct spu_context *ctx,
return;
}
- memset(&info, 0, sizeof(info));
+ clear_siginfo(&info);
switch (type) {
case SPE_EVENT_INVALID_DMA:
diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c
index ccc421503363..c9ef3c532169 100644
--- a/arch/powerpc/platforms/cell/spufs/sched.c
+++ b/arch/powerpc/platforms/cell/spufs/sched.c
@@ -1095,18 +1095,6 @@ static int show_spu_loadavg(struct seq_file *s, void *private)
atomic_read(&nr_spu_contexts),
idr_get_cursor(&task_active_pid_ns(current)->idr) - 1);
return 0;
-}
-
-static int spu_loadavg_open(struct inode *inode, struct file *file)
-{
- return single_open(file, show_spu_loadavg, NULL);
-}
-
-static const struct file_operations spu_loadavg_fops = {
- .open = spu_loadavg_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
};
int __init spu_sched_init(void)
@@ -1135,7 +1123,7 @@ int __init spu_sched_init(void)
mod_timer(&spuloadavg_timer, 0);
- entry = proc_create("spu_loadavg", 0, NULL, &spu_loadavg_fops);
+ entry = proc_create_single("spu_loadavg", 0, NULL, show_spu_loadavg);
if (!entry)
goto out_stop_kthread;
diff --git a/arch/powerpc/platforms/powernv/opal-nvram.c b/arch/powerpc/platforms/powernv/opal-nvram.c
index 1bceb95f422d..5584247f5029 100644
--- a/arch/powerpc/platforms/powernv/opal-nvram.c
+++ b/arch/powerpc/platforms/powernv/opal-nvram.c
@@ -44,6 +44,10 @@ static ssize_t opal_nvram_read(char *buf, size_t count, loff_t *index)
return count;
}
+/*
+ * This can be called in the panic path with interrupts off, so use
+ * mdelay in that case.
+ */
static ssize_t opal_nvram_write(char *buf, size_t count, loff_t *index)
{
s64 rc = OPAL_BUSY;
@@ -58,10 +62,16 @@ static ssize_t opal_nvram_write(char *buf, size_t count, loff_t *index)
while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) {
rc = opal_write_nvram(__pa(buf), count, off);
if (rc == OPAL_BUSY_EVENT) {
- msleep(OPAL_BUSY_DELAY_MS);
+ if (in_interrupt() || irqs_disabled())
+ mdelay(OPAL_BUSY_DELAY_MS);
+ else
+ msleep(OPAL_BUSY_DELAY_MS);
opal_poll_events(NULL);
} else if (rc == OPAL_BUSY) {
- msleep(OPAL_BUSY_DELAY_MS);
+ if (in_interrupt() || irqs_disabled())
+ mdelay(OPAL_BUSY_DELAY_MS);
+ else
+ msleep(OPAL_BUSY_DELAY_MS);
}
}
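
The same two-way test appears in both busy branches; it could be factored into one helper. Hypothetical sketch (helper name invented; the APIs are the ones the patch already uses):

#include <linux/delay.h>	/* mdelay, msleep */
#include <linux/irqflags.h>	/* irqs_disabled */
#include <linux/preempt.h>	/* in_interrupt */

/* sleep when scheduling is allowed, spin when it is not */
static void opal_nvram_delay(void)
{
	if (in_interrupt() || irqs_disabled())
		mdelay(OPAL_BUSY_DELAY_MS);
	else
		msleep(OPAL_BUSY_DELAY_MS);
}
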
diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c
index ef8c9ce53a61..a6648ec99ca7 100644
--- a/arch/powerpc/platforms/powernv/setup.c
+++ b/arch/powerpc/platforms/powernv/setup.c
@@ -131,6 +131,7 @@ static void __init pnv_setup_arch(void)
set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT);
pnv_setup_rfi_flush();
+ setup_stf_barrier();
/* Initialize SMP */
pnv_smp_init();
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index b55ad4286dc7..fdb32e056ef4 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -710,6 +710,7 @@ static void __init pSeries_setup_arch(void)
fwnmi_init();
pseries_setup_rfi_flush();
+ setup_stf_barrier();
/* By default, only probe PCI (can be overridden by rtas_pci) */
pci_add_flags(PCI_PROBE_ONLY);
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index cd4fd85fde84..274bc064c41f 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -3,8 +3,16 @@
# see Documentation/kbuild/kconfig-language.txt.
#
+config 64BIT
+ bool
+
+config 32BIT
+ bool
+
config RISCV
def_bool y
+ # even on 32-bit, physical (and DMA) addresses are > 32-bits
+ select PHYS_ADDR_T_64BIT
select OF
select OF_EARLY_FLATTREE
select OF_IRQ
@@ -22,7 +30,6 @@ config RISCV
select GENERIC_ATOMIC64 if !64BIT || !RISCV_ISA_A
select HAVE_MEMBLOCK
select HAVE_MEMBLOCK_NODE_MAP
- select HAVE_DMA_API_DEBUG
select HAVE_DMA_CONTIGUOUS
select HAVE_GENERIC_DMA_COHERENT
select IRQ_DOMAIN
@@ -39,16 +46,9 @@ config RISCV
config MMU
def_bool y
-# even on 32-bit, physical (and DMA) addresses are > 32-bits
-config ARCH_PHYS_ADDR_T_64BIT
- def_bool y
-
config ZONE_DMA32
bool
- default y
-
-config ARCH_DMA_ADDR_T_64BIT
- def_bool y
+ default y if 64BIT
config PAGE_OFFSET
hex
@@ -101,7 +101,6 @@ choice
config ARCH_RV32I
bool "RV32I"
- select CPU_SUPPORTS_32BIT_KERNEL
select 32BIT
select GENERIC_ASHLDI3
select GENERIC_ASHRDI3
@@ -109,13 +108,13 @@ config ARCH_RV32I
config ARCH_RV64I
bool "RV64I"
- select CPU_SUPPORTS_64BIT_KERNEL
select 64BIT
select HAVE_FUNCTION_TRACER
select HAVE_FUNCTION_GRAPH_TRACER
select HAVE_FTRACE_MCOUNT_RECORD
select HAVE_DYNAMIC_FTRACE
select HAVE_DYNAMIC_FTRACE_WITH_REGS
+ select SWIOTLB
endchoice
@@ -171,11 +170,6 @@ config NR_CPUS
depends on SMP
default "8"
-config CPU_SUPPORTS_32BIT_KERNEL
- bool
-config CPU_SUPPORTS_64BIT_KERNEL
- bool
-
choice
prompt "CPU Tuning"
default TUNE_GENERIC
@@ -202,24 +196,6 @@ endmenu
menu "Kernel type"
-choice
- prompt "Kernel code model"
- default 64BIT
-
-config 32BIT
- bool "32-bit kernel"
- depends on CPU_SUPPORTS_32BIT_KERNEL
- help
- Select this option to build a 32-bit kernel.
-
-config 64BIT
- bool "64-bit kernel"
- depends on CPU_SUPPORTS_64BIT_KERNEL
- help
- Select this option to build a 64-bit kernel.
-
-endchoice
-
source "mm/Kconfig"
source "kernel/Kconfig.preempt"
diff --git a/arch/riscv/include/asm/dma-mapping.h b/arch/riscv/include/asm/dma-mapping.h
new file mode 100644
index 000000000000..8facc1c8fa05
--- /dev/null
+++ b/arch/riscv/include/asm/dma-mapping.h
@@ -0,0 +1,15 @@
+// SPDX-License-Identifier: GPL-2.0
+#ifndef _RISCV_ASM_DMA_MAPPING_H
+#define _RISCV_ASM_DMA_MAPPING_H 1
+
+#ifdef CONFIG_SWIOTLB
+#include <linux/swiotlb.h>
+static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus)
+{
+ return &swiotlb_dma_ops;
+}
+#else
+#include <asm-generic/dma-mapping.h>
+#endif /* CONFIG_SWIOTLB */
+
+#endif /* _RISCV_ASM_DMA_MAPPING_H */
diff --git a/arch/riscv/include/asm/pci.h b/arch/riscv/include/asm/pci.h
index 0f2fc9ef20fc..b3638c505728 100644
--- a/arch/riscv/include/asm/pci.h
+++ b/arch/riscv/include/asm/pci.h
@@ -26,9 +26,6 @@
/* RISC-V shim does not initialize PCI bus */
#define pcibios_assign_all_busses() 1
-/* We do not have an IOMMU */
-#define PCI_DMA_BUS_IS_PHYS 1
-
extern int isa_dma_bridge_buggy;
#ifdef CONFIG_PCI
diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
index c11f40c1b2a8..ee44a48faf79 100644
--- a/arch/riscv/kernel/setup.c
+++ b/arch/riscv/kernel/setup.c
@@ -29,6 +29,7 @@
#include <linux/of_fdt.h>
#include <linux/of_platform.h>
#include <linux/sched/task.h>
+#include <linux/swiotlb.h>
#include <asm/setup.h>
#include <asm/sections.h>
@@ -206,6 +207,7 @@ void __init setup_arch(char **cmdline_p)
setup_bootmem();
paging_init();
unflatten_device_tree();
+ swiotlb_init(1);
#ifdef CONFIG_SMP
setup_smp();
diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c
index 93132cb59184..b99d9dd21fd0 100644
--- a/arch/riscv/kernel/traps.c
+++ b/arch/riscv/kernel/traps.c
@@ -63,18 +63,6 @@ void die(struct pt_regs *regs, const char *str)
do_exit(SIGSEGV);
}
-static inline void do_trap_siginfo(int signo, int code,
- unsigned long addr, struct task_struct *tsk)
-{
- siginfo_t info;
-
- info.si_signo = signo;
- info.si_errno = 0;
- info.si_code = code;
- info.si_addr = (void __user *)addr;
- force_sig_info(signo, &info, tsk);
-}
-
void do_trap(struct pt_regs *regs, int signo, int code,
unsigned long addr, struct task_struct *tsk)
{
@@ -87,7 +75,7 @@ void do_trap(struct pt_regs *regs, int signo, int code,
show_regs(regs);
}
- do_trap_siginfo(signo, code, addr, tsk);
+ force_sig_fault(signo, code, (void __user *)addr, tsk);
}
static void do_trap_error(struct pt_regs *regs, int signo, int code,
@@ -149,7 +137,7 @@ asmlinkage void do_trap_break(struct pt_regs *regs)
}
#endif /* CONFIG_GENERIC_BUG */
- do_trap_siginfo(SIGTRAP, TRAP_BRKPT, regs->sepc, current);
+ force_sig_fault(SIGTRAP, TRAP_BRKPT, (void __user *)(regs->sepc), current);
regs->sepc += 0x4;
}
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 199ac3e4da1d..6a64287ec1da 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -35,9 +35,6 @@ config GENERIC_BUG
config GENERIC_BUG_RELATIVE_POINTERS
def_bool y
-config ARCH_DMA_ADDR_T_64BIT
- def_bool y
-
config GENERIC_LOCKBREAK
def_bool y if SMP && PREEMPT
@@ -133,7 +130,6 @@ config S390
select HAVE_CMPXCHG_LOCAL
select HAVE_COPY_THREAD_TLS
select HAVE_DEBUG_KMEMLEAK
- select HAVE_DMA_API_DEBUG
select HAVE_DMA_CONTIGUOUS
select DMA_DIRECT_OPS
select HAVE_DYNAMIC_FTRACE
@@ -709,7 +705,11 @@ config QDIO
menuconfig PCI
bool "PCI support"
select PCI_MSI
+ select IOMMU_HELPER
select IOMMU_SUPPORT
+ select NEED_DMA_MAP_STATE
+ select NEED_SG_DMA_LENGTH
+
help
Enable PCI support.
@@ -733,15 +733,6 @@ config PCI_DOMAINS
config HAS_IOMEM
def_bool PCI
-config IOMMU_HELPER
- def_bool PCI
-
-config NEED_SG_DMA_LENGTH
- def_bool PCI
-
-config NEED_DMA_MAP_STATE
- def_bool PCI
-
config CHSC_SCH
def_tristate m
prompt "Support for CHSC subchannels"
diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig
index 6176fe9795ca..941d8cc6c9f5 100644
--- a/arch/s390/configs/debug_defconfig
+++ b/arch/s390/configs/debug_defconfig
@@ -261,9 +261,9 @@ CONFIG_IP_VS_NQ=m
CONFIG_IP_VS_FTP=m
CONFIG_IP_VS_PE_SIP=m
CONFIG_NF_CONNTRACK_IPV4=m
-CONFIG_NF_TABLES_IPV4=m
+CONFIG_NF_TABLES_IPV4=y
CONFIG_NFT_CHAIN_ROUTE_IPV4=m
-CONFIG_NF_TABLES_ARP=m
+CONFIG_NF_TABLES_ARP=y
CONFIG_NFT_CHAIN_NAT_IPV4=m
CONFIG_IP_NF_IPTABLES=m
CONFIG_IP_NF_MATCH_AH=m
@@ -284,7 +284,7 @@ CONFIG_IP_NF_ARPTABLES=m
CONFIG_IP_NF_ARPFILTER=m
CONFIG_IP_NF_ARP_MANGLE=m
CONFIG_NF_CONNTRACK_IPV6=m
-CONFIG_NF_TABLES_IPV6=m
+CONFIG_NF_TABLES_IPV6=y
CONFIG_NFT_CHAIN_ROUTE_IPV6=m
CONFIG_NFT_CHAIN_NAT_IPV6=m
CONFIG_IP6_NF_IPTABLES=m
@@ -305,7 +305,7 @@ CONFIG_IP6_NF_RAW=m
CONFIG_IP6_NF_SECURITY=m
CONFIG_IP6_NF_NAT=m
CONFIG_IP6_NF_TARGET_MASQUERADE=m
-CONFIG_NF_TABLES_BRIDGE=m
+CONFIG_NF_TABLES_BRIDGE=y
CONFIG_RDS=m
CONFIG_RDS_RDMA=m
CONFIG_RDS_TCP=m
@@ -604,7 +604,6 @@ CONFIG_DETECT_HUNG_TASK=y
CONFIG_WQ_WATCHDOG=y
CONFIG_PANIC_ON_OOPS=y
CONFIG_DEBUG_TIMEKEEPING=y
-CONFIG_DEBUG_WW_MUTEX_SLOWPATH=y
CONFIG_PROVE_LOCKING=y
CONFIG_LOCK_STAT=y
CONFIG_DEBUG_LOCKDEP=y
diff --git a/arch/s390/configs/performance_defconfig b/arch/s390/configs/performance_defconfig
index c105bcc6d7a6..eb6f75f24208 100644
--- a/arch/s390/configs/performance_defconfig
+++ b/arch/s390/configs/performance_defconfig
@@ -259,9 +259,9 @@ CONFIG_IP_VS_NQ=m
CONFIG_IP_VS_FTP=m
CONFIG_IP_VS_PE_SIP=m
CONFIG_NF_CONNTRACK_IPV4=m
-CONFIG_NF_TABLES_IPV4=m
+CONFIG_NF_TABLES_IPV4=y
CONFIG_NFT_CHAIN_ROUTE_IPV4=m
-CONFIG_NF_TABLES_ARP=m
+CONFIG_NF_TABLES_ARP=y
CONFIG_NFT_CHAIN_NAT_IPV4=m
CONFIG_IP_NF_IPTABLES=m
CONFIG_IP_NF_MATCH_AH=m
@@ -282,7 +282,7 @@ CONFIG_IP_NF_ARPTABLES=m
CONFIG_IP_NF_ARPFILTER=m
CONFIG_IP_NF_ARP_MANGLE=m
CONFIG_NF_CONNTRACK_IPV6=m
-CONFIG_NF_TABLES_IPV6=m
+CONFIG_NF_TABLES_IPV6=y
CONFIG_NFT_CHAIN_ROUTE_IPV6=m
CONFIG_NFT_CHAIN_NAT_IPV6=m
CONFIG_IP6_NF_IPTABLES=m
@@ -303,7 +303,7 @@ CONFIG_IP6_NF_RAW=m
CONFIG_IP6_NF_SECURITY=m
CONFIG_IP6_NF_NAT=m
CONFIG_IP6_NF_TARGET_MASQUERADE=m
-CONFIG_NF_TABLES_BRIDGE=m
+CONFIG_NF_TABLES_BRIDGE=y
CONFIG_RDS=m
CONFIG_RDS_RDMA=m
CONFIG_RDS_TCP=m
diff --git a/arch/s390/crypto/crc32be-vx.S b/arch/s390/crypto/crc32be-vx.S
index e8077f0971f8..2bf01ba44107 100644
--- a/arch/s390/crypto/crc32be-vx.S
+++ b/arch/s390/crypto/crc32be-vx.S
@@ -13,6 +13,7 @@
*/
#include <linux/linkage.h>
+#include <asm/nospec-insn.h>
#include <asm/vx-insn.h>
/* Vector register range containing CRC-32 constants */
@@ -67,6 +68,8 @@
.previous
+ GEN_BR_THUNK %r14
+
.text
/*
* The CRC-32 function(s) use these calling conventions:
@@ -203,6 +206,6 @@ ENTRY(crc32_be_vgfm_16)
.Ldone:
VLGVF %r2,%v2,3
- br %r14
+ BR_EX %r14
.previous
diff --git a/arch/s390/crypto/crc32le-vx.S b/arch/s390/crypto/crc32le-vx.S
index d8c67a58c0c5..7d6f568bd3ad 100644
--- a/arch/s390/crypto/crc32le-vx.S
+++ b/arch/s390/crypto/crc32le-vx.S
@@ -14,6 +14,7 @@
*/
#include <linux/linkage.h>
+#include <asm/nospec-insn.h>
#include <asm/vx-insn.h>
/* Vector register range containing CRC-32 constants */
@@ -76,6 +77,7 @@
.previous
+ GEN_BR_THUNK %r14
.text
@@ -264,6 +266,6 @@ crc32_le_vgfm_generic:
.Ldone:
VLGVF %r2,%v2,2
- br %r14
+ BR_EX %r14
.previous
diff --git a/arch/s390/include/asm/hardirq.h b/arch/s390/include/asm/hardirq.h
index a296c6acfd07..dfbc3c6c0674 100644
--- a/arch/s390/include/asm/hardirq.h
+++ b/arch/s390/include/asm/hardirq.h
@@ -14,6 +14,8 @@
#include <asm/lowcore.h>
#define local_softirq_pending() (S390_lowcore.softirq_pending)
+#define set_softirq_pending(x) (S390_lowcore.softirq_pending = (x))
+#define or_softirq_pending(x) (S390_lowcore.softirq_pending |= (x))
#define __ARCH_IRQ_STAT
#define __ARCH_HAS_DO_SOFTIRQ
diff --git a/arch/s390/include/asm/nospec-insn.h b/arch/s390/include/asm/nospec-insn.h
new file mode 100644
index 000000000000..a01f81186e86
--- /dev/null
+++ b/arch/s390/include/asm/nospec-insn.h
@@ -0,0 +1,196 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_S390_NOSPEC_ASM_H
+#define _ASM_S390_NOSPEC_ASM_H
+
+#include <asm/alternative-asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/dwarf.h>
+
+#ifdef __ASSEMBLY__
+
+#ifdef CONFIG_EXPOLINE
+
+_LC_BR_R1 = __LC_BR_R1
+
+/*
+ * The expoline macros are used to create thunks in the same format
+ * as gcc generates them. The 'comdat' section flag makes sure that
+ * the various thunks are merged into a single copy.
+ */
+ .macro __THUNK_PROLOG_NAME name
+ .pushsection .text.\name,"axG",@progbits,\name,comdat
+ .globl \name
+ .hidden \name
+ .type \name,@function
+\name:
+ CFI_STARTPROC
+ .endm
+
+ .macro __THUNK_EPILOG
+ CFI_ENDPROC
+ .popsection
+ .endm
+
+ .macro __THUNK_PROLOG_BR r1,r2
+ __THUNK_PROLOG_NAME __s390x_indirect_jump_r\r2\()use_r\r1
+ .endm
+
+ .macro __THUNK_PROLOG_BC d0,r1,r2
+ __THUNK_PROLOG_NAME __s390x_indirect_branch_\d0\()_\r2\()use_\r1
+ .endm
+
+ .macro __THUNK_BR r1,r2
+ jg __s390x_indirect_jump_r\r2\()use_r\r1
+ .endm
+
+ .macro __THUNK_BC d0,r1,r2
+ jg __s390x_indirect_branch_\d0\()_\r2\()use_\r1
+ .endm
+
+ .macro __THUNK_BRASL r1,r2,r3
+ brasl \r1,__s390x_indirect_jump_r\r3\()use_r\r2
+ .endm
+
+ .macro __DECODE_RR expand,reg,ruse
+ .set __decode_fail,1
+ .irp r1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
+ .ifc \reg,%r\r1
+ .irp r2,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
+ .ifc \ruse,%r\r2
+ \expand \r1,\r2
+ .set __decode_fail,0
+ .endif
+ .endr
+ .endif
+ .endr
+ .if __decode_fail == 1
+ .error "__DECODE_RR failed"
+ .endif
+ .endm
+
+ .macro __DECODE_RRR expand,rsave,rtarget,ruse
+ .set __decode_fail,1
+ .irp r1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
+ .ifc \rsave,%r\r1
+ .irp r2,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
+ .ifc \rtarget,%r\r2
+ .irp r3,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
+ .ifc \ruse,%r\r3
+ \expand \r1,\r2,\r3
+ .set __decode_fail,0
+ .endif
+ .endr
+ .endif
+ .endr
+ .endif
+ .endr
+ .if __decode_fail == 1
+ .error "__DECODE_RRR failed"
+ .endif
+ .endm
+
+ .macro __DECODE_DRR expand,disp,reg,ruse
+ .set __decode_fail,1
+ .irp r1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
+ .ifc \reg,%r\r1
+ .irp r2,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
+ .ifc \ruse,%r\r2
+ \expand \disp,\r1,\r2
+ .set __decode_fail,0
+ .endif
+ .endr
+ .endif
+ .endr
+ .if __decode_fail == 1
+ .error "__DECODE_DRR failed"
+ .endif
+ .endm
+
+ .macro __THUNK_EX_BR reg,ruse
+ # Be very careful when adding instructions to this macro!
+ # The ALTERNATIVE replacement code has a .+10 which targets
+ # the "br \reg" after the code has been patched.
+#ifdef CONFIG_HAVE_MARCH_Z10_FEATURES
+ exrl 0,555f
+ j .
+#else
+ .ifc \reg,%r1
+ ALTERNATIVE "ex %r0,_LC_BR_R1", ".insn ril,0xc60000000000,0,.+10", 35
+ j .
+ .else
+ larl \ruse,555f
+ ex 0,0(\ruse)
+ j .
+ .endif
+#endif
+555: br \reg
+ .endm
+
+ .macro __THUNK_EX_BC disp,reg,ruse
+#ifdef CONFIG_HAVE_MARCH_Z10_FEATURES
+ exrl 0,556f
+ j .
+#else
+ larl \ruse,556f
+ ex 0,0(\ruse)
+ j .
+#endif
+556: b \disp(\reg)
+ .endm
+
+ .macro GEN_BR_THUNK reg,ruse=%r1
+ __DECODE_RR __THUNK_PROLOG_BR,\reg,\ruse
+ __THUNK_EX_BR \reg,\ruse
+ __THUNK_EPILOG
+ .endm
+
+ .macro GEN_B_THUNK disp,reg,ruse=%r1
+ __DECODE_DRR __THUNK_PROLOG_BC,\disp,\reg,\ruse
+ __THUNK_EX_BC \disp,\reg,\ruse
+ __THUNK_EPILOG
+ .endm
+
+ .macro BR_EX reg,ruse=%r1
+557: __DECODE_RR __THUNK_BR,\reg,\ruse
+ .pushsection .s390_indirect_branches,"a",@progbits
+ .long 557b-.
+ .popsection
+ .endm
+
+ .macro B_EX disp,reg,ruse=%r1
+558: __DECODE_DRR __THUNK_BC,\disp,\reg,\ruse
+ .pushsection .s390_indirect_branches,"a",@progbits
+ .long 558b-.
+ .popsection
+ .endm
+
+ .macro BASR_EX rsave,rtarget,ruse=%r1
+559: __DECODE_RRR __THUNK_BRASL,\rsave,\rtarget,\ruse
+ .pushsection .s390_indirect_branches,"a",@progbits
+ .long 559b-.
+ .popsection
+ .endm
+
+#else
+ .macro GEN_BR_THUNK reg,ruse=%r1
+ .endm
+
+ .macro GEN_B_THUNK disp,reg,ruse=%r1
+ .endm
+
+ .macro BR_EX reg,ruse=%r1
+ br \reg
+ .endm
+
+ .macro B_EX disp,reg,ruse=%r1
+ b \disp(\reg)
+ .endm
+
+ .macro BASR_EX rsave,rtarget,ruse=%r1
+ basr \rsave,\rtarget
+ .endm
+#endif
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ASM_S390_NOSPEC_ASM_H */
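The new header generalizes the expoline thunks that entry.S had been defining privately (removed further below): GEN_BR_THUNK emits one comdat thunk per register pair, and every BR_EX/B_EX/BASR_EX site both jumps through that thunk and records its own address in .s390_indirect_branches so it can be patched back to a plain branch when the mitigation is disabled. As a rough sketch of the macro output (not text from the patch), with CONFIG_EXPOLINE and CONFIG_HAVE_MARCH_Z10_FEATURES the pair GEN_BR_THUNK %r14 / BR_EX %r14 expands approximately to:

    __s390x_indirect_jump_r1use_r14:
            exrl    0,555f          # execute the br out of line
            j       .               # speculation ends in this tight loop
    555:    br      %r14
    ...
            jg      __s390x_indirect_jump_r1use_r14  # site recorded in .s390_indirect_branches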
diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h
index 12fe3591034f..94f8db468c9b 100644
--- a/arch/s390/include/asm/pci.h
+++ b/arch/s390/include/asm/pci.h
@@ -2,8 +2,6 @@
#ifndef __ASM_S390_PCI_H
#define __ASM_S390_PCI_H
-/* must be set before including asm-generic/pci.h */
-#define PCI_DMA_BUS_IS_PHYS (0)
/* must be set before including pci_clp.h */
#define PCI_BAR_COUNT 6
diff --git a/arch/s390/include/asm/purgatory.h b/arch/s390/include/asm/purgatory.h
index e297bcfc476f..6090670df51f 100644
--- a/arch/s390/include/asm/purgatory.h
+++ b/arch/s390/include/asm/purgatory.h
@@ -13,5 +13,11 @@
int verify_sha256_digest(void);
+extern u64 kernel_entry;
+extern u64 kernel_type;
+
+extern u64 crash_start;
+extern u64 crash_size;
+
#endif /* __ASSEMBLY__ */
#endif /* _S390_PURGATORY_H_ */
diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile
index 84ea6225efb4..f92dd8ed3884 100644
--- a/arch/s390/kernel/Makefile
+++ b/arch/s390/kernel/Makefile
@@ -65,6 +65,7 @@ obj-y += nospec-branch.o
extra-y += head.o head64.o vmlinux.lds
+obj-$(CONFIG_SYSFS) += nospec-sysfs.o
CFLAGS_REMOVE_nospec-branch.o += $(CC_FLAGS_EXPOLINE)
obj-$(CONFIG_MODULES) += module.o
diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
index eb2a5c0443cd..11aea745a2a6 100644
--- a/arch/s390/kernel/asm-offsets.c
+++ b/arch/s390/kernel/asm-offsets.c
@@ -181,6 +181,7 @@ int main(void)
OFFSET(__LC_MACHINE_FLAGS, lowcore, machine_flags);
OFFSET(__LC_PREEMPT_COUNT, lowcore, preempt_count);
OFFSET(__LC_GMAP, lowcore, gmap);
+ OFFSET(__LC_BR_R1, lowcore, br_r1_trampoline);
/* software defined ABI-relevant lowcore locations 0xe00 - 0xe20 */
OFFSET(__LC_DUMP_REIPL, lowcore, ipib);
/* hardware defined lowcore locations 0x1000 - 0x18ff */
diff --git a/arch/s390/kernel/base.S b/arch/s390/kernel/base.S
index f6c56009e822..b65874b0b412 100644
--- a/arch/s390/kernel/base.S
+++ b/arch/s390/kernel/base.S
@@ -9,18 +9,22 @@
#include <linux/linkage.h>
#include <asm/asm-offsets.h>
+#include <asm/nospec-insn.h>
#include <asm/ptrace.h>
#include <asm/sigp.h>
+ GEN_BR_THUNK %r9
+ GEN_BR_THUNK %r14
+
ENTRY(s390_base_mcck_handler)
basr %r13,0
0: lg %r15,__LC_PANIC_STACK # load panic stack
aghi %r15,-STACK_FRAME_OVERHEAD
larl %r1,s390_base_mcck_handler_fn
- lg %r1,0(%r1)
- ltgr %r1,%r1
+ lg %r9,0(%r1)
+ ltgr %r9,%r9
jz 1f
- basr %r14,%r1
+ BASR_EX %r14,%r9
1: la %r1,4095
lmg %r0,%r15,__LC_GPREGS_SAVE_AREA-4095(%r1)
lpswe __LC_MCK_OLD_PSW
@@ -37,10 +41,10 @@ ENTRY(s390_base_ext_handler)
basr %r13,0
0: aghi %r15,-STACK_FRAME_OVERHEAD
larl %r1,s390_base_ext_handler_fn
- lg %r1,0(%r1)
- ltgr %r1,%r1
+ lg %r9,0(%r1)
+ ltgr %r9,%r9
jz 1f
- basr %r14,%r1
+ BASR_EX %r14,%r9
1: lmg %r0,%r15,__LC_SAVE_AREA_ASYNC
ni __LC_EXT_OLD_PSW+1,0xfd # clear wait state bit
lpswe __LC_EXT_OLD_PSW
@@ -57,10 +61,10 @@ ENTRY(s390_base_pgm_handler)
basr %r13,0
0: aghi %r15,-STACK_FRAME_OVERHEAD
larl %r1,s390_base_pgm_handler_fn
- lg %r1,0(%r1)
- ltgr %r1,%r1
+ lg %r9,0(%r1)
+ ltgr %r9,%r9
jz 1f
- basr %r14,%r1
+ BASR_EX %r14,%r9
lmg %r0,%r15,__LC_SAVE_AREA_SYNC
lpswe __LC_PGM_OLD_PSW
1: lpswe disabled_wait_psw-0b(%r13)
@@ -117,7 +121,7 @@ ENTRY(diag308_reset)
larl %r4,.Lcontinue_psw # Restore PSW flags
lpswe 0(%r4)
.Lcontinue:
- br %r14
+ BR_EX %r14
.align 16
.Lrestart_psw:
.long 0x00080000,0x80000000 + .Lrestart_part2
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index 3f22f139a041..f03402efab4b 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -28,6 +28,7 @@
#include <asm/setup.h>
#include <asm/nmi.h>
#include <asm/export.h>
+#include <asm/nospec-insn.h>
__PT_R0 = __PT_GPRS
__PT_R1 = __PT_GPRS + 8
@@ -183,67 +184,9 @@ _LPP_OFFSET = __LC_LPP
"jnz .+8; .long 0xb2e8d000", 82
.endm
-#ifdef CONFIG_EXPOLINE
-
- .macro GEN_BR_THUNK name,reg,tmp
- .section .text.\name,"axG",@progbits,\name,comdat
- .globl \name
- .hidden \name
- .type \name,@function
-\name:
- CFI_STARTPROC
-#ifdef CONFIG_HAVE_MARCH_Z10_FEATURES
- exrl 0,0f
-#else
- larl \tmp,0f
- ex 0,0(\tmp)
-#endif
- j .
-0: br \reg
- CFI_ENDPROC
- .endm
-
- GEN_BR_THUNK __s390x_indirect_jump_r1use_r9,%r9,%r1
- GEN_BR_THUNK __s390x_indirect_jump_r1use_r14,%r14,%r1
- GEN_BR_THUNK __s390x_indirect_jump_r11use_r14,%r14,%r11
-
- .macro BASR_R14_R9
-0: brasl %r14,__s390x_indirect_jump_r1use_r9
- .pushsection .s390_indirect_branches,"a",@progbits
- .long 0b-.
- .popsection
- .endm
-
- .macro BR_R1USE_R14
-0: jg __s390x_indirect_jump_r1use_r14
- .pushsection .s390_indirect_branches,"a",@progbits
- .long 0b-.
- .popsection
- .endm
-
- .macro BR_R11USE_R14
-0: jg __s390x_indirect_jump_r11use_r14
- .pushsection .s390_indirect_branches,"a",@progbits
- .long 0b-.
- .popsection
- .endm
-
-#else /* CONFIG_EXPOLINE */
-
- .macro BASR_R14_R9
- basr %r14,%r9
- .endm
-
- .macro BR_R1USE_R14
- br %r14
- .endm
-
- .macro BR_R11USE_R14
- br %r14
- .endm
-
-#endif /* CONFIG_EXPOLINE */
-
+ GEN_BR_THUNK %r9
+ GEN_BR_THUNK %r14
+ GEN_BR_THUNK %r14,%r11
.section .kprobes.text, "ax"
.Ldummy:
@@ -260,7 +203,7 @@ _LPP_OFFSET = __LC_LPP
ENTRY(__bpon)
.globl __bpon
BPON
- BR_R1USE_R14
+ BR_EX %r14
/*
* Scheduler resume function, called by switch_to
@@ -284,7 +227,7 @@ ENTRY(__switch_to)
mvc __LC_CURRENT_PID(4,%r0),0(%r3) # store pid of next
lmg %r6,%r15,__SF_GPRS(%r15) # load gprs of next task
ALTERNATIVE "", ".insn s,0xb2800000,_LPP_OFFSET", 40
- BR_R1USE_R14
+ BR_EX %r14
.L__critical_start:
@@ -351,7 +294,7 @@ sie_exit:
xgr %r5,%r5
lmg %r6,%r14,__SF_GPRS(%r15) # restore kernel registers
lg %r2,__SF_SIE_REASON(%r15) # return exit reason code
- BR_R1USE_R14
+ BR_EX %r14
.Lsie_fault:
lghi %r14,-EFAULT
stg %r14,__SF_SIE_REASON(%r15) # set exit reason code
@@ -410,7 +353,7 @@ ENTRY(system_call)
lgf %r9,0(%r8,%r10) # get system call add.
TSTMSK __TI_flags(%r12),_TIF_TRACE
jnz .Lsysc_tracesys
- BASR_R14_R9 # call sys_xxxx
+ BASR_EX %r14,%r9 # call sys_xxxx
stg %r2,__PT_R2(%r11) # store return value
.Lsysc_return:
@@ -595,7 +538,7 @@ ENTRY(system_call)
lmg %r3,%r7,__PT_R3(%r11)
stg %r7,STACK_FRAME_OVERHEAD(%r15)
lg %r2,__PT_ORIG_GPR2(%r11)
- BASR_R14_R9 # call sys_xxx
+ BASR_EX %r14,%r9 # call sys_xxx
stg %r2,__PT_R2(%r11) # store return value
.Lsysc_tracenogo:
TSTMSK __TI_flags(%r12),_TIF_TRACE
@@ -619,7 +562,7 @@ ENTRY(ret_from_fork)
lmg %r9,%r10,__PT_R9(%r11) # load gprs
ENTRY(kernel_thread_starter)
la %r2,0(%r10)
- BASR_R14_R9
+ BASR_EX %r14,%r9
j .Lsysc_tracenogo
/*
@@ -701,7 +644,7 @@ ENTRY(pgm_check_handler)
je .Lpgm_return
lgf %r9,0(%r10,%r1) # load address of handler routine
lgr %r2,%r11 # pass pointer to pt_regs
- BASR_R14_R9 # branch to interrupt-handler
+ BASR_EX %r14,%r9 # branch to interrupt-handler
.Lpgm_return:
LOCKDEP_SYS_EXIT
tm __PT_PSW+1(%r11),0x01 # returning to user ?
@@ -1019,7 +962,7 @@ ENTRY(psw_idle)
stpt __TIMER_IDLE_ENTER(%r2)
.Lpsw_idle_lpsw:
lpswe __SF_EMPTY(%r15)
- BR_R1USE_R14
+ BR_EX %r14
.Lpsw_idle_end:
/*
@@ -1061,7 +1004,7 @@ ENTRY(save_fpu_regs)
.Lsave_fpu_regs_done:
oi __LC_CPU_FLAGS+7,_CIF_FPU
.Lsave_fpu_regs_exit:
- BR_R1USE_R14
+ BR_EX %r14
.Lsave_fpu_regs_end:
EXPORT_SYMBOL(save_fpu_regs)
@@ -1107,7 +1050,7 @@ load_fpu_regs:
.Lload_fpu_regs_done:
ni __LC_CPU_FLAGS+7,255-_CIF_FPU
.Lload_fpu_regs_exit:
- BR_R1USE_R14
+ BR_EX %r14
.Lload_fpu_regs_end:
.L__critical_end:
@@ -1322,7 +1265,7 @@ cleanup_critical:
jl 0f
clg %r9,BASED(.Lcleanup_table+104) # .Lload_fpu_regs_end
jl .Lcleanup_load_fpu_regs
-0: BR_R11USE_R14
+0: BR_EX %r14
.align 8
.Lcleanup_table:
@@ -1358,7 +1301,7 @@ cleanup_critical:
ni __SIE_PROG0C+3(%r9),0xfe # no longer in SIE
lctlg %c1,%c1,__LC_USER_ASCE # load primary asce
larl %r9,sie_exit # skip forward to sie_exit
- BR_R11USE_R14
+ BR_EX %r14
#endif
.Lcleanup_system_call:
@@ -1412,7 +1355,7 @@ cleanup_critical:
stg %r15,56(%r11) # r15 stack pointer
# set new psw address and exit
larl %r9,.Lsysc_do_svc
- BR_R11USE_R14
+ BR_EX %r14,%r11
.Lcleanup_system_call_insn:
.quad system_call
.quad .Lsysc_stmg
@@ -1424,7 +1367,7 @@ cleanup_critical:
.Lcleanup_sysc_tif:
larl %r9,.Lsysc_tif
- BR_R11USE_R14
+ BR_EX %r14,%r11
.Lcleanup_sysc_restore:
# check if stpt has been executed
@@ -1441,14 +1384,14 @@ cleanup_critical:
mvc 0(64,%r11),__PT_R8(%r9)
lmg %r0,%r7,__PT_R0(%r9)
1: lmg %r8,%r9,__LC_RETURN_PSW
- BR_R11USE_R14
+ BR_EX %r14,%r11
.Lcleanup_sysc_restore_insn:
.quad .Lsysc_exit_timer
.quad .Lsysc_done - 4
.Lcleanup_io_tif:
larl %r9,.Lio_tif
- BR_R11USE_R14
+ BR_EX %r14,%r11
.Lcleanup_io_restore:
# check if stpt has been executed
@@ -1462,7 +1405,7 @@ cleanup_critical:
mvc 0(64,%r11),__PT_R8(%r9)
lmg %r0,%r7,__PT_R0(%r9)
1: lmg %r8,%r9,__LC_RETURN_PSW
- BR_R11USE_R14
+ BR_EX %r14,%r11
.Lcleanup_io_restore_insn:
.quad .Lio_exit_timer
.quad .Lio_done - 4
@@ -1515,17 +1458,17 @@ cleanup_critical:
# prepare return psw
nihh %r8,0xfcfd # clear irq & wait state bits
lg %r9,48(%r11) # return from psw_idle
- BR_R11USE_R14
+ BR_EX %r14,%r11
.Lcleanup_idle_insn:
.quad .Lpsw_idle_lpsw
.Lcleanup_save_fpu_regs:
larl %r9,save_fpu_regs
- BR_R11USE_R14
+ BR_EX %r14,%r11
.Lcleanup_load_fpu_regs:
larl %r9,load_fpu_regs
- BR_R11USE_R14
+ BR_EX %r14,%r11
/*
* Integer constants
diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c
index 94f2099bceb0..3d17c41074ca 100644
--- a/arch/s390/kernel/irq.c
+++ b/arch/s390/kernel/irq.c
@@ -176,10 +176,9 @@ void do_softirq_own_stack(void)
new -= STACK_FRAME_OVERHEAD;
((struct stack_frame *) new)->back_chain = old;
asm volatile(" la 15,0(%0)\n"
- " basr 14,%2\n"
+ " brasl 14,__do_softirq\n"
" la 15,0(%1)\n"
- : : "a" (new), "a" (old),
- "a" (__do_softirq)
+ : : "a" (new), "a" (old)
: "0", "1", "2", "3", "4", "5", "14",
"cc", "memory" );
} else {
diff --git a/arch/s390/kernel/mcount.S b/arch/s390/kernel/mcount.S
index 82df7d80fab2..27110f3294ed 100644
--- a/arch/s390/kernel/mcount.S
+++ b/arch/s390/kernel/mcount.S
@@ -9,13 +9,17 @@
#include <linux/linkage.h>
#include <asm/asm-offsets.h>
#include <asm/ftrace.h>
+#include <asm/nospec-insn.h>
#include <asm/ptrace.h>
#include <asm/export.h>
+ GEN_BR_THUNK %r1
+ GEN_BR_THUNK %r14
+
.section .kprobes.text, "ax"
ENTRY(ftrace_stub)
- br %r14
+ BR_EX %r14
#define STACK_FRAME_SIZE (STACK_FRAME_OVERHEAD + __PT_SIZE)
#define STACK_PTREGS (STACK_FRAME_OVERHEAD)
@@ -23,7 +27,7 @@ ENTRY(ftrace_stub)
#define STACK_PTREGS_PSW (STACK_PTREGS + __PT_PSW)
ENTRY(_mcount)
- br %r14
+ BR_EX %r14
EXPORT_SYMBOL(_mcount)
@@ -53,7 +57,7 @@ ENTRY(ftrace_caller)
#endif
lgr %r3,%r14
la %r5,STACK_PTREGS(%r15)
- basr %r14,%r1
+ BASR_EX %r14,%r1
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
# The j instruction gets runtime patched to a nop instruction.
# See ftrace_enable_ftrace_graph_caller.
@@ -68,7 +72,7 @@ ftrace_graph_caller_end:
#endif
lg %r1,(STACK_PTREGS_PSW+8)(%r15)
lmg %r2,%r15,(STACK_PTREGS_GPRS+2*8)(%r15)
- br %r1
+ BR_EX %r1
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
@@ -81,6 +85,6 @@ ENTRY(return_to_handler)
aghi %r15,STACK_FRAME_OVERHEAD
lgr %r14,%r2
lmg %r2,%r5,32(%r15)
- br %r14
+ BR_EX %r14
#endif
diff --git a/arch/s390/kernel/nospec-branch.c b/arch/s390/kernel/nospec-branch.c
index 46d49a11663f..8ad6a7128b3a 100644
--- a/arch/s390/kernel/nospec-branch.c
+++ b/arch/s390/kernel/nospec-branch.c
@@ -1,7 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/module.h>
#include <linux/device.h>
-#include <linux/cpu.h>
#include <asm/nospec-branch.h>
static int __init nobp_setup_early(char *str)
@@ -44,24 +43,6 @@ static int __init nospec_report(void)
}
arch_initcall(nospec_report);
-#ifdef CONFIG_SYSFS
-ssize_t cpu_show_spectre_v1(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- return sprintf(buf, "Mitigation: __user pointer sanitization\n");
-}
-
-ssize_t cpu_show_spectre_v2(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- if (IS_ENABLED(CC_USING_EXPOLINE) && !nospec_disable)
- return sprintf(buf, "Mitigation: execute trampolines\n");
- if (__test_facility(82, S390_lowcore.alt_stfle_fac_list))
- return sprintf(buf, "Mitigation: limited branch prediction.\n");
- return sprintf(buf, "Vulnerable\n");
-}
-#endif
-
#ifdef CONFIG_EXPOLINE
int nospec_disable = IS_ENABLED(CONFIG_EXPOLINE_OFF);
@@ -112,7 +93,6 @@ static void __init_or_module __nospec_revert(s32 *start, s32 *end)
s32 *epo;
/* Second part of the instruction replace is always a nop */
- memcpy(insnbuf + 2, (char[]) { 0x47, 0x00, 0x00, 0x00 }, 4);
for (epo = start; epo < end; epo++) {
instr = (u8 *) epo + *epo;
if (instr[0] == 0xc0 && (instr[1] & 0x0f) == 0x04)
@@ -133,18 +113,34 @@ static void __init_or_module __nospec_revert(s32 *start, s32 *end)
br = thunk + (*(int *)(thunk + 2)) * 2;
else
continue;
- if (br[0] != 0x07 || (br[1] & 0xf0) != 0xf0)
+ /* Check for unconditional branch 0x07f? or 0x47f???? */
+ if ((br[0] & 0xbf) != 0x07 || (br[1] & 0xf0) != 0xf0)
continue;
+
+ memcpy(insnbuf + 2, (char[]) { 0x47, 0x00, 0x07, 0x00 }, 4);
switch (type) {
case BRCL_EXPOLINE:
- /* brcl to thunk, replace with br + nop */
insnbuf[0] = br[0];
insnbuf[1] = (instr[1] & 0xf0) | (br[1] & 0x0f);
+ if (br[0] == 0x47) {
+ /* brcl to b, replace with bc + nopr */
+ insnbuf[2] = br[2];
+ insnbuf[3] = br[3];
+ } else {
+ /* brcl to br, replace with bcr + nop */
+ }
break;
case BRASL_EXPOLINE:
- /* brasl to thunk, replace with basr + nop */
- insnbuf[0] = 0x0d;
insnbuf[1] = (instr[1] & 0xf0) | (br[1] & 0x0f);
+ if (br[0] == 0x47) {
+ /* brasl to b, replace with bas + nopr */
+ insnbuf[0] = 0x4d;
+ insnbuf[2] = br[2];
+ insnbuf[3] = br[3];
+ } else {
+ /* brasl to br, replace with basr + nop */
+ insnbuf[0] = 0x0d;
+ }
break;
}
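The widened opcode test lets __nospec_revert handle thunks ending in either br (0x07f?) or b disp(%reg) (0x47f???), picking bc/bas replacements when the displacement form is found. The mask works because 0x47 & 0xbf == 0x07 == 0x07 & 0xbf. A minimal stand-alone sketch of that check (illustrative only, not kernel code):

    #include <stdio.h>

    /* matches 0x07f? (br %reg) and 0x47f??? (b disp(%reg)) */
    static int is_uncond_branch(unsigned char b0, unsigned char b1)
    {
            return (b0 & 0xbf) == 0x07 && (b1 & 0xf0) == 0xf0;
    }

    int main(void)
    {
            printf("%d %d %d\n",
                   is_uncond_branch(0x07, 0xf4),  /* br %r4      -> 1 */
                   is_uncond_branch(0x47, 0xf0),  /* b 0(%r15)   -> 1 */
                   is_uncond_branch(0x0d, 0xe1)); /* basr        -> 0 */
            return 0;
    }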
diff --git a/arch/s390/kernel/nospec-sysfs.c b/arch/s390/kernel/nospec-sysfs.c
new file mode 100644
index 000000000000..8affad5f18cb
--- /dev/null
+++ b/arch/s390/kernel/nospec-sysfs.c
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/device.h>
+#include <linux/cpu.h>
+#include <asm/facility.h>
+#include <asm/nospec-branch.h>
+
+ssize_t cpu_show_spectre_v1(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ return sprintf(buf, "Mitigation: __user pointer sanitization\n");
+}
+
+ssize_t cpu_show_spectre_v2(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ if (IS_ENABLED(CC_USING_EXPOLINE) && !nospec_disable)
+ return sprintf(buf, "Mitigation: execute trampolines\n");
+ if (__test_facility(82, S390_lowcore.alt_stfle_fac_list))
+ return sprintf(buf, "Mitigation: limited branch prediction\n");
+ return sprintf(buf, "Vulnerable\n");
+}
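Moving the two show routines into their own file lets them be built only when CONFIG_SYSFS is set (see the Makefile hunk above). They override weak defaults provided by the driver core for the files under /sys/devices/system/cpu/vulnerabilities/. Roughly, the generic side of that contract looks like the following (a sketch of the drivers/base/cpu.c pattern, not taken from this patch):

    ssize_t __weak cpu_show_spectre_v2(struct device *dev,
                                       struct device_attribute *attr, char *buf)
    {
            return sprintf(buf, "Not affected\n");
    }
    static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL);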
diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c
index 1c9ddd7aa5ec..0292d68e7dde 100644
--- a/arch/s390/kernel/perf_cpum_sf.c
+++ b/arch/s390/kernel/perf_cpum_sf.c
@@ -753,6 +753,10 @@ static int __hw_perf_event_init(struct perf_event *event)
*/
rate = 0;
if (attr->freq) {
+ if (!attr->sample_freq) {
+ err = -EINVAL;
+ goto out;
+ }
rate = freq_to_sample_rate(&si, attr->sample_freq);
rate = hw_limit_rate(&si, rate);
attr->freq = 0;
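The added check closes a hole for sampling events opened in frequency mode with a zero frequency: they now fail with -EINVAL instead of reaching freq_to_sample_rate(). A hedged user-space illustration of the attribute that is now rejected:

    #include <string.h>
    #include <linux/perf_event.h>

    static void build_rejected_attr(struct perf_event_attr *attr)
    {
            memset(attr, 0, sizeof(*attr));
            attr->size = sizeof(*attr);
            attr->freq = 1;         /* frequency mode requested ...        */
            attr->sample_freq = 0;  /* ... with no frequency: now -EINVAL  */
    }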
diff --git a/arch/s390/kernel/reipl.S b/arch/s390/kernel/reipl.S
index 73cc3750f0d3..7f14adf512c6 100644
--- a/arch/s390/kernel/reipl.S
+++ b/arch/s390/kernel/reipl.S
@@ -7,8 +7,11 @@
#include <linux/linkage.h>
#include <asm/asm-offsets.h>
+#include <asm/nospec-insn.h>
#include <asm/sigp.h>
+ GEN_BR_THUNK %r9
+
#
# Issue "store status" for the current CPU to its prefix page
# and call passed function afterwards
@@ -67,9 +70,9 @@ ENTRY(store_status)
st %r4,0(%r1)
st %r5,4(%r1)
stg %r2,8(%r1)
- lgr %r1,%r2
+ lgr %r9,%r2
lgr %r2,%r3
- br %r1
+ BR_EX %r9
.section .bss
.align 8
diff --git a/arch/s390/kernel/swsusp.S b/arch/s390/kernel/swsusp.S
index e99187149f17..a049a7b9d6e8 100644
--- a/arch/s390/kernel/swsusp.S
+++ b/arch/s390/kernel/swsusp.S
@@ -13,6 +13,7 @@
#include <asm/ptrace.h>
#include <asm/thread_info.h>
#include <asm/asm-offsets.h>
+#include <asm/nospec-insn.h>
#include <asm/sigp.h>
/*
@@ -24,6 +25,8 @@
* (see below) in the resume process.
* This function runs with disabled interrupts.
*/
+ GEN_BR_THUNK %r14
+
.section .text
ENTRY(swsusp_arch_suspend)
stmg %r6,%r15,__SF_GPRS(%r15)
@@ -103,7 +106,7 @@ ENTRY(swsusp_arch_suspend)
spx 0x318(%r1)
lmg %r6,%r15,STACK_FRAME_OVERHEAD + __SF_GPRS(%r15)
lghi %r2,0
- br %r14
+ BR_EX %r14
/*
* Restore saved memory image to correct place and restore register context.
@@ -197,11 +200,10 @@ pgm_check_entry:
larl %r15,init_thread_union
ahi %r15,1<<(PAGE_SHIFT+THREAD_SIZE_ORDER)
larl %r2,.Lpanic_string
- larl %r3,sclp_early_printk
lghi %r1,0
sam31
sigp %r1,%r0,SIGP_SET_ARCHITECTURE
- basr %r14,%r3
+ brasl %r14,sclp_early_printk
larl %r3,.Ldisabled_wait_31
lpsw 0(%r3)
4:
@@ -267,7 +269,7 @@ restore_registers:
/* Return 0 */
lmg %r6,%r15,STACK_FRAME_OVERHEAD + __SF_GPRS(%r15)
lghi %r2,0
- br %r14
+ BR_EX %r14
.section .data..nosave,"aw",@progbits
.align 8
diff --git a/arch/s390/kernel/sysinfo.c b/arch/s390/kernel/sysinfo.c
index fc7e04c2195b..54f5496913fa 100644
--- a/arch/s390/kernel/sysinfo.c
+++ b/arch/s390/kernel/sysinfo.c
@@ -294,21 +294,9 @@ static int sysinfo_show(struct seq_file *m, void *v)
return 0;
}
-static int sysinfo_open(struct inode *inode, struct file *file)
-{
- return single_open(file, sysinfo_show, NULL);
-}
-
-static const struct file_operations sysinfo_fops = {
- .open = sysinfo_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
static int __init sysinfo_create_proc(void)
{
- proc_create("sysinfo", 0444, NULL, &sysinfo_fops);
+ proc_create_single("sysinfo", 0444, NULL, sysinfo_show);
return 0;
}
device_initcall(sysinfo_create_proc);
@@ -386,18 +374,6 @@ static const struct seq_operations service_level_seq_ops = {
.show = service_level_show
};
-static int service_level_open(struct inode *inode, struct file *file)
-{
- return seq_open(file, &service_level_seq_ops);
-}
-
-static const struct file_operations service_level_ops = {
- .open = service_level_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release
-};
-
static void service_level_vm_print(struct seq_file *m,
struct service_level *slr)
{
@@ -420,7 +396,7 @@ static struct service_level service_level_vm = {
static __init int create_proc_service_level(void)
{
- proc_create("service_levels", 0, NULL, &service_level_ops);
+ proc_create_seq("service_levels", 0, NULL, &service_level_seq_ops);
if (MACHINE_IS_VM)
register_service_level(&service_level_vm);
return 0;
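Both conversions (and the proc_create_single_data one in the sparc ioport.c hunk later on) use the single-shot procfs helpers introduced for 4.18, which fold the boilerplate open/read/llseek/release file_operations into procfs itself. Their effective signatures, quoted from memory rather than from this patch (the tree implements some of them as thin wrapper macros):

    struct proc_dir_entry *proc_create_single(const char *name, umode_t mode,
                    struct proc_dir_entry *parent,
                    int (*show)(struct seq_file *, void *));

    struct proc_dir_entry *proc_create_seq(const char *name, umode_t mode,
                    struct proc_dir_entry *parent,
                    const struct seq_operations *ops);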
diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c
index a5297a22bc1e..8003b38c1688 100644
--- a/arch/s390/kernel/traps.c
+++ b/arch/s390/kernel/traps.c
@@ -44,14 +44,8 @@ int is_valid_bugaddr(unsigned long addr)
void do_report_trap(struct pt_regs *regs, int si_signo, int si_code, char *str)
{
- siginfo_t info;
-
if (user_mode(regs)) {
- info.si_signo = si_signo;
- info.si_errno = 0;
- info.si_code = si_code;
- info.si_addr = get_trap_ip(regs);
- force_sig_info(si_signo, &info, current);
+ force_sig_fault(si_signo, si_code, get_trap_ip(regs), current);
report_user_fault(regs, si_signo, 0);
} else {
const struct exception_table_entry *fixup;
@@ -80,18 +74,12 @@ NOKPROBE_SYMBOL(do_trap);
void do_per_trap(struct pt_regs *regs)
{
- siginfo_t info;
-
if (notify_die(DIE_SSTEP, "sstep", regs, 0, 0, SIGTRAP) == NOTIFY_STOP)
return;
if (!current->ptrace)
return;
- info.si_signo = SIGTRAP;
- info.si_errno = 0;
- info.si_code = TRAP_HWBKPT;
- info.si_addr =
- (void __force __user *) current->thread.per_event.address;
- force_sig_info(SIGTRAP, &info, current);
+ force_sig_fault(SIGTRAP, TRAP_HWBKPT,
+ (void __force __user *) current->thread.per_event.address, current);
}
NOKPROBE_SYMBOL(do_per_trap);
@@ -165,7 +153,6 @@ void translation_exception(struct pt_regs *regs)
void illegal_op(struct pt_regs *regs)
{
- siginfo_t info;
__u8 opcode[6];
__u16 __user *location;
int is_uprobe_insn = 0;
@@ -177,13 +164,9 @@ void illegal_op(struct pt_regs *regs)
if (get_user(*((__u16 *) opcode), (__u16 __user *) location))
return;
if (*((__u16 *) opcode) == S390_BREAKPOINT_U16) {
- if (current->ptrace) {
- info.si_signo = SIGTRAP;
- info.si_errno = 0;
- info.si_code = TRAP_BRKPT;
- info.si_addr = location;
- force_sig_info(SIGTRAP, &info, current);
- } else
+ if (current->ptrace)
+ force_sig_fault(SIGTRAP, TRAP_BRKPT, location, current);
+ else
signal = SIGILL;
#ifdef CONFIG_UPROBES
} else if (*((__u16 *) opcode) == UPROBE_SWBP_INSN) {
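All of the siginfo_t conversions in this series follow the same shape: a zero-and-fill sequence collapses into one force_sig_fault() call, which also keeps the unused parts of the siginfo union consistently initialized. A hedged sketch of the pattern (kernel context assumed; on sparc the helper takes an extra trapno argument, as the later hunks show):

    /* before: five assignments plus force_sig_info() */
    siginfo_t info;
    info.si_signo = SIGTRAP;
    info.si_errno = 0;
    info.si_code  = TRAP_BRKPT;
    info.si_addr  = addr;
    force_sig_info(SIGTRAP, &info, current);

    /* after: one call, padding cleared by the helper */
    force_sig_fault(SIGTRAP, TRAP_BRKPT, addr, current);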
diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
index 8961e3970901..969882b54266 100644
--- a/arch/s390/kvm/vsie.c
+++ b/arch/s390/kvm/vsie.c
@@ -578,7 +578,7 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
gpa = READ_ONCE(scb_o->itdba) & ~0xffUL;
if (gpa && (scb_s->ecb & ECB_TE)) {
- if (!(gpa & ~0x1fffU)) {
+ if (!(gpa & ~0x1fffUL)) {
rc = set_validity_icpt(scb_s, 0x0080U);
goto unpin;
}
diff --git a/arch/s390/lib/mem.S b/arch/s390/lib/mem.S
index 495c9c4bacc7..2311f15be9cf 100644
--- a/arch/s390/lib/mem.S
+++ b/arch/s390/lib/mem.S
@@ -7,6 +7,9 @@
#include <linux/linkage.h>
#include <asm/export.h>
+#include <asm/nospec-insn.h>
+
+ GEN_BR_THUNK %r14
/*
* void *memmove(void *dest, const void *src, size_t n)
@@ -33,14 +36,14 @@ ENTRY(memmove)
.Lmemmove_forward_remainder:
larl %r5,.Lmemmove_mvc
ex %r4,0(%r5)
- br %r14
+ BR_EX %r14
.Lmemmove_reverse:
ic %r0,0(%r4,%r3)
stc %r0,0(%r4,%r1)
brctg %r4,.Lmemmove_reverse
ic %r0,0(%r4,%r3)
stc %r0,0(%r4,%r1)
- br %r14
+ BR_EX %r14
.Lmemmove_mvc:
mvc 0(1,%r1),0(%r3)
EXPORT_SYMBOL(memmove)
@@ -77,7 +80,7 @@ ENTRY(memset)
.Lmemset_clear_remainder:
larl %r3,.Lmemset_xc
ex %r4,0(%r3)
- br %r14
+ BR_EX %r14
.Lmemset_fill:
cghi %r4,1
lgr %r1,%r2
@@ -95,10 +98,10 @@ ENTRY(memset)
stc %r3,0(%r1)
larl %r5,.Lmemset_mvc
ex %r4,0(%r5)
- br %r14
+ BR_EX %r14
.Lmemset_fill_exit:
stc %r3,0(%r1)
- br %r14
+ BR_EX %r14
.Lmemset_xc:
xc 0(1,%r1),0(%r1)
.Lmemset_mvc:
@@ -121,7 +124,7 @@ ENTRY(memcpy)
.Lmemcpy_remainder:
larl %r5,.Lmemcpy_mvc
ex %r4,0(%r5)
- br %r14
+ BR_EX %r14
.Lmemcpy_loop:
mvc 0(256,%r1),0(%r3)
la %r1,256(%r1)
@@ -159,10 +162,10 @@ ENTRY(__memset\bits)
\insn %r3,0(%r1)
larl %r5,.L__memset_mvc\bits
ex %r4,0(%r5)
- br %r14
+ BR_EX %r14
.L__memset_exit\bits:
\insn %r3,0(%r2)
- br %r14
+ BR_EX %r14
.L__memset_mvc\bits:
mvc \bytes(1,%r1),0(%r1)
.endm
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index 93faeca52284..e074480d3598 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -265,14 +265,10 @@ void report_user_fault(struct pt_regs *regs, long signr, int is_mm_fault)
*/
static noinline void do_sigsegv(struct pt_regs *regs, int si_code)
{
- struct siginfo si;
-
report_user_fault(regs, SIGSEGV, 1);
- si.si_signo = SIGSEGV;
- si.si_errno = 0;
- si.si_code = si_code;
- si.si_addr = (void __user *)(regs->int_parm_long & __FAIL_ADDR_MASK);
- force_sig_info(SIGSEGV, &si, current);
+ force_sig_fault(SIGSEGV, si_code,
+ (void __user *)(regs->int_parm_long & __FAIL_ADDR_MASK),
+ current);
}
static noinline void do_no_context(struct pt_regs *regs)
@@ -316,18 +312,13 @@ static noinline void do_low_address(struct pt_regs *regs)
static noinline void do_sigbus(struct pt_regs *regs)
{
- struct task_struct *tsk = current;
- struct siginfo si;
-
/*
* Send a sigbus, regardless of whether we were in kernel
* or user mode.
*/
- si.si_signo = SIGBUS;
- si.si_errno = 0;
- si.si_code = BUS_ADRERR;
- si.si_addr = (void __user *)(regs->int_parm_long & __FAIL_ADDR_MASK);
- force_sig_info(SIGBUS, &si, tsk);
+ force_sig_fault(SIGBUS, BUS_ADRERR,
+ (void __user *)(regs->int_parm_long & __FAIL_ADDR_MASK),
+ current);
}
static noinline int signal_return(struct pt_regs *regs)
diff --git a/arch/s390/net/bpf_jit.S b/arch/s390/net/bpf_jit.S
index 25bb4643c4f4..9f794869c1b0 100644
--- a/arch/s390/net/bpf_jit.S
+++ b/arch/s390/net/bpf_jit.S
@@ -9,6 +9,7 @@
*/
#include <linux/linkage.h>
+#include <asm/nospec-insn.h>
#include "bpf_jit.h"
/*
@@ -54,7 +55,7 @@ ENTRY(sk_load_##NAME##_pos); \
clg %r3,STK_OFF_HLEN(%r15); /* Offset + SIZE > hlen? */ \
jh sk_load_##NAME##_slow; \
LOAD %r14,-SIZE(%r3,%r12); /* Get data from skb */ \
- b OFF_OK(%r6); /* Return */ \
+ B_EX OFF_OK,%r6; /* Return */ \
\
sk_load_##NAME##_slow:; \
lgr %r2,%r7; /* Arg1 = skb pointer */ \
@@ -64,11 +65,14 @@ sk_load_##NAME##_slow:; \
brasl %r14,skb_copy_bits; /* Get data from skb */ \
LOAD %r14,STK_OFF_TMP(%r15); /* Load from temp buffer */ \
ltgr %r2,%r2; /* Set cc to (%r2 != 0) */ \
- br %r6; /* Return */
+ BR_EX %r6; /* Return */
sk_load_common(word, 4, llgf) /* r14 = *(u32 *) (skb->data+offset) */
sk_load_common(half, 2, llgh) /* r14 = *(u16 *) (skb->data+offset) */
+ GEN_BR_THUNK %r6
+ GEN_B_THUNK OFF_OK,%r6
+
/*
* Load 1 byte from SKB (optimized version)
*/
@@ -80,7 +84,7 @@ ENTRY(sk_load_byte_pos)
clg %r3,STK_OFF_HLEN(%r15) # Offset >= hlen?
jnl sk_load_byte_slow
llgc %r14,0(%r3,%r12) # Get byte from skb
- b OFF_OK(%r6) # Return OK
+ B_EX OFF_OK,%r6 # Return OK
sk_load_byte_slow:
lgr %r2,%r7 # Arg1 = skb pointer
@@ -90,7 +94,7 @@ sk_load_byte_slow:
brasl %r14,skb_copy_bits # Get data from skb
llgc %r14,STK_OFF_TMP(%r15) # Load result from temp buffer
ltgr %r2,%r2 # Set cc to (%r2 != 0)
- br %r6 # Return cc
+ BR_EX %r6 # Return cc
#define sk_negative_common(NAME, SIZE, LOAD) \
sk_load_##NAME##_slow_neg:; \
@@ -104,7 +108,7 @@ sk_load_##NAME##_slow_neg:; \
jz bpf_error; \
LOAD %r14,0(%r2); /* Get data from pointer */ \
xr %r3,%r3; /* Set cc to zero */ \
- br %r6; /* Return cc */
+ BR_EX %r6; /* Return cc */
sk_negative_common(word, 4, llgf)
sk_negative_common(half, 2, llgh)
@@ -113,4 +117,4 @@ sk_negative_common(byte, 1, llgc)
bpf_error:
# force a return 0 from jit handler
ltgr %r15,%r15 # Set condition code
- br %r6
+ BR_EX %r6
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index 78a19c93b380..dd2bcf0e7d00 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -25,6 +25,8 @@
#include <linux/bpf.h>
#include <asm/cacheflush.h>
#include <asm/dis.h>
+#include <asm/facility.h>
+#include <asm/nospec-branch.h>
#include <asm/set_memory.h>
#include "bpf_jit.h"
@@ -41,6 +43,8 @@ struct bpf_jit {
int base_ip; /* Base address for literal pool */
int ret0_ip; /* Address of return 0 */
int exit_ip; /* Address of exit */
+ int r1_thunk_ip; /* Address of expoline thunk for 'br %r1' */
+ int r14_thunk_ip; /* Address of expoline thunk for 'br %r14' */
int tail_call_start; /* Tail call start offset */
int labels[1]; /* Labels for local jumps */
};
@@ -250,6 +254,19 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1)
REG_SET_SEEN(b2); \
})
+#define EMIT6_PCREL_RILB(op, b, target) \
+({ \
+ int rel = (target - jit->prg) / 2; \
+ _EMIT6(op | reg_high(b) << 16 | rel >> 16, rel & 0xffff); \
+ REG_SET_SEEN(b); \
+})
+
+#define EMIT6_PCREL_RIL(op, target) \
+({ \
+ int rel = (target - jit->prg) / 2; \
+ _EMIT6(op | rel >> 16, rel & 0xffff); \
+})
+
#define _EMIT6_IMM(op, imm) \
({ \
unsigned int __imm = (imm); \
@@ -469,8 +486,45 @@ static void bpf_jit_epilogue(struct bpf_jit *jit, u32 stack_depth)
EMIT4(0xb9040000, REG_2, BPF_REG_0);
/* Restore registers */
save_restore_regs(jit, REGS_RESTORE, stack_depth);
+ if (IS_ENABLED(CC_USING_EXPOLINE) && !nospec_disable) {
+ jit->r14_thunk_ip = jit->prg;
+ /* Generate __s390_indirect_jump_r14 thunk */
+ if (test_facility(35)) {
+ /* exrl %r0,.+10 */
+ EMIT6_PCREL_RIL(0xc6000000, jit->prg + 10);
+ } else {
+ /* larl %r1,.+14 */
+ EMIT6_PCREL_RILB(0xc0000000, REG_1, jit->prg + 14);
+ /* ex 0,0(%r1) */
+ EMIT4_DISP(0x44000000, REG_0, REG_1, 0);
+ }
+ /* j . */
+ EMIT4_PCREL(0xa7f40000, 0);
+ }
/* br %r14 */
_EMIT2(0x07fe);
+
+ if (IS_ENABLED(CC_USING_EXPOLINE) && !nospec_disable &&
+ (jit->seen & SEEN_FUNC)) {
+ jit->r1_thunk_ip = jit->prg;
+ /* Generate __s390_indirect_jump_r1 thunk */
+ if (test_facility(35)) {
+ /* exrl %r0,.+10 */
+ EMIT6_PCREL_RIL(0xc6000000, jit->prg + 10);
+ /* j . */
+ EMIT4_PCREL(0xa7f40000, 0);
+ /* br %r1 */
+ _EMIT2(0x07f1);
+ } else {
+ /* larl %r1,.+14 */
+ EMIT6_PCREL_RILB(0xc0000000, REG_1, jit->prg + 14);
+ /* ex 0,S390_lowcore.br_r1_trampoline */
+ EMIT4_DISP(0x44000000, REG_0, REG_0,
+ offsetof(struct lowcore, br_r1_trampoline));
+ /* j . */
+ EMIT4_PCREL(0xa7f40000, 0);
+ }
+ }
}
/*
@@ -966,8 +1020,13 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i
/* lg %w1,<d(imm)>(%l) */
EMIT6_DISP_LH(0xe3000000, 0x0004, REG_W1, REG_0, REG_L,
EMIT_CONST_U64(func));
- /* basr %r14,%w1 */
- EMIT2(0x0d00, REG_14, REG_W1);
+ if (IS_ENABLED(CC_USING_EXPOLINE) && !nospec_disable) {
+ /* brasl %r14,__s390_indirect_jump_r1 */
+ EMIT6_PCREL_RILB(0xc0050000, REG_14, jit->r1_thunk_ip);
+ } else {
+ /* basr %r14,%w1 */
+ EMIT2(0x0d00, REG_14, REG_W1);
+ }
/* lgr %b0,%r2: load return value into %b0 */
EMIT4(0xb9040000, BPF_REG_0, REG_2);
if ((jit->seen & SEEN_SKB) &&
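The two new EMIT6_PCREL_* macros emit RIL-format instructions, whose immediate is a signed 32-bit offset counted in 2-byte halfwords from the start of the instruction. A small sketch of the offset math, mirroring the macros above (illustrative only):

    /* rel is in halfwords, relative to the emitting address jit->prg */
    static int pcrel_halfwords(int target, int prg)
    {
            return (target - prg) / 2;
    }

    /* "exrl 0,.+10" emitted at prg thus encodes
     * pcrel_halfwords(prg + 10, prg) == 5, i.e. the "br %r1" that
     * follows the 6-byte exrl and the 4-byte "j ." in the thunk. */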
diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c
index 2d15d84c20ed..d387a0fbdd7e 100644
--- a/arch/s390/pci/pci_dma.c
+++ b/arch/s390/pci/pci_dma.c
@@ -668,15 +668,6 @@ void zpci_dma_exit(void)
kmem_cache_destroy(dma_region_table_cache);
}
-#define PREALLOC_DMA_DEBUG_ENTRIES (1 << 16)
-
-static int __init dma_debug_do_init(void)
-{
- dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES);
- return 0;
-}
-fs_initcall(dma_debug_do_init);
-
const struct dma_map_ops s390_pci_dma_ops = {
.alloc = s390_dma_alloc,
.free = s390_dma_free,
@@ -685,8 +676,6 @@ const struct dma_map_ops s390_pci_dma_ops = {
.map_page = s390_dma_map_pages,
.unmap_page = s390_dma_unmap_pages,
.mapping_error = s390_mapping_error,
- /* if we support direct DMA this must be conditional */
- .is_phys = 0,
/* dma_supported is unconditionally true without a callback */
};
EXPORT_SYMBOL_GPL(s390_pci_dma_ops);
diff --git a/arch/s390/purgatory/Makefile b/arch/s390/purgatory/Makefile
index e9525bc1b4a6..1ace023cbdce 100644
--- a/arch/s390/purgatory/Makefile
+++ b/arch/s390/purgatory/Makefile
@@ -21,7 +21,7 @@ LDFLAGS_purgatory.ro += -z nodefaultlib
KBUILD_CFLAGS := -fno-strict-aliasing -Wall -Wstrict-prototypes
KBUILD_CFLAGS += -Wno-pointer-sign -Wno-sign-compare
KBUILD_CFLAGS += -fno-zero-initialized-in-bss -fno-builtin -ffreestanding
-KBUILD_CFLAGS += -c -MD -Os -m64
+KBUILD_CFLAGS += -c -MD -Os -m64 -msoft-float
KBUILD_CFLAGS += $(call cc-option,-fno-PIE)
$(obj)/purgatory.ro: $(PURGATORY_OBJS) FORCE
diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index 97fe29316476..a97538b607a4 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -9,11 +9,11 @@ config SUPERH
select HAVE_IDE if HAS_IOPORT_MAP
select HAVE_MEMBLOCK
select HAVE_MEMBLOCK_NODE_MAP
+ select NO_BOOTMEM
select ARCH_DISCARD_MEMBLOCK
select HAVE_OPROFILE
select HAVE_GENERIC_DMA_COHERENT
select HAVE_ARCH_TRACEHOOK
- select HAVE_DMA_API_DEBUG
select HAVE_PERF_EVENTS
select HAVE_DEBUG_BUGVERBOSE
select ARCH_HAVE_CUSTOM_GPIO_H
@@ -50,6 +50,9 @@ config SUPERH
select HAVE_ARCH_AUDITSYSCALL
select HAVE_FUTEX_CMPXCHG if FUTEX
select HAVE_NMI
+ select NEED_DMA_MAP_STATE
+ select NEED_SG_DMA_LENGTH
+
help
The SuperH is a RISC processor targeted for use in embedded systems
and consumer electronics; it was also used in the Sega Dreamcast
@@ -160,12 +163,6 @@ config DMA_COHERENT
config DMA_NONCOHERENT
def_bool !DMA_COHERENT
-config NEED_DMA_MAP_STATE
- def_bool DMA_NONCOHERENT
-
-config NEED_SG_DMA_LENGTH
- def_bool y
-
config PGTABLE_LEVELS
default 3 if X2TLB
default 2
diff --git a/arch/sh/drivers/dma/dma-api.c b/arch/sh/drivers/dma/dma-api.c
index c0eec08d8f95..b05be597b19f 100644
--- a/arch/sh/drivers/dma/dma-api.c
+++ b/arch/sh/drivers/dma/dma-api.c
@@ -339,18 +339,6 @@ static int dma_proc_show(struct seq_file *m, void *v)
return 0;
}
-static int dma_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, dma_proc_show, NULL);
-}
-
-static const struct file_operations dma_proc_fops = {
- .open = dma_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
int register_dmac(struct dma_info *info)
{
unsigned int total_channels, i;
@@ -423,7 +411,7 @@ EXPORT_SYMBOL(unregister_dmac);
static int __init dma_api_init(void)
{
printk(KERN_NOTICE "DMA: Registering DMA API.\n");
- return proc_create("dma", 0, NULL, &dma_proc_fops) ? 0 : -ENOMEM;
+ return proc_create_single("dma", 0, NULL, dma_proc_show) ? 0 : -ENOMEM;
}
subsys_initcall(dma_api_init);
diff --git a/arch/sh/include/asm/pci.h b/arch/sh/include/asm/pci.h
index 0033f0df2b3b..10a36b1cf2ea 100644
--- a/arch/sh/include/asm/pci.h
+++ b/arch/sh/include/asm/pci.h
@@ -71,12 +71,6 @@ extern unsigned long PCIBIOS_MIN_IO, PCIBIOS_MIN_MEM;
* SuperH has everything mapped statically like x86.
*/
-/* The PCI address space does equal the physical memory
- * address space. The networking and block device layers use
- * this boolean for bounce buffer decisions.
- */
-#define PCI_DMA_BUS_IS_PHYS (dma_ops->is_phys)
-
#ifdef CONFIG_PCI
/*
* None of the SH PCI controllers support MWI, it is always treated as a
diff --git a/arch/sh/kernel/cpu/sh2/probe.c b/arch/sh/kernel/cpu/sh2/probe.c
index 4205f6d42b69..a5bd03642678 100644
--- a/arch/sh/kernel/cpu/sh2/probe.c
+++ b/arch/sh/kernel/cpu/sh2/probe.c
@@ -43,7 +43,11 @@ void __ref cpu_probe(void)
#endif
#if defined(CONFIG_CPU_J2)
+#if defined(CONFIG_SMP)
unsigned cpu = hard_smp_processor_id();
+#else
+ unsigned cpu = 0;
+#endif
if (cpu == 0) of_scan_flat_dt(scan_cache, NULL);
if (j2_ccr_base) __raw_writel(0x80000303, j2_ccr_base + 4*cpu);
if (cpu != 0) return;
diff --git a/arch/sh/kernel/dma-nommu.c b/arch/sh/kernel/dma-nommu.c
index 178457d7620c..3e3a32fc676e 100644
--- a/arch/sh/kernel/dma-nommu.c
+++ b/arch/sh/kernel/dma-nommu.c
@@ -78,7 +78,6 @@ const struct dma_map_ops nommu_dma_ops = {
.sync_single_for_device = nommu_sync_single_for_device,
.sync_sg_for_device = nommu_sync_sg_for_device,
#endif
- .is_phys = 1,
};
void __init no_iommu_init(void)
diff --git a/arch/sh/kernel/hw_breakpoint.c b/arch/sh/kernel/hw_breakpoint.c
index afe965712a69..8648ed05ccf0 100644
--- a/arch/sh/kernel/hw_breakpoint.c
+++ b/arch/sh/kernel/hw_breakpoint.c
@@ -347,13 +347,8 @@ static int __kprobes hw_breakpoint_handler(struct die_args *args)
/* Deliver the signal to userspace */
if (!arch_check_bp_in_kernelspace(bp)) {
- siginfo_t info;
-
- info.si_signo = args->signr;
- info.si_errno = notifier_to_errno(rc);
- info.si_code = TRAP_HWBKPT;
-
- force_sig_info(args->signr, &info, current);
+ force_sig_fault(SIGTRAP, TRAP_HWBKPT,
+ (void __user *)NULL, current);
}
rcu_read_unlock();
diff --git a/arch/sh/kernel/irq.c b/arch/sh/kernel/irq.c
index 245dbeb20afe..5717c7cbdd97 100644
--- a/arch/sh/kernel/irq.c
+++ b/arch/sh/kernel/irq.c
@@ -44,7 +44,7 @@ int arch_show_interrupts(struct seq_file *p, int prec)
seq_printf(p, "%*s: ", prec, "NMI");
for_each_online_cpu(j)
- seq_printf(p, "%10u ", irq_stat[j].__nmi_count);
+ seq_printf(p, "%10u ", nmi_count(j));
seq_printf(p, " Non-maskable interrupts\n");
seq_printf(p, "%*s: %10u\n", prec, "ERR", atomic_read(&irq_err_count));
diff --git a/arch/sh/kernel/setup.c b/arch/sh/kernel/setup.c
index d34e998b809f..c286cf5da6e7 100644
--- a/arch/sh/kernel/setup.c
+++ b/arch/sh/kernel/setup.c
@@ -11,7 +11,6 @@
#include <linux/ioport.h>
#include <linux/init.h>
#include <linux/initrd.h>
-#include <linux/bootmem.h>
#include <linux/console.h>
#include <linux/root_dev.h>
#include <linux/utsname.h>
diff --git a/arch/sh/kernel/traps_32.c b/arch/sh/kernel/traps_32.c
index b3770bb26211..60709ad17fc7 100644
--- a/arch/sh/kernel/traps_32.c
+++ b/arch/sh/kernel/traps_32.c
@@ -477,7 +477,6 @@ asmlinkage void do_address_error(struct pt_regs *regs,
{
unsigned long error_code = 0;
mm_segment_t oldfs;
- siginfo_t info;
insn_size_t instruction;
int tmp;
@@ -537,11 +536,7 @@ uspace_segv:
"access (PC %lx PR %lx)\n", current->comm, regs->pc,
regs->pr);
- info.si_signo = SIGBUS;
- info.si_errno = 0;
- info.si_code = si_code;
- info.si_addr = (void __user *)address;
- force_sig_info(SIGBUS, &info, current);
+ force_sig_fault(SIGBUS, si_code, (void __user *)address, current);
} else {
inc_unaligned_kernel_access();
@@ -598,19 +593,20 @@ int is_dsp_inst(struct pt_regs *regs)
#ifdef CONFIG_CPU_SH2A
asmlinkage void do_divide_error(unsigned long r4)
{
- siginfo_t info;
+ int code;
switch (r4) {
case TRAP_DIVZERO_ERROR:
- info.si_code = FPE_INTDIV;
+ code = FPE_INTDIV;
break;
case TRAP_DIVOVF_ERROR:
- info.si_code = FPE_INTOVF;
+ code = FPE_INTOVF;
break;
+ default:
+ /* Let gcc know unhandled cases don't make it past here */
+ return;
}
-
- info.si_signo = SIGFPE;
- force_sig_info(info.si_signo, &info, current);
+ force_sig_fault(SIGFPE, code, NULL, current);
}
#endif
diff --git a/arch/sh/math-emu/math.c b/arch/sh/math-emu/math.c
index c86f4360c6ce..a0fa8fc88739 100644
--- a/arch/sh/math-emu/math.c
+++ b/arch/sh/math-emu/math.c
@@ -507,7 +507,6 @@ static int ieee_fpe_handler(struct pt_regs *regs)
unsigned short insn = *(unsigned short *)regs->pc;
unsigned short finsn;
unsigned long nextpc;
- siginfo_t info;
int nib[4] = {
(insn >> 12) & 0xf,
(insn >> 8) & 0xf,
@@ -560,11 +559,8 @@ static int ieee_fpe_handler(struct pt_regs *regs)
~(FPSCR_CAUSE_MASK | FPSCR_FLAG_MASK);
task_thread_info(tsk)->status |= TS_USEDFPU;
} else {
- info.si_signo = SIGFPE;
- info.si_errno = 0;
- info.si_code = FPE_FLTINV;
- info.si_addr = (void __user *)regs->pc;
- force_sig_info(SIGFPE, &info, tsk);
+ force_sig_fault(SIGFPE, FPE_FLTINV,
+ (void __user *)regs->pc, tsk);
}
regs->pc = nextpc;
diff --git a/arch/sh/mm/consistent.c b/arch/sh/mm/consistent.c
index 8ce98691d822..fceb2adfcac7 100644
--- a/arch/sh/mm/consistent.c
+++ b/arch/sh/mm/consistent.c
@@ -20,18 +20,9 @@
#include <asm/cacheflush.h>
#include <asm/addrspace.h>
-#define PREALLOC_DMA_DEBUG_ENTRIES 4096
-
const struct dma_map_ops *dma_ops;
EXPORT_SYMBOL(dma_ops);
-static int __init dma_init(void)
-{
- dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES);
- return 0;
-}
-fs_initcall(dma_init);
-
void *dma_generic_alloc_coherent(struct device *dev, size_t size,
dma_addr_t *dma_handle, gfp_t gfp,
unsigned long attrs)
@@ -59,7 +50,9 @@ void *dma_generic_alloc_coherent(struct device *dev, size_t size,
split_page(pfn_to_page(virt_to_phys(ret) >> PAGE_SHIFT), order);
- *dma_handle = virt_to_phys(ret) - PFN_PHYS(dev->dma_pfn_offset);
+ *dma_handle = virt_to_phys(ret);
+ if (!WARN_ON(!dev))
+ *dma_handle -= PFN_PHYS(dev->dma_pfn_offset);
return ret_nocache;
}
@@ -69,9 +62,12 @@ void dma_generic_free_coherent(struct device *dev, size_t size,
unsigned long attrs)
{
int order = get_order(size);
- unsigned long pfn = (dma_handle >> PAGE_SHIFT) + dev->dma_pfn_offset;
+ unsigned long pfn = dma_handle >> PAGE_SHIFT;
int k;
+ if (!WARN_ON(!dev))
+ pfn += dev->dma_pfn_offset;
+
for (k = 0; k < (1 << order); k++)
__free_pages(pfn_to_page(pfn + k), 0);
@@ -143,7 +139,7 @@ int __init platform_resource_setup_memory(struct platform_device *pdev,
if (!memsize)
return 0;
- buf = dma_alloc_coherent(NULL, memsize, &dma_handle, GFP_KERNEL);
+ buf = dma_alloc_coherent(&pdev->dev, memsize, &dma_handle, GFP_KERNEL);
if (!buf) {
pr_warning("%s: unable to allocate memory\n", name);
return -ENOMEM;
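The dev checks above rely on WARN_ON() evaluating to the truth of its condition, so !WARN_ON(!dev) reads as "dev is non-NULL, warn otherwise", and the pfn offset is only applied for real devices. A sketch of the idiom (kernel context assumed, hypothetical helper name):

    static unsigned long dma_handle_to_pfn(struct device *dev, dma_addr_t handle)
    {
            unsigned long pfn = handle >> PAGE_SHIFT;

            if (!WARN_ON(!dev))             /* NULL dev: warn, skip the offset */
                    pfn += dev->dma_pfn_offset;
            return pfn;
    }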
diff --git a/arch/sh/mm/fault.c b/arch/sh/mm/fault.c
index 6fd1bf7481c7..b8e7bb84b6b1 100644
--- a/arch/sh/mm/fault.c
+++ b/arch/sh/mm/fault.c
@@ -42,14 +42,7 @@ static void
force_sig_info_fault(int si_signo, int si_code, unsigned long address,
struct task_struct *tsk)
{
- siginfo_t info;
-
- info.si_signo = si_signo;
- info.si_errno = 0;
- info.si_code = si_code;
- info.si_addr = (void __user *)address;
-
- force_sig_info(si_signo, &info, tsk);
+ force_sig_fault(si_signo, si_code, (void __user *)address, tsk);
}
/*
diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c
index ce0bbaa7e404..4034035fbede 100644
--- a/arch/sh/mm/init.c
+++ b/arch/sh/mm/init.c
@@ -211,59 +211,15 @@ void __init allocate_pgdat(unsigned int nid)
NODE_DATA(nid) = __va(phys);
memset(NODE_DATA(nid), 0, sizeof(struct pglist_data));
-
- NODE_DATA(nid)->bdata = &bootmem_node_data[nid];
#endif
NODE_DATA(nid)->node_start_pfn = start_pfn;
NODE_DATA(nid)->node_spanned_pages = end_pfn - start_pfn;
}
-static void __init bootmem_init_one_node(unsigned int nid)
-{
- unsigned long total_pages, paddr;
- unsigned long end_pfn;
- struct pglist_data *p;
-
- p = NODE_DATA(nid);
-
- /* Nothing to do.. */
- if (!p->node_spanned_pages)
- return;
-
- end_pfn = pgdat_end_pfn(p);
-
- total_pages = bootmem_bootmap_pages(p->node_spanned_pages);
-
- paddr = memblock_alloc(total_pages << PAGE_SHIFT, PAGE_SIZE);
- if (!paddr)
- panic("Can't allocate bootmap for nid[%d]\n", nid);
-
- init_bootmem_node(p, paddr >> PAGE_SHIFT, p->node_start_pfn, end_pfn);
-
- free_bootmem_with_active_regions(nid, end_pfn);
-
- /*
- * XXX Handle initial reservations for the system memory node
- * only for the moment, we'll refactor this later for handling
- * reservations in other nodes.
- */
- if (nid == 0) {
- struct memblock_region *reg;
-
- /* Reserve the sections we're already using. */
- for_each_memblock(reserved, reg) {
- reserve_bootmem(reg->base, reg->size, BOOTMEM_DEFAULT);
- }
- }
-
- sparse_memory_present_with_active_regions(nid);
-}
-
static void __init do_init_bootmem(void)
{
struct memblock_region *reg;
- int i;
/* Add active regions with valid PFNs. */
for_each_memblock(memory, reg) {
@@ -279,9 +235,12 @@ static void __init do_init_bootmem(void)
plat_mem_setup();
- for_each_online_node(i)
- bootmem_init_one_node(i);
+ for_each_memblock(memory, reg) {
+ int nid = memblock_get_region_node(reg);
+ memory_present(nid, memblock_region_memory_base_pfn(reg),
+ memblock_region_memory_end_pfn(reg));
+ }
sparse_init();
}
@@ -322,7 +281,6 @@ void __init paging_init(void)
{
unsigned long max_zone_pfns[MAX_NR_ZONES];
unsigned long vaddr, end;
- int nid;
sh_mv.mv_mem_init();
@@ -377,21 +335,7 @@ void __init paging_init(void)
kmap_coherent_init();
memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
-
- for_each_online_node(nid) {
- pg_data_t *pgdat = NODE_DATA(nid);
- unsigned long low, start_pfn;
-
- start_pfn = pgdat->bdata->node_min_pfn;
- low = pgdat->bdata->node_low_pfn;
-
- if (max_zone_pfns[ZONE_NORMAL] < low)
- max_zone_pfns[ZONE_NORMAL] = low;
-
- printk("Node %u: start_pfn = 0x%lx, low = 0x%lx\n",
- nid, start_pfn, low);
- }
-
+ max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
free_area_init_nodes(max_zone_pfns);
}
diff --git a/arch/sh/mm/numa.c b/arch/sh/mm/numa.c
index 05713d190247..830e8b3684e4 100644
--- a/arch/sh/mm/numa.c
+++ b/arch/sh/mm/numa.c
@@ -8,7 +8,6 @@
* for more details.
*/
#include <linux/module.h>
-#include <linux/bootmem.h>
#include <linux/memblock.h>
#include <linux/mm.h>
#include <linux/numa.h>
@@ -26,9 +25,7 @@ EXPORT_SYMBOL_GPL(node_data);
*/
void __init setup_bootmem_node(int nid, unsigned long start, unsigned long end)
{
- unsigned long bootmap_pages;
unsigned long start_pfn, end_pfn;
- unsigned long bootmem_paddr;
/* Don't allow bogus node assignment */
BUG_ON(nid >= MAX_NUMNODES || nid <= 0);
@@ -48,25 +45,9 @@ void __init setup_bootmem_node(int nid, unsigned long start, unsigned long end)
SMP_CACHE_BYTES, end));
memset(NODE_DATA(nid), 0, sizeof(struct pglist_data));
- NODE_DATA(nid)->bdata = &bootmem_node_data[nid];
NODE_DATA(nid)->node_start_pfn = start_pfn;
NODE_DATA(nid)->node_spanned_pages = end_pfn - start_pfn;
- /* Node-local bootmap */
- bootmap_pages = bootmem_bootmap_pages(end_pfn - start_pfn);
- bootmem_paddr = memblock_alloc_base(bootmap_pages << PAGE_SHIFT,
- PAGE_SIZE, end);
- init_bootmem_node(NODE_DATA(nid), bootmem_paddr >> PAGE_SHIFT,
- start_pfn, end_pfn);
-
- free_bootmem_with_active_regions(nid, end_pfn);
-
- /* Reserve the pgdat and bootmap space with the bootmem allocator */
- reserve_bootmem_node(NODE_DATA(nid), start_pfn << PAGE_SHIFT,
- sizeof(struct pglist_data), BOOTMEM_DEFAULT);
- reserve_bootmem_node(NODE_DATA(nid), bootmem_paddr,
- bootmap_pages << PAGE_SHIFT, BOOTMEM_DEFAULT);
-
/* It's up */
node_set_online(nid);
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index 8767e45f1b2b..435dbc033afe 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -25,7 +25,6 @@ config SPARC
select RTC_CLASS
select RTC_DRV_M48T59
select RTC_SYSTOHC
- select HAVE_DMA_API_DEBUG
select HAVE_ARCH_JUMP_LABEL if SPARC64
select GENERIC_IRQ_SHOW
select ARCH_WANT_IPC_PARSE_VERSION
@@ -44,6 +43,8 @@ config SPARC
select ARCH_HAS_SG_CHAIN
select CPU_NO_EFFICIENT_FFS
select LOCKDEP_SMALL if LOCKDEP
+ select NEED_DMA_MAP_STATE
+ select NEED_SG_DMA_LENGTH
config SPARC32
def_bool !64BIT
@@ -67,6 +68,7 @@ config SPARC64
select HAVE_SYSCALL_TRACEPOINTS
select HAVE_CONTEXT_TRACKING
select HAVE_DEBUG_KMEMLEAK
+ select IOMMU_HELPER
select SPARSE_IRQ
select RTC_DRV_CMOS
select RTC_DRV_BQ4802
@@ -102,14 +104,6 @@ config ARCH_ATU
bool
default y if SPARC64
-config ARCH_DMA_ADDR_T_64BIT
- bool
- default y if ARCH_ATU
-
-config IOMMU_HELPER
- bool
- default y if SPARC64
-
config STACKTRACE_SUPPORT
bool
default y if SPARC64
@@ -146,12 +140,6 @@ config ZONE_DMA
bool
default y if SPARC32
-config NEED_DMA_MAP_STATE
- def_bool y
-
-config NEED_SG_DMA_LENGTH
- def_bool y
-
config GENERIC_ISA_DMA
bool
default y if SPARC32
diff --git a/arch/sparc/include/asm/hardirq_64.h b/arch/sparc/include/asm/hardirq_64.h
index f56540271993..75b92bfe04b5 100644
--- a/arch/sparc/include/asm/hardirq_64.h
+++ b/arch/sparc/include/asm/hardirq_64.h
@@ -10,8 +10,9 @@
#include <asm/cpudata.h>
#define __ARCH_IRQ_STAT
-#define local_softirq_pending() \
- (local_cpu_data().__softirq_pending)
+
+#define local_softirq_pending_ref \
+ __cpu_data.__softirq_pending
void ack_bad_irq(unsigned int irq);
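Defining only local_softirq_pending_ref hooks sparc64 into the consolidated softirq-pending accessors: generic code then generates local_softirq_pending(), set_softirq_pending() and or_softirq_pending() from the reference (the s390 hunk earlier keeps explicit set/or macros instead, since its field lives in lowcore). Roughly, the generic side of that contract, assuming the 4.18-era <linux/interrupt.h> (quoted from memory, not from this patch):

    #ifndef local_softirq_pending_ref
    #define local_softirq_pending_ref irq_stat.__softirq_pending
    #endif

    #define local_softirq_pending() (__this_cpu_read(local_softirq_pending_ref))
    #define set_softirq_pending(x)  (__this_cpu_write(local_softirq_pending_ref, (x)))
    #define or_softirq_pending(x)   (__this_cpu_or(local_softirq_pending_ref, (x)))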
diff --git a/include/linux/iommu-common.h b/arch/sparc/include/asm/iommu-common.h
index 802c90c79d1f..802c90c79d1f 100644
--- a/include/linux/iommu-common.h
+++ b/arch/sparc/include/asm/iommu-common.h
diff --git a/arch/sparc/include/asm/iommu_64.h b/arch/sparc/include/asm/iommu_64.h
index 9ed6b54caa4b..0ef6dedf747e 100644
--- a/arch/sparc/include/asm/iommu_64.h
+++ b/arch/sparc/include/asm/iommu_64.h
@@ -17,7 +17,7 @@
#define IOPTE_WRITE 0x0000000000000002UL
#define IOMMU_NUM_CTXS 4096
-#include <linux/iommu-common.h>
+#include <asm/iommu-common.h>
struct iommu_arena {
unsigned long *map;
diff --git a/arch/sparc/include/asm/pci_32.h b/arch/sparc/include/asm/pci_32.h
index 98917e48727d..cfc0ee9476c6 100644
--- a/arch/sparc/include/asm/pci_32.h
+++ b/arch/sparc/include/asm/pci_32.h
@@ -17,10 +17,6 @@
#define PCI_IRQ_NONE 0xffffffff
-/* Dynamic DMA mapping stuff.
- */
-#define PCI_DMA_BUS_IS_PHYS (0)
-
#endif /* __KERNEL__ */
#ifndef CONFIG_LEON_PCI
diff --git a/arch/sparc/include/asm/pci_64.h b/arch/sparc/include/asm/pci_64.h
index 671274e36cfa..fac77813402c 100644
--- a/arch/sparc/include/asm/pci_64.h
+++ b/arch/sparc/include/asm/pci_64.h
@@ -17,12 +17,6 @@
#define PCI_IRQ_NONE 0xffffffff
-/* The PCI address space does not equal the physical memory
- * address space. The networking and block device layers use
- * this boolean for bounce buffer decisions.
- */
-#define PCI_DMA_BUS_IS_PHYS (0)
-
/* PCI IOMMU mapping bypass support. */
/* PCI 64-bit addressing works for all slots on all controller
diff --git a/arch/sparc/include/uapi/asm/jsflash.h b/arch/sparc/include/uapi/asm/jsflash.h
deleted file mode 100644
index 68c98a54281a..000000000000
--- a/arch/sparc/include/uapi/asm/jsflash.h
+++ /dev/null
@@ -1,40 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-/*
- * jsflash.h: OS Flash SIMM support for JavaStations.
- *
- * Copyright (C) 1999 Pete Zaitcev
- */
-
-#ifndef _SPARC_JSFLASH_H
-#define _SPARC_JSFLASH_H
-
-#ifndef _SPARC_TYPES_H
-#include <linux/types.h>
-#endif
-
-/*
- * Semantics of the offset is a full address.
- * Hardcode it or get it from probe ioctl.
- *
- * We use full bus address, so that we would be
- * automatically compatible with possible future systems.
- */
-
-#define JSFLASH_IDENT (('F'<<8)|54)
-struct jsflash_ident_arg {
- __u64 off; /* 0x20000000 is included */
- __u32 size;
- char name[32]; /* With trailing zero */
-};
-
-#define JSFLASH_ERASE (('F'<<8)|55)
-/* Put 0 as argument, may be flags or sector number... */
-
-#define JSFLASH_PROGRAM (('F'<<8)|56)
-struct jsflash_program_arg {
- __u64 data; /* char* for sparc and sparc64 */
- __u64 off;
- __u32 size;
-};
-
-#endif /* _SPARC_JSFLASH_H */
diff --git a/arch/sparc/include/uapi/asm/oradax.h b/arch/sparc/include/uapi/asm/oradax.h
index 722951908b0a..4f6676fe4bcc 100644
--- a/arch/sparc/include/uapi/asm/oradax.h
+++ b/arch/sparc/include/uapi/asm/oradax.h
@@ -3,7 +3,7 @@
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
+ * the Free Software Foundation, either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
diff --git a/arch/sparc/include/uapi/asm/siginfo.h b/arch/sparc/include/uapi/asm/siginfo.h
index 896ce447d16a..e7049550ac82 100644
--- a/arch/sparc/include/uapi/asm/siginfo.h
+++ b/arch/sparc/include/uapi/asm/siginfo.h
@@ -18,13 +18,6 @@
#define SI_NOINFO 32767 /* no information in siginfo_t */
/*
- * SIGFPE si_codes
- */
-#ifdef __KERNEL__
-#define FPE_FIXME 0 /* Broken dup of SI_USER */
-#endif /* __KERNEL__ */
-
-/*
* SIGEMT si_codes
*/
#define EMT_TAGOVF 1 /* tag overflow */
diff --git a/arch/sparc/kernel/Makefile b/arch/sparc/kernel/Makefile
index 76cb57750dda..cf8640841b7a 100644
--- a/arch/sparc/kernel/Makefile
+++ b/arch/sparc/kernel/Makefile
@@ -59,7 +59,7 @@ obj-$(CONFIG_SPARC32) += leon_pmc.o
obj-$(CONFIG_SPARC64) += reboot.o
obj-$(CONFIG_SPARC64) += sysfs.o
-obj-$(CONFIG_SPARC64) += iommu.o
+obj-$(CONFIG_SPARC64) += iommu.o iommu-common.o
obj-$(CONFIG_SPARC64) += central.o
obj-$(CONFIG_SPARC64) += starfire.o
obj-$(CONFIG_SPARC64) += power.o
@@ -74,8 +74,6 @@ obj-$(CONFIG_SPARC64) += pcr.o
obj-$(CONFIG_SPARC64) += nmi.o
obj-$(CONFIG_SPARC64_SMP) += cpumap.o
-obj-y += dma.o
-
obj-$(CONFIG_PCIC_PCI) += pcic.o
obj-$(CONFIG_LEON_PCI) += leon_pci.o
obj-$(CONFIG_SPARC_GRPCI2)+= leon_pci_grpci2.o
diff --git a/arch/sparc/kernel/dma.c b/arch/sparc/kernel/dma.c
deleted file mode 100644
index f73e7597c971..000000000000
--- a/arch/sparc/kernel/dma.c
+++ /dev/null
@@ -1,13 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <linux/kernel.h>
-#include <linux/dma-mapping.h>
-#include <linux/dma-debug.h>
-
-#define PREALLOC_DMA_DEBUG_ENTRIES (1 << 15)
-
-static int __init dma_init(void)
-{
- dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES);
- return 0;
-}
-fs_initcall(dma_init);
diff --git a/lib/iommu-common.c b/arch/sparc/kernel/iommu-common.c
index 55b00de106b5..59cb16691322 100644
--- a/lib/iommu-common.c
+++ b/arch/sparc/kernel/iommu-common.c
@@ -8,9 +8,9 @@
#include <linux/bitmap.h>
#include <linux/bug.h>
#include <linux/iommu-helper.h>
-#include <linux/iommu-common.h>
#include <linux/dma-mapping.h>
#include <linux/hash.h>
+#include <asm/iommu-common.h>
static unsigned long iommu_large_alloc = 15;
@@ -93,7 +93,6 @@ void iommu_tbl_pool_init(struct iommu_map_table *iommu,
p->hint = p->start;
p->end = num_entries;
}
-EXPORT_SYMBOL(iommu_tbl_pool_init);
unsigned long iommu_tbl_range_alloc(struct device *dev,
struct iommu_map_table *iommu,
@@ -224,7 +223,6 @@ bail:
return n;
}
-EXPORT_SYMBOL(iommu_tbl_range_alloc);
static struct iommu_pool *get_pool(struct iommu_map_table *tbl,
unsigned long entry)
@@ -264,4 +262,3 @@ void iommu_tbl_range_free(struct iommu_map_table *iommu, u64 dma_addr,
bitmap_clear(iommu->map, entry, npages);
spin_unlock_irqrestore(&(pool->lock), flags);
}
-EXPORT_SYMBOL(iommu_tbl_range_free);
diff --git a/arch/sparc/kernel/iommu.c b/arch/sparc/kernel/iommu.c
index b08dc3416f06..40d008b0bd3e 100644
--- a/arch/sparc/kernel/iommu.c
+++ b/arch/sparc/kernel/iommu.c
@@ -14,7 +14,7 @@
#include <linux/errno.h>
#include <linux/iommu-helper.h>
#include <linux/bitmap.h>
-#include <linux/iommu-common.h>
+#include <asm/iommu-common.h>
#ifdef CONFIG_PCI
#include <linux/pci.h>
diff --git a/arch/sparc/kernel/ioport.c b/arch/sparc/kernel/ioport.c
index 3bcef9ce74df..cca9134cfa7d 100644
--- a/arch/sparc/kernel/ioport.c
+++ b/arch/sparc/kernel/ioport.c
@@ -678,25 +678,14 @@ static int sparc_io_proc_show(struct seq_file *m, void *v)
return 0;
}
-
-static int sparc_io_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, sparc_io_proc_show, PDE_DATA(inode));
-}
-
-static const struct file_operations sparc_io_proc_fops = {
- .owner = THIS_MODULE,
- .open = sparc_io_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
#endif /* CONFIG_PROC_FS */
static void register_proc_sparc_ioport(void)
{
#ifdef CONFIG_PROC_FS
- proc_create_data("io_map", 0, NULL, &sparc_io_proc_fops, &sparc_iomap);
- proc_create_data("dvma_map", 0, NULL, &sparc_io_proc_fops, &_sparc_dvma);
+ proc_create_single_data("io_map", 0, NULL, sparc_io_proc_show,
+ &sparc_iomap);
+ proc_create_single_data("dvma_map", 0, NULL, sparc_io_proc_show,
+ &_sparc_dvma);
#endif
}
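
The ioport.c hunk above is one instance of the tree-wide proc_create_single_data() conversion: the single_open() boilerplate moves into the proc core and only the show() callback survives. A minimal sketch of the pattern, using hypothetical names (my_show, my_map, my_data) rather than anything from this patch:

    #include <linux/proc_fs.h>
    #include <linux/seq_file.h>

    /* Hypothetical show() callback; the data pointer handed to
     * proc_create_single_data() comes back as m->private. */
    static int my_show(struct seq_file *m, void *v)
    {
            struct resource *r = m->private;

            seq_printf(m, "%pR\n", r);
            return 0;
    }

    static void my_register(struct resource *my_data)
    {
            /* One call replaces the open/read/llseek/release
             * file_operations boilerplate deleted above. */
            proc_create_single_data("my_map", 0, NULL, my_show, my_data);
    }
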
diff --git a/arch/sparc/kernel/ldc.c b/arch/sparc/kernel/ldc.c
index 86b625f9d8dc..c0fa3ef6cf01 100644
--- a/arch/sparc/kernel/ldc.c
+++ b/arch/sparc/kernel/ldc.c
@@ -16,7 +16,7 @@
#include <linux/list.h>
#include <linux/init.h>
#include <linux/bitmap.h>
-#include <linux/iommu-common.h>
+#include <asm/iommu-common.h>
#include <asm/hypervisor.h>
#include <asm/iommu.h>
diff --git a/arch/sparc/kernel/pci_sun4v.c b/arch/sparc/kernel/pci_sun4v.c
index 249367228c33..565d9ac883d0 100644
--- a/arch/sparc/kernel/pci_sun4v.c
+++ b/arch/sparc/kernel/pci_sun4v.c
@@ -16,7 +16,7 @@
#include <linux/export.h>
#include <linux/log2.h>
#include <linux/of_device.h>
-#include <linux/iommu-common.h>
+#include <asm/iommu-common.h>
#include <asm/iommu.h>
#include <asm/irq.h>
diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c
index 454a8af28f13..6c086086ca8f 100644
--- a/arch/sparc/kernel/process_64.c
+++ b/arch/sparc/kernel/process_64.c
@@ -518,14 +518,7 @@ void synchronize_user_stack(void)
static void stack_unaligned(unsigned long sp)
{
- siginfo_t info;
-
- info.si_signo = SIGBUS;
- info.si_errno = 0;
- info.si_code = BUS_ADRALN;
- info.si_addr = (void __user *) sp;
- info.si_trapno = 0;
- force_sig_info(SIGBUS, &info, current);
+ force_sig_fault(SIGBUS, BUS_ADRALN, (void __user *) sp, 0, current);
}
void fault_in_user_windows(void)
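
The stack_unaligned() hunk is representative of every siginfo conversion in this series: a six-line fill of an on-stack siginfo_t collapses into a single force_sig_fault() call that also scrubs the structure. A before/after sketch, assuming the sparc configuration where __ARCH_SI_TRAPNO adds the trapno argument:

    #include <linux/sched/signal.h>

    static void old_style(unsigned long sp)
    {
            siginfo_t info;

            clear_siginfo(&info);
            info.si_signo  = SIGBUS;
            info.si_errno  = 0;
            info.si_code   = BUS_ADRALN;
            info.si_addr   = (void __user *)sp;
            info.si_trapno = 0;
            force_sig_info(SIGBUS, &info, current);
    }

    static void new_style(unsigned long sp)
    {
            /* Same signal, code, address and trapno; the helper zeroes
             * the unused siginfo fields internally. */
            force_sig_fault(SIGBUS, BUS_ADRALN, (void __user *)sp, 0, current);
    }
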
diff --git a/arch/sparc/kernel/sys_sparc_32.c b/arch/sparc/kernel/sys_sparc_32.c
index e8c3cb6b6d08..7f3d9c59719a 100644
--- a/arch/sparc/kernel/sys_sparc_32.c
+++ b/arch/sparc/kernel/sys_sparc_32.c
@@ -147,17 +147,11 @@ SYSCALL_DEFINE0(nis_syscall)
asmlinkage void
sparc_breakpoint (struct pt_regs *regs)
{
- siginfo_t info;
#ifdef DEBUG_SPARC_BREAKPOINT
printk ("TRAP: Entering kernel PC=%x, nPC=%x\n", regs->pc, regs->npc);
#endif
- info.si_signo = SIGTRAP;
- info.si_errno = 0;
- info.si_code = TRAP_BRKPT;
- info.si_addr = (void __user *)regs->pc;
- info.si_trapno = 0;
- force_sig_info(SIGTRAP, &info, current);
+ force_sig_fault(SIGTRAP, TRAP_BRKPT, (void __user *)regs->pc, 0, current);
#ifdef DEBUG_SPARC_BREAKPOINT
printk ("TRAP: Returning to space: PC=%x nPC=%x\n", regs->pc, regs->npc);
diff --git a/arch/sparc/kernel/sys_sparc_64.c b/arch/sparc/kernel/sys_sparc_64.c
index 9ef8de63f28b..7e49bbc925a5 100644
--- a/arch/sparc/kernel/sys_sparc_64.c
+++ b/arch/sparc/kernel/sys_sparc_64.c
@@ -502,7 +502,6 @@ SYSCALL_DEFINE0(nis_syscall)
asmlinkage void sparc_breakpoint(struct pt_regs *regs)
{
enum ctx_state prev_state = exception_enter();
- siginfo_t info;
if (test_thread_flag(TIF_32BIT)) {
regs->tpc &= 0xffffffff;
@@ -511,12 +510,7 @@ asmlinkage void sparc_breakpoint(struct pt_regs *regs)
#ifdef DEBUG_SPARC_BREAKPOINT
printk ("TRAP: Entering kernel PC=%lx, nPC=%lx\n", regs->tpc, regs->tnpc);
#endif
- info.si_signo = SIGTRAP;
- info.si_errno = 0;
- info.si_code = TRAP_BRKPT;
- info.si_addr = (void __user *)regs->tpc;
- info.si_trapno = 0;
- force_sig_info(SIGTRAP, &info, current);
+ force_sig_fault(SIGTRAP, TRAP_BRKPT, (void __user *)regs->tpc, 0, current);
#ifdef DEBUG_SPARC_BREAKPOINT
printk ("TRAP: Returning to space: PC=%lx nPC=%lx\n", regs->tpc, regs->tnpc);
#endif
diff --git a/arch/sparc/kernel/traps_32.c b/arch/sparc/kernel/traps_32.c
index b1ed763e4787..bcdfc6168dd5 100644
--- a/arch/sparc/kernel/traps_32.c
+++ b/arch/sparc/kernel/traps_32.c
@@ -93,8 +93,6 @@ void __noreturn die_if_kernel(char *str, struct pt_regs *regs)
void do_hw_interrupt(struct pt_regs *regs, unsigned long type)
{
- siginfo_t info;
-
if(type < 0x80) {
/* Sun OS's puke from bad traps, Linux survives! */
printk("Unimplemented Sparc TRAP, type = %02lx\n", type);
@@ -104,19 +102,13 @@ void do_hw_interrupt(struct pt_regs *regs, unsigned long type)
if(regs->psr & PSR_PS)
die_if_kernel("Kernel bad trap", regs);
- info.si_signo = SIGILL;
- info.si_errno = 0;
- info.si_code = ILL_ILLTRP;
- info.si_addr = (void __user *)regs->pc;
- info.si_trapno = type - 0x80;
- force_sig_info(SIGILL, &info, current);
+ force_sig_fault(SIGILL, ILL_ILLTRP,
+ (void __user *)regs->pc, type - 0x80, current);
}
void do_illegal_instruction(struct pt_regs *regs, unsigned long pc, unsigned long npc,
unsigned long psr)
{
- siginfo_t info;
-
if(psr & PSR_PS)
die_if_kernel("Kernel illegal instruction", regs);
#ifdef TRAP_DEBUG
@@ -124,27 +116,15 @@ void do_illegal_instruction(struct pt_regs *regs, unsigned long pc, unsigned lon
regs->pc, *(unsigned long *)regs->pc);
#endif
- info.si_signo = SIGILL;
- info.si_errno = 0;
- info.si_code = ILL_ILLOPC;
- info.si_addr = (void __user *)pc;
- info.si_trapno = 0;
- send_sig_info(SIGILL, &info, current);
+ send_sig_fault(SIGILL, ILL_ILLOPC, (void __user *)pc, 0, current);
}
void do_priv_instruction(struct pt_regs *regs, unsigned long pc, unsigned long npc,
unsigned long psr)
{
- siginfo_t info;
-
if(psr & PSR_PS)
die_if_kernel("Penguin instruction from Penguin mode??!?!", regs);
- info.si_signo = SIGILL;
- info.si_errno = 0;
- info.si_code = ILL_PRVOPC;
- info.si_addr = (void __user *)pc;
- info.si_trapno = 0;
- send_sig_info(SIGILL, &info, current);
+ send_sig_fault(SIGILL, ILL_PRVOPC, (void __user *)pc, 0, current);
}
/* XXX User may want to be allowed to do this. XXX */
@@ -152,8 +132,6 @@ void do_priv_instruction(struct pt_regs *regs, unsigned long pc, unsigned long n
void do_memaccess_unaligned(struct pt_regs *regs, unsigned long pc, unsigned long npc,
unsigned long psr)
{
- siginfo_t info;
-
if(regs->psr & PSR_PS) {
printk("KERNEL MNA at pc %08lx npc %08lx called by %08lx\n", pc, npc,
regs->u_regs[UREG_RETPC]);
@@ -165,12 +143,9 @@ void do_memaccess_unaligned(struct pt_regs *regs, unsigned long pc, unsigned lon
instruction_dump ((unsigned long *) regs->pc);
printk ("do_MNA!\n");
#endif
- info.si_signo = SIGBUS;
- info.si_errno = 0;
- info.si_code = BUS_ADRALN;
- info.si_addr = /* FIXME: Should dig out mna address */ (void *)0;
- info.si_trapno = 0;
- send_sig_info(SIGBUS, &info, current);
+ send_sig_fault(SIGBUS, BUS_ADRALN,
+ /* FIXME: Should dig out mna address */ (void *)0,
+ 0, current);
}
static unsigned long init_fsr = 0x0UL;
@@ -226,9 +201,9 @@ void do_fpe_trap(struct pt_regs *regs, unsigned long pc, unsigned long npc,
unsigned long psr)
{
static int calls;
- siginfo_t info;
unsigned long fsr;
int ret = 0;
+ int code;
#ifndef CONFIG_SMP
struct task_struct *fpt = last_task_used_math;
#else
@@ -303,24 +278,20 @@ void do_fpe_trap(struct pt_regs *regs, unsigned long pc, unsigned long npc,
}
fsr = fpt->thread.fsr;
- info.si_signo = SIGFPE;
- info.si_errno = 0;
- info.si_addr = (void __user *)pc;
- info.si_trapno = 0;
- info.si_code = FPE_FIXME;
+ code = FPE_FLTUNK;
if ((fsr & 0x1c000) == (1 << 14)) {
if (fsr & 0x10)
- info.si_code = FPE_FLTINV;
+ code = FPE_FLTINV;
else if (fsr & 0x08)
- info.si_code = FPE_FLTOVF;
+ code = FPE_FLTOVF;
else if (fsr & 0x04)
- info.si_code = FPE_FLTUND;
+ code = FPE_FLTUND;
else if (fsr & 0x02)
- info.si_code = FPE_FLTDIV;
+ code = FPE_FLTDIV;
else if (fsr & 0x01)
- info.si_code = FPE_FLTRES;
+ code = FPE_FLTRES;
}
- send_sig_info(SIGFPE, &info, fpt);
+ send_sig_fault(SIGFPE, code, (void __user *)pc, 0, fpt);
#ifndef CONFIG_SMP
last_task_used_math = NULL;
#endif
@@ -332,16 +303,9 @@ void do_fpe_trap(struct pt_regs *regs, unsigned long pc, unsigned long npc,
void handle_tag_overflow(struct pt_regs *regs, unsigned long pc, unsigned long npc,
unsigned long psr)
{
- siginfo_t info;
-
if(psr & PSR_PS)
die_if_kernel("Penguin overflow trap from kernel mode", regs);
- info.si_signo = SIGEMT;
- info.si_errno = 0;
- info.si_code = EMT_TAGOVF;
- info.si_addr = (void __user *)pc;
- info.si_trapno = 0;
- send_sig_info(SIGEMT, &info, current);
+ send_sig_fault(SIGEMT, EMT_TAGOVF, (void __user *)pc, 0, current);
}
void handle_watchpoint(struct pt_regs *regs, unsigned long pc, unsigned long npc,
@@ -359,61 +323,33 @@ void handle_watchpoint(struct pt_regs *regs, unsigned long pc, unsigned long npc
void handle_reg_access(struct pt_regs *regs, unsigned long pc, unsigned long npc,
unsigned long psr)
{
- siginfo_t info;
-
#ifdef TRAP_DEBUG
printk("Register Access Exception at PC %08lx NPC %08lx PSR %08lx\n",
pc, npc, psr);
#endif
- info.si_signo = SIGBUS;
- info.si_errno = 0;
- info.si_code = BUS_OBJERR;
- info.si_addr = (void __user *)pc;
- info.si_trapno = 0;
- force_sig_info(SIGBUS, &info, current);
+ force_sig_fault(SIGBUS, BUS_OBJERR, (void __user *)pc, 0, current);
}
void handle_cp_disabled(struct pt_regs *regs, unsigned long pc, unsigned long npc,
unsigned long psr)
{
- siginfo_t info;
-
- info.si_signo = SIGILL;
- info.si_errno = 0;
- info.si_code = ILL_COPROC;
- info.si_addr = (void __user *)pc;
- info.si_trapno = 0;
- send_sig_info(SIGILL, &info, current);
+ send_sig_fault(SIGILL, ILL_COPROC, (void __user *)pc, 0, current);
}
void handle_cp_exception(struct pt_regs *regs, unsigned long pc, unsigned long npc,
unsigned long psr)
{
- siginfo_t info;
-
#ifdef TRAP_DEBUG
printk("Co-Processor Exception at PC %08lx NPC %08lx PSR %08lx\n",
pc, npc, psr);
#endif
- info.si_signo = SIGILL;
- info.si_errno = 0;
- info.si_code = ILL_COPROC;
- info.si_addr = (void __user *)pc;
- info.si_trapno = 0;
- send_sig_info(SIGILL, &info, current);
+ send_sig_fault(SIGILL, ILL_COPROC, (void __user *)pc, 0, current);
}
void handle_hw_divzero(struct pt_regs *regs, unsigned long pc, unsigned long npc,
unsigned long psr)
{
- siginfo_t info;
-
- info.si_signo = SIGFPE;
- info.si_errno = 0;
- info.si_code = FPE_INTDIV;
- info.si_addr = (void __user *)pc;
- info.si_trapno = 0;
- send_sig_info(SIGFPE, &info, current);
+ send_sig_fault(SIGFPE, FPE_INTDIV, (void __user *)pc, 0, current);
}
#ifdef CONFIG_DEBUG_BUGVERBOSE
diff --git a/arch/sparc/kernel/traps_64.c b/arch/sparc/kernel/traps_64.c
index 462a21abd105..aa624ed79db1 100644
--- a/arch/sparc/kernel/traps_64.c
+++ b/arch/sparc/kernel/traps_64.c
@@ -87,7 +87,6 @@ static void dump_tl1_traplog(struct tl1_traplog *p)
void bad_trap(struct pt_regs *regs, long lvl)
{
char buffer[36];
- siginfo_t info;
if (notify_die(DIE_TRAP, "bad trap", regs,
0, lvl, SIGTRAP) == NOTIFY_STOP)
@@ -107,12 +106,8 @@ void bad_trap(struct pt_regs *regs, long lvl)
regs->tpc &= 0xffffffff;
regs->tnpc &= 0xffffffff;
}
- info.si_signo = SIGILL;
- info.si_errno = 0;
- info.si_code = ILL_ILLTRP;
- info.si_addr = (void __user *)regs->tpc;
- info.si_trapno = lvl;
- force_sig_info(SIGILL, &info, current);
+ force_sig_fault(SIGILL, ILL_ILLTRP,
+ (void __user *)regs->tpc, lvl, current);
}
void bad_trap_tl1(struct pt_regs *regs, long lvl)
@@ -191,7 +186,6 @@ EXPORT_SYMBOL_GPL(unregister_dimm_printer);
void spitfire_insn_access_exception(struct pt_regs *regs, unsigned long sfsr, unsigned long sfar)
{
enum ctx_state prev_state = exception_enter();
- siginfo_t info;
if (notify_die(DIE_TRAP, "instruction access exception", regs,
0, 0x8, SIGTRAP) == NOTIFY_STOP)
@@ -206,12 +200,8 @@ void spitfire_insn_access_exception(struct pt_regs *regs, unsigned long sfsr, un
regs->tpc &= 0xffffffff;
regs->tnpc &= 0xffffffff;
}
- info.si_signo = SIGSEGV;
- info.si_errno = 0;
- info.si_code = SEGV_MAPERR;
- info.si_addr = (void __user *)regs->tpc;
- info.si_trapno = 0;
- force_sig_info(SIGSEGV, &info, current);
+ force_sig_fault(SIGSEGV, SEGV_MAPERR,
+ (void __user *)regs->tpc, 0, current);
out:
exception_exit(prev_state);
}
@@ -230,7 +220,6 @@ void sun4v_insn_access_exception(struct pt_regs *regs, unsigned long addr, unsig
{
unsigned short type = (type_ctx >> 16);
unsigned short ctx = (type_ctx & 0xffff);
- siginfo_t info;
if (notify_die(DIE_TRAP, "instruction access exception", regs,
0, 0x8, SIGTRAP) == NOTIFY_STOP)
@@ -247,12 +236,7 @@ void sun4v_insn_access_exception(struct pt_regs *regs, unsigned long addr, unsig
regs->tpc &= 0xffffffff;
regs->tnpc &= 0xffffffff;
}
- info.si_signo = SIGSEGV;
- info.si_errno = 0;
- info.si_code = SEGV_MAPERR;
- info.si_addr = (void __user *) addr;
- info.si_trapno = 0;
- force_sig_info(SIGSEGV, &info, current);
+ force_sig_fault(SIGSEGV, SEGV_MAPERR, (void __user *) addr, 0, current);
}
void sun4v_insn_access_exception_tl1(struct pt_regs *regs, unsigned long addr, unsigned long type_ctx)
@@ -307,7 +291,6 @@ bool is_no_fault_exception(struct pt_regs *regs)
void spitfire_data_access_exception(struct pt_regs *regs, unsigned long sfsr, unsigned long sfar)
{
enum ctx_state prev_state = exception_enter();
- siginfo_t info;
if (notify_die(DIE_TRAP, "data access exception", regs,
0, 0x30, SIGTRAP) == NOTIFY_STOP)
@@ -338,12 +321,7 @@ void spitfire_data_access_exception(struct pt_regs *regs, unsigned long sfsr, un
if (is_no_fault_exception(regs))
return;
- info.si_signo = SIGSEGV;
- info.si_errno = 0;
- info.si_code = SEGV_MAPERR;
- info.si_addr = (void __user *)sfar;
- info.si_trapno = 0;
- force_sig_info(SIGSEGV, &info, current);
+ force_sig_fault(SIGSEGV, SEGV_MAPERR, (void __user *)sfar, 0, current);
out:
exception_exit(prev_state);
}
@@ -559,8 +537,6 @@ static void spitfire_cee_log(unsigned long afsr, unsigned long afar, unsigned lo
static void spitfire_ue_log(unsigned long afsr, unsigned long afar, unsigned long udbh, unsigned long udbl, unsigned long tt, int tl1, struct pt_regs *regs)
{
- siginfo_t info;
-
printk(KERN_WARNING "CPU[%d]: Uncorrectable Error AFSR[%lx] "
"AFAR[%lx] UDBL[%lx] UDBH[%ld] TT[%lx] TL>1[%d]\n",
smp_processor_id(), afsr, afar, udbl, udbh, tt, tl1);
@@ -595,12 +571,7 @@ static void spitfire_ue_log(unsigned long afsr, unsigned long afar, unsigned lon
regs->tpc &= 0xffffffff;
regs->tnpc &= 0xffffffff;
}
- info.si_signo = SIGBUS;
- info.si_errno = 0;
- info.si_code = BUS_OBJERR;
- info.si_addr = (void *)0;
- info.si_trapno = 0;
- force_sig_info(SIGBUS, &info, current);
+ force_sig_fault(SIGBUS, BUS_OBJERR, (void *)0, 0, current);
}
void spitfire_access_error(struct pt_regs *regs, unsigned long status_encoded, unsigned long afar)
@@ -2190,7 +2161,6 @@ bool sun4v_nonresum_error_user_handled(struct pt_regs *regs,
if (attrs & SUN4V_ERR_ATTRS_MEMORY) {
unsigned long addr = ent->err_raddr;
- siginfo_t info;
if (addr == ~(u64)0) {
/* This seems highly unlikely to ever occur */
@@ -2211,21 +2181,13 @@ bool sun4v_nonresum_error_user_handled(struct pt_regs *regs,
addr += PAGE_SIZE;
}
}
- info.si_signo = SIGKILL;
- info.si_errno = 0;
- info.si_trapno = 0;
- force_sig_info(info.si_signo, &info, current);
+ force_sig(SIGKILL, current);
return true;
}
if (attrs & SUN4V_ERR_ATTRS_PIO) {
- siginfo_t info;
-
- info.si_signo = SIGBUS;
- info.si_code = BUS_ADRERR;
- info.si_addr = (void __user *)sun4v_get_vaddr(regs);
- force_sig_info(info.si_signo, &info, current);
-
+ force_sig_fault(SIGBUS, BUS_ADRERR,
+ (void __user *)sun4v_get_vaddr(regs), 0, current);
return true;
}
@@ -2362,30 +2324,27 @@ static void do_fpe_common(struct pt_regs *regs)
regs->tnpc += 4;
} else {
unsigned long fsr = current_thread_info()->xfsr[0];
- siginfo_t info;
+ int code;
if (test_thread_flag(TIF_32BIT)) {
regs->tpc &= 0xffffffff;
regs->tnpc &= 0xffffffff;
}
- info.si_signo = SIGFPE;
- info.si_errno = 0;
- info.si_addr = (void __user *)regs->tpc;
- info.si_trapno = 0;
- info.si_code = FPE_FIXME;
+ code = FPE_FLTUNK;
if ((fsr & 0x1c000) == (1 << 14)) {
if (fsr & 0x10)
- info.si_code = FPE_FLTINV;
+ code = FPE_FLTINV;
else if (fsr & 0x08)
- info.si_code = FPE_FLTOVF;
+ code = FPE_FLTOVF;
else if (fsr & 0x04)
- info.si_code = FPE_FLTUND;
+ code = FPE_FLTUND;
else if (fsr & 0x02)
- info.si_code = FPE_FLTDIV;
+ code = FPE_FLTDIV;
else if (fsr & 0x01)
- info.si_code = FPE_FLTRES;
+ code = FPE_FLTRES;
}
- force_sig_info(SIGFPE, &info, current);
+ force_sig_fault(SIGFPE, code,
+ (void __user *)regs->tpc, 0, current);
}
}
@@ -2428,7 +2387,6 @@ out:
void do_tof(struct pt_regs *regs)
{
enum ctx_state prev_state = exception_enter();
- siginfo_t info;
if (notify_die(DIE_TRAP, "tagged arithmetic overflow", regs,
0, 0x26, SIGEMT) == NOTIFY_STOP)
@@ -2440,12 +2398,8 @@ void do_tof(struct pt_regs *regs)
regs->tpc &= 0xffffffff;
regs->tnpc &= 0xffffffff;
}
- info.si_signo = SIGEMT;
- info.si_errno = 0;
- info.si_code = EMT_TAGOVF;
- info.si_addr = (void __user *)regs->tpc;
- info.si_trapno = 0;
- force_sig_info(SIGEMT, &info, current);
+ force_sig_fault(SIGEMT, EMT_TAGOVF,
+ (void __user *)regs->tpc, 0, current);
out:
exception_exit(prev_state);
}
@@ -2453,7 +2407,6 @@ out:
void do_div0(struct pt_regs *regs)
{
enum ctx_state prev_state = exception_enter();
- siginfo_t info;
if (notify_die(DIE_TRAP, "integer division by zero", regs,
0, 0x28, SIGFPE) == NOTIFY_STOP)
@@ -2465,12 +2418,8 @@ void do_div0(struct pt_regs *regs)
regs->tpc &= 0xffffffff;
regs->tnpc &= 0xffffffff;
}
- info.si_signo = SIGFPE;
- info.si_errno = 0;
- info.si_code = FPE_INTDIV;
- info.si_addr = (void __user *)regs->tpc;
- info.si_trapno = 0;
- force_sig_info(SIGFPE, &info, current);
+ force_sig_fault(SIGFPE, FPE_INTDIV,
+ (void __user *)regs->tpc, 0, current);
out:
exception_exit(prev_state);
}
@@ -2632,7 +2581,6 @@ void do_illegal_instruction(struct pt_regs *regs)
unsigned long pc = regs->tpc;
unsigned long tstate = regs->tstate;
u32 insn;
- siginfo_t info;
if (notify_die(DIE_TRAP, "illegal instruction", regs,
0, 0x10, SIGILL) == NOTIFY_STOP)
@@ -2666,12 +2614,7 @@ void do_illegal_instruction(struct pt_regs *regs)
}
}
}
- info.si_signo = SIGILL;
- info.si_errno = 0;
- info.si_code = ILL_ILLOPC;
- info.si_addr = (void __user *)pc;
- info.si_trapno = 0;
- force_sig_info(SIGILL, &info, current);
+ force_sig_fault(SIGILL, ILL_ILLOPC, (void __user *)pc, 0, current);
out:
exception_exit(prev_state);
}
@@ -2679,7 +2622,6 @@ out:
void mem_address_unaligned(struct pt_regs *regs, unsigned long sfar, unsigned long sfsr)
{
enum ctx_state prev_state = exception_enter();
- siginfo_t info;
if (notify_die(DIE_TRAP, "memory address unaligned", regs,
0, 0x34, SIGSEGV) == NOTIFY_STOP)
@@ -2692,20 +2634,13 @@ void mem_address_unaligned(struct pt_regs *regs, unsigned long sfar, unsigned lo
if (is_no_fault_exception(regs))
return;
- info.si_signo = SIGBUS;
- info.si_errno = 0;
- info.si_code = BUS_ADRALN;
- info.si_addr = (void __user *)sfar;
- info.si_trapno = 0;
- force_sig_info(SIGBUS, &info, current);
+ force_sig_fault(SIGBUS, BUS_ADRALN, (void __user *)sfar, 0, current);
out:
exception_exit(prev_state);
}
void sun4v_do_mna(struct pt_regs *regs, unsigned long addr, unsigned long type_ctx)
{
- siginfo_t info;
-
if (notify_die(DIE_TRAP, "memory address unaligned", regs,
0, 0x34, SIGSEGV) == NOTIFY_STOP)
return;
@@ -2717,12 +2652,7 @@ void sun4v_do_mna(struct pt_regs *regs, unsigned long addr, unsigned long type_c
if (is_no_fault_exception(regs))
return;
- info.si_signo = SIGBUS;
- info.si_errno = 0;
- info.si_code = BUS_ADRALN;
- info.si_addr = (void __user *) addr;
- info.si_trapno = 0;
- force_sig_info(SIGBUS, &info, current);
+ force_sig_fault(SIGBUS, BUS_ADRALN, (void __user *) addr, 0, current);
}
/* sun4v_mem_corrupt_detect_precise() - Handle precise exception on an ADI
@@ -2775,7 +2705,6 @@ void sun4v_mem_corrupt_detect_precise(struct pt_regs *regs, unsigned long addr,
void do_privop(struct pt_regs *regs)
{
enum ctx_state prev_state = exception_enter();
- siginfo_t info;
if (notify_die(DIE_TRAP, "privileged operation", regs,
0, 0x11, SIGILL) == NOTIFY_STOP)
@@ -2785,12 +2714,8 @@ void do_privop(struct pt_regs *regs)
regs->tpc &= 0xffffffff;
regs->tnpc &= 0xffffffff;
}
- info.si_signo = SIGILL;
- info.si_errno = 0;
- info.si_code = ILL_PRVOPC;
- info.si_addr = (void __user *)regs->tpc;
- info.si_trapno = 0;
- force_sig_info(SIGILL, &info, current);
+ force_sig_fault(SIGILL, ILL_PRVOPC,
+ (void __user *)regs->tpc, 0, current);
out:
exception_exit(prev_state);
}
diff --git a/arch/sparc/kernel/unaligned_32.c b/arch/sparc/kernel/unaligned_32.c
index 7642d7e4f0d9..64ac8c0c1429 100644
--- a/arch/sparc/kernel/unaligned_32.c
+++ b/arch/sparc/kernel/unaligned_32.c
@@ -311,14 +311,9 @@ static inline int ok_for_user(struct pt_regs *regs, unsigned int insn,
static void user_mna_trap_fault(struct pt_regs *regs, unsigned int insn)
{
- siginfo_t info;
-
- info.si_signo = SIGBUS;
- info.si_errno = 0;
- info.si_code = BUS_ADRALN;
- info.si_addr = (void __user *)safe_compute_effective_address(regs, insn);
- info.si_trapno = 0;
- send_sig_info(SIGBUS, &info, current);
+ send_sig_fault(SIGBUS, BUS_ADRALN,
+ (void __user *)safe_compute_effective_address(regs, insn),
+ 0, current);
}
asmlinkage void user_unaligned_trap(struct pt_regs *regs, unsigned int insn)
diff --git a/arch/sparc/kernel/vio.c b/arch/sparc/kernel/vio.c
index 1a0fa10cb6b7..32bae68e34c1 100644
--- a/arch/sparc/kernel/vio.c
+++ b/arch/sparc/kernel/vio.c
@@ -403,7 +403,7 @@ static struct vio_dev *vio_create_one(struct mdesc_handle *hp, u64 mp,
if (err) {
printk(KERN_ERR "VIO: Could not register device %s, err=%d\n",
dev_name(&vdev->dev), err);
- kfree(vdev);
+ put_device(&vdev->dev);
return NULL;
}
if (vdev->dp)
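
The vio_create_one() fix encodes a general driver-core rule: once device_register() (or device_initialize()) has run, the embedded kobject owns the lifetime, so error paths must put_device() rather than kfree() the container. A hedged sketch with a hypothetical wrapper type:

    #include <linux/device.h>

    struct my_dev {                         /* hypothetical container */
            struct device dev;
    };

    static int my_create(struct my_dev *d)
    {
            int err = device_register(&d->dev);

            if (err) {
                    /* kfree(d) here would bypass the kobject refcount;
                     * put_device() drops the reference and ends up in
                     * d->dev.release(), which is where kfree() belongs. */
                    put_device(&d->dev);
                    return err;
            }
            return 0;
    }
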
diff --git a/arch/sparc/mm/fault_32.c b/arch/sparc/mm/fault_32.c
index a8103a84b4ac..9f75b6444bf1 100644
--- a/arch/sparc/mm/fault_32.c
+++ b/arch/sparc/mm/fault_32.c
@@ -127,19 +127,11 @@ show_signal_msg(struct pt_regs *regs, int sig, int code,
static void __do_fault_siginfo(int code, int sig, struct pt_regs *regs,
unsigned long addr)
{
- siginfo_t info;
-
- info.si_signo = sig;
- info.si_code = code;
- info.si_errno = 0;
- info.si_addr = (void __user *) addr;
- info.si_trapno = 0;
-
if (unlikely(show_unhandled_signals))
- show_signal_msg(regs, sig, info.si_code,
+ show_signal_msg(regs, sig, code,
addr, current);
- force_sig_info (sig, &info, current);
+ force_sig_fault(sig, code, (void __user *) addr, 0, current);
}
static unsigned long compute_si_addr(struct pt_regs *regs, int text_fault)
diff --git a/arch/sparc/mm/fault_64.c b/arch/sparc/mm/fault_64.c
index 41363f46797b..63166fcf9e25 100644
--- a/arch/sparc/mm/fault_64.c
+++ b/arch/sparc/mm/fault_64.c
@@ -170,11 +170,7 @@ static void do_fault_siginfo(int code, int sig, struct pt_regs *regs,
int fault_code)
{
unsigned long addr;
- siginfo_t info;
- info.si_code = code;
- info.si_signo = sig;
- info.si_errno = 0;
if (fault_code & FAULT_CODE_ITLB) {
addr = regs->tpc;
} else {
@@ -187,13 +183,11 @@ static void do_fault_siginfo(int code, int sig, struct pt_regs *regs,
else
addr = fault_addr;
}
- info.si_addr = (void __user *) addr;
- info.si_trapno = 0;
if (unlikely(show_unhandled_signals))
show_signal_msg(regs, sig, code, addr, current);
- force_sig_info(sig, &info, current);
+ force_sig_fault(sig, code, (void __user *) addr, 0, current);
}
static unsigned int get_fault_insn(struct pt_regs *regs, unsigned int insn)
diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
index d4e8c497ae86..dcf5ea28a281 100644
--- a/arch/um/drivers/ubd_kern.c
+++ b/arch/um/drivers/ubd_kern.c
@@ -208,19 +208,6 @@ static int fake_ide_media_proc_show(struct seq_file *m, void *v)
return 0;
}
-static int fake_ide_media_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, fake_ide_media_proc_show, NULL);
-}
-
-static const struct file_operations fake_ide_media_proc_fops = {
- .owner = THIS_MODULE,
- .open = fake_ide_media_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
static void make_ide_entries(const char *dev_name)
{
struct proc_dir_entry *dir, *ent;
@@ -231,7 +218,8 @@ static void make_ide_entries(const char *dev_name)
dir = proc_mkdir(dev_name, proc_ide);
if(!dir) return;
- ent = proc_create("media", S_IRUGO, dir, &fake_ide_media_proc_fops);
+ ent = proc_create_single("media", S_IRUGO, dir,
+ fake_ide_media_proc_show);
if(!ent) return;
snprintf(name, sizeof(name), "ide0/%s", dev_name);
proc_symlink(dev_name, proc_ide_root, name);
diff --git a/arch/um/kernel/ptrace.c b/arch/um/kernel/ptrace.c
index bc2a516c190f..1a1d88a4d940 100644
--- a/arch/um/kernel/ptrace.c
+++ b/arch/um/kernel/ptrace.c
@@ -115,17 +115,10 @@ long arch_ptrace(struct task_struct *child, long request,
static void send_sigtrap(struct task_struct *tsk, struct uml_pt_regs *regs,
int error_code)
{
- struct siginfo info;
-
- memset(&info, 0, sizeof(info));
- info.si_signo = SIGTRAP;
- info.si_code = TRAP_BRKPT;
-
- /* User-mode eip? */
- info.si_addr = UPT_IS_USER(regs) ? (void __user *) UPT_IP(regs) : NULL;
-
/* Send us the fake SIGTRAP */
- force_sig_info(SIGTRAP, &info, tsk);
+ force_sig_fault(SIGTRAP, TRAP_BRKPT,
+ /* User-mode eip? */
+ UPT_IS_USER(regs) ? (void __user *) UPT_IP(regs) : NULL, tsk);
}
/*
diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c
index b2b02df9896e..ec9a42c14c56 100644
--- a/arch/um/kernel/trap.c
+++ b/arch/um/kernel/trap.c
@@ -162,13 +162,9 @@ static void show_segv_info(struct uml_pt_regs *regs)
static void bad_segv(struct faultinfo fi, unsigned long ip)
{
- struct siginfo si;
-
- si.si_signo = SIGSEGV;
- si.si_code = SEGV_ACCERR;
- si.si_addr = (void __user *) FAULT_ADDRESS(fi);
current->thread.arch.faultinfo = fi;
- force_sig_info(SIGSEGV, &si, current);
+ force_sig_fault(SIGSEGV, SEGV_ACCERR, (void __user *) FAULT_ADDRESS(fi),
+ current);
}
void fatal_sigsegv(void)
@@ -214,8 +210,8 @@ void segv_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs)
unsigned long segv(struct faultinfo fi, unsigned long ip, int is_user,
struct uml_pt_regs *regs)
{
- struct siginfo si;
jmp_buf *catcher;
+ int si_code;
int err;
int is_write = FAULT_WRITE(fi);
unsigned long address = FAULT_ADDRESS(fi);
@@ -239,7 +235,7 @@ unsigned long segv(struct faultinfo fi, unsigned long ip, int is_user,
if (SEGV_IS_FIXABLE(&fi))
err = handle_page_fault(address, ip, is_write, is_user,
- &si.si_code);
+ &si_code);
else {
err = -EFAULT;
/*
@@ -271,18 +267,14 @@ unsigned long segv(struct faultinfo fi, unsigned long ip, int is_user,
show_segv_info(regs);
if (err == -EACCES) {
- si.si_signo = SIGBUS;
- si.si_errno = 0;
- si.si_code = BUS_ADRERR;
- si.si_addr = (void __user *)address;
current->thread.arch.faultinfo = fi;
- force_sig_info(SIGBUS, &si, current);
+ force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address,
+ current);
} else {
BUG_ON(err != -EFAULT);
- si.si_signo = SIGSEGV;
- si.si_addr = (void __user *) address;
current->thread.arch.faultinfo = fi;
- force_sig_info(SIGSEGV, &si, current);
+ force_sig_fault(SIGSEGV, si_code, (void __user *) address,
+ current);
}
out:
@@ -294,9 +286,7 @@ out:
void relay_signal(int sig, struct siginfo *si, struct uml_pt_regs *regs)
{
- struct faultinfo *fi;
- struct siginfo clean_si;
-
+ int code, err;
if (!UPT_IS_USER(regs)) {
if (sig == SIGBUS)
printk(KERN_ERR "Bus error - the host /dev/shm or /tmp "
@@ -306,29 +296,21 @@ void relay_signal(int sig, struct siginfo *si, struct uml_pt_regs *regs)
arch_examine_signal(sig, regs);
- clear_siginfo(&clean_si);
- clean_si.si_signo = si->si_signo;
- clean_si.si_errno = si->si_errno;
- clean_si.si_code = si->si_code;
- switch (sig) {
- case SIGILL:
- case SIGFPE:
- case SIGSEGV:
- case SIGBUS:
- case SIGTRAP:
- fi = UPT_FAULTINFO(regs);
- clean_si.si_addr = (void __user *) FAULT_ADDRESS(*fi);
+ /*
+ * Is the siginfo layout for this signal known?
+ * Signal data must be scrubbed to prevent information leaks.
+ */
+ code = si->si_code;
+ err = si->si_errno;
+ if ((err == 0) && (siginfo_layout(sig, code) == SIL_FAULT)) {
+ struct faultinfo *fi = UPT_FAULTINFO(regs);
current->thread.arch.faultinfo = *fi;
-#ifdef __ARCH_SI_TRAPNO
- clean_si.si_trapno = si->si_trapno;
-#endif
- break;
- default:
- printk(KERN_ERR "Attempted to relay unknown signal %d (si_code = %d)\n",
- sig, si->si_code);
+ force_sig_fault(sig, code, (void __user *)FAULT_ADDRESS(*fi),
+ current);
+ } else {
+ printk(KERN_ERR "Attempted to relay unknown signal %d (si_code = %d) with errno %d\n",
+ sig, code, err);
+ force_sig(sig, current);
}
-
- force_sig_info(sig, &clean_si, current);
}
void bus_handler(int sig, struct siginfo *si, struct uml_pt_regs *regs)
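
The rewritten relay_signal() keys off siginfo_layout() to decide whether si_addr can be trusted and forwarded. A rough sketch of that gate, under the assumption that only the SIL_FAULT layout matters for relaying an address:

    #include <linux/sched/signal.h>

    static void relay_sketch(int sig, int code, int err,
                             void __user *addr, struct task_struct *tsk)
    {
            if (err == 0 && siginfo_layout(sig, code) == SIL_FAULT) {
                    /* Layout known to carry a fault address: forward it. */
                    force_sig_fault(sig, code, addr, tsk);
            } else {
                    /* Unknown layout: send the bare signal instead of
                     * risking a leak of stale siginfo fields. */
                    force_sig(sig, tsk);
            }
    }
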
diff --git a/arch/unicore32/Kconfig b/arch/unicore32/Kconfig
index 462e59a7ae78..03f991e44288 100644
--- a/arch/unicore32/Kconfig
+++ b/arch/unicore32/Kconfig
@@ -19,6 +19,8 @@ config UNICORE32
select ARCH_WANT_FRAME_POINTERS
select GENERIC_IOMAP
select MODULES_USE_ELF_REL
+ select NEED_DMA_MAP_STATE
+ select SWIOTLB
help
UniCore-32 is 32-bit Instruction Set Architecture,
including a series of low-power-consumption RISC chip
@@ -61,9 +63,6 @@ config ARCH_MAY_HAVE_PC_FDC
config ZONE_DMA
def_bool y
-config NEED_DMA_MAP_STATE
- def_bool y
-
source "init/Kconfig"
source "kernel/Kconfig.freezer"
diff --git a/arch/unicore32/kernel/fpu-ucf64.c b/arch/unicore32/kernel/fpu-ucf64.c
index 12c8c9527b8e..8594b168f25e 100644
--- a/arch/unicore32/kernel/fpu-ucf64.c
+++ b/arch/unicore32/kernel/fpu-ucf64.c
@@ -52,14 +52,14 @@
* Raise a SIGFPE for the current process.
* sicode describes the signal being raised.
*/
-void ucf64_raise_sigfpe(unsigned int sicode, struct pt_regs *regs)
+void ucf64_raise_sigfpe(struct pt_regs *regs)
{
siginfo_t info;
- memset(&info, 0, sizeof(info));
+ clear_siginfo(&info);
info.si_signo = SIGFPE;
- info.si_code = sicode;
+ info.si_code = FPE_FLTUNK;
info.si_addr = (void __user *)(instruction_pointer(regs) - 4);
/*
@@ -94,7 +94,7 @@ void ucf64_exchandler(u32 inst, u32 fpexc, struct pt_regs *regs)
pr_debug("UniCore-F64 FPSCR 0x%08x INST 0x%08x\n",
cff(FPSCR), inst);
- ucf64_raise_sigfpe(0, regs);
+ ucf64_raise_sigfpe(regs);
return;
}
diff --git a/arch/unicore32/mm/Kconfig b/arch/unicore32/mm/Kconfig
index e9154a59d561..82759b6aba67 100644
--- a/arch/unicore32/mm/Kconfig
+++ b/arch/unicore32/mm/Kconfig
@@ -39,14 +39,3 @@ config CPU_TLB_SINGLE_ENTRY_DISABLE
default y
help
Say Y here to disable the TLB single entry operations.
-
-config SWIOTLB
- def_bool y
- select DMA_DIRECT_OPS
-
-config IOMMU_HELPER
- def_bool SWIOTLB
-
-config NEED_SG_DMA_LENGTH
- def_bool SWIOTLB
-
diff --git a/arch/unicore32/mm/fault.c b/arch/unicore32/mm/fault.c
index bbefcc46a45e..381473412937 100644
--- a/arch/unicore32/mm/fault.c
+++ b/arch/unicore32/mm/fault.c
@@ -125,6 +125,7 @@ static void __do_user_fault(struct task_struct *tsk, unsigned long addr,
tsk->thread.address = addr;
tsk->thread.error_code = fsr;
tsk->thread.trap_no = 14;
+ clear_siginfo(&si);
si.si_signo = sig;
si.si_errno = 0;
si.si_code = code;
@@ -472,6 +473,7 @@ asmlinkage void do_DataAbort(unsigned long addr, unsigned int fsr,
printk(KERN_ALERT "Unhandled fault: %s (0x%03x) at 0x%08lx\n",
inf->name, fsr, addr);
+ clear_siginfo(&info);
info.si_signo = inf->sig;
info.si_errno = 0;
info.si_code = inf->code;
@@ -491,6 +493,7 @@ asmlinkage void do_PrefetchAbort(unsigned long addr,
printk(KERN_ALERT "Unhandled prefetch abort: %s (0x%03x) at 0x%08lx\n",
inf->name, ifsr, addr);
+ clear_siginfo(&info);
info.si_signo = inf->sig;
info.si_errno = 0;
info.si_code = inf->code;
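
All three clear_siginfo() additions close the same leak: a siginfo_t on the stack has union members and padding that field-by-field assignment never touches, and those bytes are later copied to userspace. The safe fill pattern, sketched with only the fields this file already uses:

    #include <linux/signal.h>

    static void fill_info(siginfo_t *info, int sig, int code,
                          void __user *addr)
    {
            /* Zero the whole structure first; otherwise the remaining
             * bytes are uninitialized kernel stack. */
            clear_siginfo(info);
            info->si_signo = sig;
            info->si_errno = 0;
            info->si_code  = code;
            info->si_addr  = addr;
    }
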
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index c07f492b871a..1fe24b624d44 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -28,6 +28,8 @@ config X86_64
select ARCH_USE_CMPXCHG_LOCKREF
select HAVE_ARCH_SOFT_DIRTY
select MODULES_USE_ELF_RELA
+ select NEED_DMA_MAP_STATE
+ select SWIOTLB
select X86_DEV_DMA_OPS
select ARCH_HAS_SYSCALL_WRAPPER
@@ -60,6 +62,7 @@ config X86
select ARCH_HAS_PMEM_API if X86_64
select ARCH_HAS_REFCOUNT
select ARCH_HAS_UACCESS_FLUSHCACHE if X86_64
+ select ARCH_HAS_UACCESS_MCSAFE if X86_64
select ARCH_HAS_SET_MEMORY
select ARCH_HAS_SG_CHAIN
select ARCH_HAS_STRICT_KERNEL_RWX
@@ -134,7 +137,6 @@ config X86
select HAVE_C_RECORDMCOUNT
select HAVE_DEBUG_KMEMLEAK
select HAVE_DEBUG_STACKOVERFLOW
- select HAVE_DMA_API_DEBUG
select HAVE_DMA_CONTIGUOUS
select HAVE_DYNAMIC_FTRACE
select HAVE_DYNAMIC_FTRACE_WITH_REGS
@@ -184,6 +186,7 @@ config X86
select HAVE_UNSTABLE_SCHED_CLOCK
select HAVE_USER_RETURN_NOTIFIER
select IRQ_FORCED_THREADING
+ select NEED_SG_DMA_LENGTH
select PCI_LOCKLESS_CONFIG
select PERF_EVENTS
select RTC_LIB
@@ -236,13 +239,6 @@ config ARCH_MMAP_RND_COMPAT_BITS_MAX
config SBUS
bool
-config NEED_DMA_MAP_STATE
- def_bool y
- depends on X86_64 || INTEL_IOMMU || DMA_API_DEBUG || SWIOTLB
-
-config NEED_SG_DMA_LENGTH
- def_bool y
-
config GENERIC_ISA_DMA
def_bool y
depends on ISA_DMA_API
@@ -875,6 +871,7 @@ config DMI
config GART_IOMMU
bool "Old AMD GART IOMMU support"
+ select IOMMU_HELPER
select SWIOTLB
depends on X86_64 && PCI && AMD_NB
---help---
@@ -896,6 +893,7 @@ config GART_IOMMU
config CALGARY_IOMMU
bool "IBM Calgary IOMMU support"
+ select IOMMU_HELPER
select SWIOTLB
depends on X86_64 && PCI
---help---
@@ -923,20 +921,6 @@ config CALGARY_IOMMU_ENABLED_BY_DEFAULT
Calgary anyway, pass 'iommu=calgary' on the kernel command line.
If unsure, say Y.
-# need this always selected by IOMMU for the VIA workaround
-config SWIOTLB
- def_bool y if X86_64
- ---help---
- Support for software bounce buffers used on x86-64 systems
- which don't have a hardware IOMMU. Using this PCI devices
- which can only access 32-bits of memory can be used on systems
- with more than 3 GB of memory.
- If unsure, say Y.
-
-config IOMMU_HELPER
- def_bool y
- depends on CALGARY_IOMMU || GART_IOMMU || SWIOTLB || AMD_IOMMU
-
config MAXSMP
bool "Enable Maximum number of SMP Processors and NUMA Nodes"
depends on X86_64 && SMP && DEBUG_KERNEL
@@ -1458,6 +1442,7 @@ config HIGHMEM
config X86_PAE
bool "PAE (Physical Address Extension) Support"
depends on X86_32 && !HIGHMEM4G
+ select PHYS_ADDR_T_64BIT
select SWIOTLB
---help---
PAE is required for NX support, and furthermore enables
@@ -1485,14 +1470,6 @@ config X86_5LEVEL
Say N if unsure.
-config ARCH_PHYS_ADDR_T_64BIT
- def_bool y
- depends on X86_64 || X86_PAE
-
-config ARCH_DMA_ADDR_T_64BIT
- def_bool y
- depends on X86_64 || HIGHMEM64G
-
config X86_DIRECT_GBPAGES
def_bool y
depends on X86_64 && !DEBUG_PAGEALLOC
diff --git a/arch/x86/boot/compressed/cmdline.c b/arch/x86/boot/compressed/cmdline.c
index 0cb325734cfb..af6cda0b7900 100644
--- a/arch/x86/boot/compressed/cmdline.c
+++ b/arch/x86/boot/compressed/cmdline.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
#include "misc.h"
-#if CONFIG_EARLY_PRINTK || CONFIG_RANDOMIZE_BASE
+#if CONFIG_EARLY_PRINTK || CONFIG_RANDOMIZE_BASE || CONFIG_X86_5LEVEL
static unsigned long fs;
static inline void set_fs(unsigned long seg)
diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
index 47d3efff6805..a8a8642d2b0b 100644
--- a/arch/x86/boot/compressed/eboot.c
+++ b/arch/x86/boot/compressed/eboot.c
@@ -109,23 +109,34 @@ void efi_char16_printk(efi_system_table_t *table, efi_char16_t *str)
}
static efi_status_t
-__setup_efi_pci32(efi_pci_io_protocol_32 *pci, struct pci_setup_rom **__rom)
+__setup_efi_pci(efi_pci_io_protocol_t *pci, struct pci_setup_rom **__rom)
{
struct pci_setup_rom *rom = NULL;
efi_status_t status;
unsigned long size;
- uint64_t attributes;
+ uint64_t attributes, romsize;
+ void *romimage;
- status = efi_early->call(pci->attributes, pci,
- EfiPciIoAttributeOperationGet, 0, 0,
- &attributes);
+ status = efi_call_proto(efi_pci_io_protocol, attributes, pci,
+ EfiPciIoAttributeOperationGet, 0, 0,
+ &attributes);
if (status != EFI_SUCCESS)
return status;
- if (!pci->romimage || !pci->romsize)
+ /*
+ * Some firmware images contain EFI function pointers at the place where the
+ * romimage and romsize fields are supposed to be. Typically the EFI
+ * code is mapped at high addresses, translating to an unrealistically
+ * large romsize. The UEFI spec limits the size of option ROMs to 16
+ * MiB so we reject any ROMs over 16 MiB in size to catch this.
+ */
+ romimage = (void *)(unsigned long)efi_table_attr(efi_pci_io_protocol,
+ romimage, pci);
+ romsize = efi_table_attr(efi_pci_io_protocol, romsize, pci);
+ if (!romimage || !romsize || romsize > SZ_16M)
return EFI_INVALID_PARAMETER;
- size = pci->romsize + sizeof(*rom);
+ size = romsize + sizeof(*rom);
status = efi_call_early(allocate_pool, EFI_LOADER_DATA, size, &rom);
if (status != EFI_SUCCESS) {
@@ -141,29 +152,32 @@ __setup_efi_pci32(efi_pci_io_protocol_32 *pci, struct pci_setup_rom **__rom)
rom->pcilen = pci->romsize;
*__rom = rom;
- status = efi_early->call(pci->pci.read, pci, EfiPciIoWidthUint16,
- PCI_VENDOR_ID, 1, &(rom->vendor));
+ status = efi_call_proto(efi_pci_io_protocol, pci.read, pci,
+ EfiPciIoWidthUint16, PCI_VENDOR_ID, 1,
+ &rom->vendor);
if (status != EFI_SUCCESS) {
efi_printk(sys_table, "Failed to read rom->vendor\n");
goto free_struct;
}
- status = efi_early->call(pci->pci.read, pci, EfiPciIoWidthUint16,
- PCI_DEVICE_ID, 1, &(rom->devid));
+ status = efi_call_proto(efi_pci_io_protocol, pci.read, pci,
+ EfiPciIoWidthUint16, PCI_DEVICE_ID, 1,
+ &rom->devid);
if (status != EFI_SUCCESS) {
efi_printk(sys_table, "Failed to read rom->devid\n");
goto free_struct;
}
- status = efi_early->call(pci->get_location, pci, &(rom->segment),
- &(rom->bus), &(rom->device), &(rom->function));
+ status = efi_call_proto(efi_pci_io_protocol, get_location, pci,
+ &rom->segment, &rom->bus, &rom->device,
+ &rom->function);
if (status != EFI_SUCCESS)
goto free_struct;
- memcpy(rom->romdata, pci->romimage, pci->romsize);
+ memcpy(rom->romdata, romimage, romsize);
return status;
free_struct:
@@ -175,7 +189,7 @@ static void
setup_efi_pci32(struct boot_params *params, void **pci_handle,
unsigned long size)
{
- efi_pci_io_protocol_32 *pci = NULL;
+ efi_pci_io_protocol_t *pci = NULL;
efi_guid_t pci_proto = EFI_PCI_IO_PROTOCOL_GUID;
u32 *handles = (u32 *)(unsigned long)pci_handle;
efi_status_t status;
@@ -202,7 +216,7 @@ setup_efi_pci32(struct boot_params *params, void **pci_handle,
if (!pci)
continue;
- status = __setup_efi_pci32(pci, &rom);
+ status = __setup_efi_pci(pci, &rom);
if (status != EFI_SUCCESS)
continue;
@@ -216,73 +230,11 @@ setup_efi_pci32(struct boot_params *params, void **pci_handle,
}
}
-static efi_status_t
-__setup_efi_pci64(efi_pci_io_protocol_64 *pci, struct pci_setup_rom **__rom)
-{
- struct pci_setup_rom *rom;
- efi_status_t status;
- unsigned long size;
- uint64_t attributes;
-
- status = efi_early->call(pci->attributes, pci,
- EfiPciIoAttributeOperationGet, 0,
- &attributes);
- if (status != EFI_SUCCESS)
- return status;
-
- if (!pci->romimage || !pci->romsize)
- return EFI_INVALID_PARAMETER;
-
- size = pci->romsize + sizeof(*rom);
-
- status = efi_call_early(allocate_pool, EFI_LOADER_DATA, size, &rom);
- if (status != EFI_SUCCESS) {
- efi_printk(sys_table, "Failed to alloc mem for rom\n");
- return status;
- }
-
- rom->data.type = SETUP_PCI;
- rom->data.len = size - sizeof(struct setup_data);
- rom->data.next = 0;
- rom->pcilen = pci->romsize;
- *__rom = rom;
-
- status = efi_early->call(pci->pci.read, pci, EfiPciIoWidthUint16,
- PCI_VENDOR_ID, 1, &(rom->vendor));
-
- if (status != EFI_SUCCESS) {
- efi_printk(sys_table, "Failed to read rom->vendor\n");
- goto free_struct;
- }
-
- status = efi_early->call(pci->pci.read, pci, EfiPciIoWidthUint16,
- PCI_DEVICE_ID, 1, &(rom->devid));
-
- if (status != EFI_SUCCESS) {
- efi_printk(sys_table, "Failed to read rom->devid\n");
- goto free_struct;
- }
-
- status = efi_early->call(pci->get_location, pci, &(rom->segment),
- &(rom->bus), &(rom->device), &(rom->function));
-
- if (status != EFI_SUCCESS)
- goto free_struct;
-
- memcpy(rom->romdata, pci->romimage, pci->romsize);
- return status;
-
-free_struct:
- efi_call_early(free_pool, rom);
- return status;
-
-}
-
static void
setup_efi_pci64(struct boot_params *params, void **pci_handle,
unsigned long size)
{
- efi_pci_io_protocol_64 *pci = NULL;
+ efi_pci_io_protocol_t *pci = NULL;
efi_guid_t pci_proto = EFI_PCI_IO_PROTOCOL_GUID;
u64 *handles = (u64 *)(unsigned long)pci_handle;
efi_status_t status;
@@ -309,7 +261,7 @@ setup_efi_pci64(struct boot_params *params, void **pci_handle,
if (!pci)
continue;
- status = __setup_efi_pci64(pci, &rom);
+ status = __setup_efi_pci(pci, &rom);
if (status != EFI_SUCCESS)
continue;
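
The eboot.c consolidation works because efi_call_proto() and efi_table_attr() resolve the 32- vs 64-bit protocol struct layout at run time, so the formerly duplicated __setup_efi_pci32/64 bodies reduce to one __setup_efi_pci(). A deliberately simplified model of the field accessor (the real macros live in arch/x86/include/asm/efi.h and also handle mixed-mode thunking):

    /* Simplified sketch, not the real macro definition. */
    #define my_table_attr(proto, attr, inst)                          \
            (efi_is_64bit() ? ((proto##_64 *)(inst))->attr            \
                            : ((proto##_32 *)(inst))->attr)

    /* Usage mirroring the patch: read a field without knowing bitness.
     * romsize = my_table_attr(efi_pci_io_protocol, romsize, pci); */
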
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index fca012baba19..64037895b085 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -306,6 +306,25 @@ ENTRY(startup_64)
leaq boot_stack_end(%rbx), %rsp
/*
+ * paging_prepare() and cleanup_trampoline() below can have GOT
+ * references. Adjust the table with the address we are running at.
+ *
+ * Zero RAX for adjust_got: the GOT was not adjusted before;
+ * there's no adjustment to undo.
+ */
+ xorq %rax, %rax
+
+ /*
+ * Calculate the address the binary is loaded at and use it as
+ * a GOT adjustment.
+ */
+ call 1f
+1: popq %rdi
+ subq $1b, %rdi
+
+ call adjust_got
+
+ /*
* At this point we are in long mode with 4-level paging enabled,
* but we might want to enable 5-level paging or vice versa.
*
@@ -346,6 +365,7 @@ ENTRY(startup_64)
* this function call.
*/
pushq %rsi
+ movq %rsi, %rdi /* real mode address */
call paging_prepare
popq %rsi
@@ -370,10 +390,14 @@ trampoline_return:
/*
* cleanup_trampoline() would restore trampoline memory.
*
+ * RDI is the address of the page table to use instead of the page
+ * table in trampoline memory (if required).
+ *
* RSI holds real mode data and needs to be preserved across
* this function call.
*/
pushq %rsi
+ leaq top_pgtable(%rbx), %rdi
call cleanup_trampoline
popq %rsi
@@ -381,6 +405,21 @@ trampoline_return:
pushq $0
popfq
+ /*
+ * Previously we adjusted the GOT with the address the binary was
+ * loaded at. Now we need to re-adjust for the relocation address.
+ *
+ * Calculate the address the binary is loaded at, so that we can
+ * undo the previous GOT adjustment.
+ */
+ call 1f
+1: popq %rax
+ subq $1b, %rax
+
+ /* The new adjustment is the relocation address */
+ movq %rbx, %rdi
+ call adjust_got
+
/*
* Copy the compressed kernel to the end of our buffer
* where decompression in place becomes safe.
@@ -482,19 +521,6 @@ relocated:
rep stosq
/*
- * Adjust our own GOT
- */
- leaq _got(%rip), %rdx
- leaq _egot(%rip), %rcx
-1:
- cmpq %rcx, %rdx
- jae 2f
- addq %rbx, (%rdx)
- addq $8, %rdx
- jmp 1b
-2:
-
-/*
* Do the extraction, and jump to the new kernel..
*/
pushq %rsi /* Save the real mode argument */
@@ -512,6 +538,27 @@ relocated:
*/
jmp *%rax
+/*
+ * Adjust the global offset table
+ *
+ * RAX is the previous adjustment of the table to undo (use 0 if it's the
+ * first time we touch the GOT).
+ * RDI is the new adjustment to apply.
+ */
+adjust_got:
+ /* Walk the GOT, undoing the old adjustment and applying the new one */
+ leaq _got(%rip), %rdx
+ leaq _egot(%rip), %rcx
+1:
+ cmpq %rcx, %rdx
+ jae 2f
+ subq %rax, (%rdx) /* Undo previous adjustment */
+ addq %rdi, (%rdx) /* Apply the new adjustment */
+ addq $8, %rdx
+ jmp 1b
+2:
+ ret
+
.code32
/*
* This is the 32-bit trampoline that will be copied over to low memory.
@@ -649,3 +696,10 @@ boot_stack_end:
.balign 4096
pgtable:
.fill BOOT_PGT_SIZE, 1, 0
+
+/*
+ * This page table is used instead of the page table in the trampoline
+ * memory.
+ */
+top_pgtable:
+ .fill PAGE_SIZE, 1, 0
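
For readers following the assembly: adjust_got makes one pass over the GOT, removing the previous bias and applying the new one, which is why the first call zeroes RAX (nothing to undo yet). An equivalent C sketch, not code from the patch:

    #include <stdint.h>

    extern uint64_t _got[], _egot[];    /* linker-provided bounds */

    /* Re-bias every GOT entry from the old adjustment to the new one. */
    static void adjust_got_sketch(uint64_t undo, uint64_t apply)
    {
            uint64_t *entry;

            for (entry = _got; entry < _egot; entry++)
                    *entry = *entry - undo + apply;
    }

    /* First call:  adjust_got_sketch(0, load_address);
     * Second call: adjust_got_sketch(load_address, run_address); */
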
diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c
index a0a50b91ecef..b87a7582853d 100644
--- a/arch/x86/boot/compressed/kaslr.c
+++ b/arch/x86/boot/compressed/kaslr.c
@@ -47,7 +47,7 @@
#include <linux/decompress/mm.h>
#ifdef CONFIG_X86_5LEVEL
-unsigned int pgtable_l5_enabled __ro_after_init;
+unsigned int __pgtable_l5_enabled;
unsigned int pgdir_shift __ro_after_init = 39;
unsigned int ptrs_per_p4d __ro_after_init = 1;
#endif
@@ -734,7 +734,7 @@ void choose_random_location(unsigned long input,
#ifdef CONFIG_X86_5LEVEL
if (__read_cr4() & X86_CR4_LA57) {
- pgtable_l5_enabled = 1;
+ __pgtable_l5_enabled = 1;
pgdir_shift = 48;
ptrs_per_p4d = 512;
}
diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h
index 9e11be4cae19..a423bdb42686 100644
--- a/arch/x86/boot/compressed/misc.h
+++ b/arch/x86/boot/compressed/misc.h
@@ -12,10 +12,8 @@
#undef CONFIG_PARAVIRT_SPINLOCKS
#undef CONFIG_KASAN
-#ifdef CONFIG_X86_5LEVEL
-/* cpu_feature_enabled() cannot be used that early */
-#define pgtable_l5_enabled __pgtable_l5_enabled
-#endif
+/* cpu_feature_enabled() cannot be used this early */
+#define USE_EARLY_PGTABLE_L5
#include <linux/linkage.h>
#include <linux/screen_info.h>
diff --git a/arch/x86/boot/compressed/pgtable_64.c b/arch/x86/boot/compressed/pgtable_64.c
index 32af1cbcd903..8c5107545251 100644
--- a/arch/x86/boot/compressed/pgtable_64.c
+++ b/arch/x86/boot/compressed/pgtable_64.c
@@ -23,14 +23,6 @@ struct paging_config {
static char trampoline_save[TRAMPOLINE_32BIT_SIZE];
/*
- * The page table is going to be used instead of page table in the trampoline
- * memory.
- *
- * It must not be in BSS as BSS is cleared after cleanup_trampoline().
- */
-static char top_pgtable[PAGE_SIZE] __aligned(PAGE_SIZE) __section(.data);
-
-/*
* Trampoline address will be printed by extract_kernel() for debugging
* purposes.
*
@@ -39,16 +31,23 @@ static char top_pgtable[PAGE_SIZE] __aligned(PAGE_SIZE) __section(.data);
*/
unsigned long *trampoline_32bit __section(.data);
-struct paging_config paging_prepare(void)
+extern struct boot_params *boot_params;
+int cmdline_find_option_bool(const char *option);
+
+struct paging_config paging_prepare(void *rmode)
{
struct paging_config paging_config = {};
unsigned long bios_start, ebda_start;
+ /* Initialize boot_params. Required for cmdline_find_option_bool(). */
+ boot_params = rmode;
+
/*
* Check if LA57 is desired and supported.
*
- * There are two parts to the check:
+ * There are several parts to the check:
* - if the kernel supports 5-level paging: CONFIG_X86_5LEVEL=y
+ * - if user asked to disable 5-level paging: no5lvl in cmdline
* - if the machine supports 5-level paging:
* + CPUID leaf 7 is supported
* + the leaf has the feature bit set
@@ -56,6 +55,7 @@ struct paging_config paging_prepare(void)
* That's substitute for boot_cpu_has() in early boot code.
*/
if (IS_ENABLED(CONFIG_X86_5LEVEL) &&
+ !cmdline_find_option_bool("no5lvl") &&
native_cpuid_eax(0) >= 7 &&
(native_cpuid_ecx(7) & (1 << (X86_FEATURE_LA57 & 31)))) {
paging_config.l5_required = 1;
@@ -134,19 +134,19 @@ out:
return paging_config;
}
-void cleanup_trampoline(void)
+void cleanup_trampoline(void *pgtable)
{
void *trampoline_pgtable;
- trampoline_pgtable = trampoline_32bit + TRAMPOLINE_32BIT_PGTABLE_OFFSET;
+ trampoline_pgtable = trampoline_32bit + TRAMPOLINE_32BIT_PGTABLE_OFFSET / sizeof(unsigned long);
/*
* Move the top level page table out of trampoline memory,
* if it's there.
*/
if ((void *)__native_read_cr3() == trampoline_pgtable) {
- memcpy(top_pgtable, trampoline_pgtable, PAGE_SIZE);
- native_write_cr3((unsigned long)top_pgtable);
+ memcpy(pgtable, trampoline_pgtable, PAGE_SIZE);
+ native_write_cr3((unsigned long)pgtable);
}
/* Restore trampoline memory */
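
The division by sizeof(unsigned long) above is the actual bug fix: trampoline_32bit is an unsigned long *, and C pointer arithmetic scales by the element size, so adding the raw byte offset overshot by a factor of eight. A small illustration of the distinction:

    #include <stddef.h>

    static void *pgtable_at(unsigned long *base, size_t byte_offset)
    {
            /* base + byte_offset would advance byte_offset *
             * sizeof(unsigned long) bytes; dividing first yields the
             * intended byte displacement. */
            return base + byte_offset / sizeof(unsigned long);
    }
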
diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index d6b27dab1b30..14a2f996e543 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -396,3 +396,4 @@
382 i386 pkey_free sys_pkey_free __ia32_sys_pkey_free
383 i386 statx sys_statx __ia32_sys_statx
384 i386 arch_prctl sys_arch_prctl __ia32_compat_sys_arch_prctl
+385 i386 io_pgetevents sys_io_pgetevents __ia32_compat_sys_io_pgetevents
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
index 4dfe42666d0c..cd36232ab62f 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -341,6 +341,7 @@
330 common pkey_alloc __x64_sys_pkey_alloc
331 common pkey_free __x64_sys_pkey_free
332 common statx __x64_sys_statx
+333 common io_pgetevents __x64_sys_io_pgetevents
#
# x32-specific system call numbers start at 512 to avoid cache impact
diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile
index d998a487c9b1..261802b1cc50 100644
--- a/arch/x86/entry/vdso/Makefile
+++ b/arch/x86/entry/vdso/Makefile
@@ -44,14 +44,14 @@ obj-y += $(vdso_img_objs)
targets += $(vdso_img_cfiles)
targets += $(vdso_img_sodbg) $(vdso_img-y:%=vdso%.so)
-export CPPFLAGS_vdso.lds += -P -C
+CPPFLAGS_vdso.lds += -P -C
VDSO_LDFLAGS_vdso.lds = -m64 -Wl,-soname=linux-vdso.so.1 \
-Wl,--no-undefined \
-Wl,-z,max-page-size=4096 -Wl,-z,common-page-size=4096 \
$(DISABLE_LTO)
-$(obj)/vdso64.so.dbg: $(src)/vdso.lds $(vobjs) FORCE
+$(obj)/vdso64.so.dbg: $(obj)/vdso.lds $(vobjs) FORCE
$(call if_changed,vdso)
HOST_EXTRACFLAGS += -I$(srctree)/tools/include -I$(srctree)/include/uapi -I$(srctree)/arch/$(SUBARCH)/include/uapi
@@ -100,11 +100,8 @@ VDSO_LDFLAGS_vdsox32.lds = -Wl,-m,elf32_x86_64 \
-Wl,-z,max-page-size=4096 \
-Wl,-z,common-page-size=4096
-# 64-bit objects to re-brand as x32
-vobjs64-for-x32 := $(filter-out $(vobjs-nox32),$(vobjs-y))
-
# x32-rebranded versions
-vobjx32s-y := $(vobjs64-for-x32:.o=-x32.o)
+vobjx32s-y := $(vobjs-y:.o=-x32.o)
# same thing, but in the output directory
vobjx32s := $(foreach F,$(vobjx32s-y),$(obj)/$F)
@@ -122,7 +119,7 @@ $(obj)/%.so: OBJCOPYFLAGS := -S
$(obj)/%.so: $(obj)/%.so.dbg
$(call if_changed,objcopy)
-$(obj)/vdsox32.so.dbg: $(src)/vdsox32.lds $(vobjx32s) FORCE
+$(obj)/vdsox32.so.dbg: $(obj)/vdsox32.lds $(vobjx32s) FORCE
$(call if_changed,vdso)
CPPFLAGS_vdso32.lds = $(CPPFLAGS_vdso.lds)
diff --git a/arch/x86/entry/vdso/vdso32/vdso-fakesections.c b/arch/x86/entry/vdso/vdso32/vdso-fakesections.c
deleted file mode 100644
index 541468e25265..000000000000
--- a/arch/x86/entry/vdso/vdso32/vdso-fakesections.c
+++ /dev/null
@@ -1 +0,0 @@
-#include "../vdso-fakesections.c"
diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c
index 70b7845434cb..7782cdbcd67d 100644
--- a/arch/x86/entry/vsyscall/vsyscall_64.c
+++ b/arch/x86/entry/vsyscall/vsyscall_64.c
@@ -107,7 +107,7 @@ static bool write_ok_or_segv(unsigned long ptr, size_t size)
thread->cr2 = ptr;
thread->trap_nr = X86_TRAP_PF;
- memset(&info, 0, sizeof(info));
+ clear_siginfo(&info);
info.si_signo = SIGSEGV;
info.si_errno = 0;
info.si_code = SEGV_MAPERR;
diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c
index 786fd875de92..4b98101209a1 100644
--- a/arch/x86/events/amd/ibs.c
+++ b/arch/x86/events/amd/ibs.c
@@ -889,7 +889,7 @@ static void force_ibs_eilvt_setup(void)
if (!ibs_eilvt_valid())
goto out;
- pr_info("IBS: LVT offset %d assigned\n", offset);
+ pr_info("LVT offset %d assigned\n", offset);
return;
out:
diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c
index f5cbbba99283..981ba5e8241b 100644
--- a/arch/x86/events/amd/uncore.c
+++ b/arch/x86/events/amd/uncore.c
@@ -19,6 +19,7 @@
#include <asm/cpufeature.h>
#include <asm/perf_event.h>
#include <asm/msr.h>
+#include <asm/smp.h>
#define NUM_COUNTERS_NB 4
#define NUM_COUNTERS_L2 4
@@ -399,26 +400,8 @@ static int amd_uncore_cpu_starting(unsigned int cpu)
}
if (amd_uncore_llc) {
- unsigned int apicid = cpu_data(cpu).apicid;
- unsigned int nshared, subleaf, prev_eax = 0;
-
uncore = *per_cpu_ptr(amd_uncore_llc, cpu);
- /*
- * Iterate over Cache Topology Definition leaves until no
- * more cache descriptions are available.
- */
- for (subleaf = 0; subleaf < 5; subleaf++) {
- cpuid_count(0x8000001d, subleaf, &eax, &ebx, &ecx, &edx);
-
- /* EAX[0:4] gives type of cache */
- if (!(eax & 0x1f))
- break;
-
- prev_eax = eax;
- }
- nshared = ((prev_eax >> 14) & 0xfff) + 1;
-
- uncore->id = apicid - (apicid % nshared);
+ uncore->id = per_cpu(cpu_llc_id, cpu);
uncore = amd_uncore_find_online_sibling(uncore, amd_uncore_llc);
*per_cpu_ptr(amd_uncore_llc, cpu) = uncore;
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 7a987e6c7c35..6e461fb1e0d4 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -27,6 +27,7 @@
#include <linux/cpu.h>
#include <linux/bitops.h>
#include <linux/device.h>
+#include <linux/nospec.h>
#include <asm/apic.h>
#include <asm/stacktrace.h>
@@ -304,17 +305,20 @@ set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event *event)
config = attr->config;
- cache_type = (config >> 0) & 0xff;
+ cache_type = (config >> 0) & 0xff;
if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
return -EINVAL;
+ cache_type = array_index_nospec(cache_type, PERF_COUNT_HW_CACHE_MAX);
cache_op = (config >> 8) & 0xff;
if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
return -EINVAL;
+ cache_op = array_index_nospec(cache_op, PERF_COUNT_HW_CACHE_OP_MAX);
cache_result = (config >> 16) & 0xff;
if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
return -EINVAL;
+ cache_result = array_index_nospec(cache_result, PERF_COUNT_HW_CACHE_RESULT_MAX);
val = hw_cache_event_ids[cache_type][cache_op][cache_result];
@@ -421,6 +425,8 @@ int x86_setup_perfctr(struct perf_event *event)
if (attr->config >= x86_pmu.max_events)
return -EINVAL;
+ attr->config = array_index_nospec((unsigned long)attr->config, x86_pmu.max_events);
+
/*
* The generic map:
*/
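
The array_index_nospec() calls added here follow the standard Spectre-v1 pattern: the range check alone does not constrain speculative execution, so the index is additionally clamped with a data dependency before the table lookup. The canonical shape, sketched with a hypothetical table:

    #include <linux/errno.h>
    #include <linux/nospec.h>
    #include <linux/types.h>

    #define MY_TABLE_MAX 64                     /* hypothetical bound */
    static const u64 my_table[MY_TABLE_MAX];

    static int my_lookup(unsigned long idx, u64 *out)
    {
            if (idx >= MY_TABLE_MAX)
                    return -EINVAL;
            /* Clamp idx under speculation before using it. */
            idx = array_index_nospec(idx, MY_TABLE_MAX);
            *out = my_table[idx];
            return 0;
    }
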
diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c
index 9aca448bb8e6..9f8084f18d58 100644
--- a/arch/x86/events/intel/cstate.c
+++ b/arch/x86/events/intel/cstate.c
@@ -92,6 +92,7 @@
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/perf_event.h>
+#include <linux/nospec.h>
#include <asm/cpu_device_id.h>
#include <asm/intel-family.h>
#include "../perf_event.h"
@@ -302,6 +303,7 @@ static int cstate_pmu_event_init(struct perf_event *event)
} else if (event->pmu == &cstate_pkg_pmu) {
if (cfg >= PERF_CSTATE_PKG_EVENT_MAX)
return -EINVAL;
+ cfg = array_index_nospec((unsigned long)cfg, PERF_CSTATE_PKG_EVENT_MAX);
if (!pkg_msr[cfg].attr)
return -EINVAL;
event->hw.event_base = pkg_msr[cfg].msr;
diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c
index 3b993942a0e4..8d016ce5b80d 100644
--- a/arch/x86/events/intel/pt.c
+++ b/arch/x86/events/intel/pt.c
@@ -1194,7 +1194,7 @@ static int pt_event_addr_filters_validate(struct list_head *filters)
filter->action == PERF_ADDR_FILTER_ACTION_START)
return -EOPNOTSUPP;
- if (!filter->inode) {
+ if (!filter->path.dentry) {
if (!valid_kernel_ip(filter->offset))
return -EINVAL;
@@ -1221,7 +1221,7 @@ static void pt_event_addr_filters_sync(struct perf_event *event)
return;
list_for_each_entry(filter, &head->list, entry) {
- if (filter->inode && !offs[range]) {
+ if (filter->path.dentry && !offs[range]) {
msr_a = msr_b = 0;
} else {
/* apply the offset */
diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c
index a7956fc7ca1d..15b07379e72d 100644
--- a/arch/x86/events/intel/uncore.c
+++ b/arch/x86/events/intel/uncore.c
@@ -203,7 +203,7 @@ static void uncore_assign_hw_event(struct intel_uncore_box *box,
hwc->idx = idx;
hwc->last_tag = ++box->tags[idx];
- if (hwc->idx == UNCORE_PMC_IDX_FIXED) {
+ if (uncore_pmc_fixed(hwc->idx)) {
hwc->event_base = uncore_fixed_ctr(box);
hwc->config_base = uncore_fixed_ctl(box);
return;
@@ -218,7 +218,9 @@ void uncore_perf_event_update(struct intel_uncore_box *box, struct perf_event *e
u64 prev_count, new_count, delta;
int shift;
- if (event->hw.idx >= UNCORE_PMC_IDX_FIXED)
+ if (uncore_pmc_freerunning(event->hw.idx))
+ shift = 64 - uncore_freerunning_bits(box, event);
+ else if (uncore_pmc_fixed(event->hw.idx))
shift = 64 - uncore_fixed_ctr_bits(box);
else
shift = 64 - uncore_perf_ctr_bits(box);
@@ -449,15 +451,30 @@ static int uncore_assign_events(struct intel_uncore_box *box, int assign[], int
return ret ? -EINVAL : 0;
}
-static void uncore_pmu_event_start(struct perf_event *event, int flags)
+void uncore_pmu_event_start(struct perf_event *event, int flags)
{
struct intel_uncore_box *box = uncore_event_to_box(event);
int idx = event->hw.idx;
- if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
+ if (WARN_ON_ONCE(idx == -1 || idx >= UNCORE_PMC_IDX_MAX))
return;
- if (WARN_ON_ONCE(idx == -1 || idx >= UNCORE_PMC_IDX_MAX))
+ /*
+ * A free running counter is read-only and always active.
+ * Use the current counter value as the start point.
+ * There is no overflow interrupt for a free running counter, so
+ * use an hrtimer to poll it periodically and avoid missing overflows.
+ */
+ if (uncore_pmc_freerunning(event->hw.idx)) {
+ list_add_tail(&event->active_entry, &box->active_list);
+ local64_set(&event->hw.prev_count,
+ uncore_read_counter(box, event));
+ if (box->n_active++ == 0)
+ uncore_pmu_start_hrtimer(box);
+ return;
+ }
+
+ if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
return;
event->hw.state = 0;
@@ -474,11 +491,20 @@ static void uncore_pmu_event_start(struct perf_event *event, int flags)
}
}
-static void uncore_pmu_event_stop(struct perf_event *event, int flags)
+void uncore_pmu_event_stop(struct perf_event *event, int flags)
{
struct intel_uncore_box *box = uncore_event_to_box(event);
struct hw_perf_event *hwc = &event->hw;
+ /* Cannot disable a free running counter, which is read-only */
+ if (uncore_pmc_freerunning(hwc->idx)) {
+ list_del(&event->active_entry);
+ if (--box->n_active == 0)
+ uncore_pmu_cancel_hrtimer(box);
+ uncore_perf_event_update(box, event);
+ return;
+ }
+
if (__test_and_clear_bit(hwc->idx, box->active_mask)) {
uncore_disable_event(box, event);
box->n_active--;
@@ -502,7 +528,7 @@ static void uncore_pmu_event_stop(struct perf_event *event, int flags)
}
}
-static int uncore_pmu_event_add(struct perf_event *event, int flags)
+int uncore_pmu_event_add(struct perf_event *event, int flags)
{
struct intel_uncore_box *box = uncore_event_to_box(event);
struct hw_perf_event *hwc = &event->hw;
@@ -512,6 +538,17 @@ static int uncore_pmu_event_add(struct perf_event *event, int flags)
if (!box)
return -ENODEV;
+ /*
+ * The free running counter is assigned in event_init().
+ * The free running counter event and the free running counter are
+ * 1:1 mapped; the event does not need to be tracked in event_list.
+ */
+ if (uncore_pmc_freerunning(hwc->idx)) {
+ if (flags & PERF_EF_START)
+ uncore_pmu_event_start(event, 0);
+ return 0;
+ }
+
ret = n = uncore_collect_events(box, event, false);
if (ret < 0)
return ret;
@@ -563,13 +600,21 @@ static int uncore_pmu_event_add(struct perf_event *event, int flags)
return 0;
}
-static void uncore_pmu_event_del(struct perf_event *event, int flags)
+void uncore_pmu_event_del(struct perf_event *event, int flags)
{
struct intel_uncore_box *box = uncore_event_to_box(event);
int i;
uncore_pmu_event_stop(event, PERF_EF_UPDATE);
+ /*
+ * The event for a free running counter is not tracked in event_list.
+ * There is no need to force event->hw.idx = -1 to reassign the counter,
+ * because the event and the free running counter are 1:1 mapped.
+ */
+ if (uncore_pmc_freerunning(event->hw.idx))
+ return;
+
for (i = 0; i < box->n_events; i++) {
if (event == box->event_list[i]) {
uncore_put_event_constraint(box, event);
@@ -603,6 +648,10 @@ static int uncore_validate_group(struct intel_uncore_pmu *pmu,
struct intel_uncore_box *fake_box;
int ret = -EINVAL, n;
+ /* The free running counter is always active. */
+ if (uncore_pmc_freerunning(event->hw.idx))
+ return 0;
+
fake_box = uncore_alloc_box(pmu->type, NUMA_NO_NODE);
if (!fake_box)
return -ENOMEM;
@@ -690,6 +739,17 @@ static int uncore_pmu_event_init(struct perf_event *event)
/* fixed counters have event field hardcoded to zero */
hwc->config = 0ULL;
+ } else if (is_freerunning_event(event)) {
+ if (!check_valid_freerunning_event(box, event))
+ return -EINVAL;
+ event->hw.idx = UNCORE_PMC_IDX_FREERUNNING;
+ /*
+ * The free running counter event and free running counter
+ * are always 1:1 mapped.
+ * The free running counter is always active.
+ * Assign the free running counter here.
+ */
+ event->hw.event_base = uncore_freerunning_counter(box, event);
} else {
hwc->config = event->attr.config &
(pmu->type->event_mask | ((u64)pmu->type->event_mask_ext << 32));
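With this change, uncore_perf_event_update() picks a shift of 64 minus the counter width for free running counters, so deltas wrap correctly for counters narrower than 64 bits. A sketch of that delta computation (the helper name is illustrative, not from the patch):

```c
#include <linux/types.h>

/* Delta of a 'bits'-wide, monotonically increasing counter. */
static u64 counter_delta(u64 prev, u64 now, int bits)
{
	int shift = 64 - bits;

	/*
	 * Shift both samples up so the counter's top bit lands in bit 63,
	 * subtract, then shift back down; two's-complement arithmetic
	 * handles the wraparound.
	 */
	return ((now << shift) - (prev << shift)) >> shift;
}
```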
diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h
index 414dc7e7c950..c9e1e0bef3c3 100644
--- a/arch/x86/events/intel/uncore.h
+++ b/arch/x86/events/intel/uncore.h
@@ -12,8 +12,13 @@
#define UNCORE_FIXED_EVENT 0xff
#define UNCORE_PMC_IDX_MAX_GENERIC 8
+#define UNCORE_PMC_IDX_MAX_FIXED 1
+#define UNCORE_PMC_IDX_MAX_FREERUNNING 1
#define UNCORE_PMC_IDX_FIXED UNCORE_PMC_IDX_MAX_GENERIC
-#define UNCORE_PMC_IDX_MAX (UNCORE_PMC_IDX_FIXED + 1)
+#define UNCORE_PMC_IDX_FREERUNNING (UNCORE_PMC_IDX_FIXED + \
+ UNCORE_PMC_IDX_MAX_FIXED)
+#define UNCORE_PMC_IDX_MAX (UNCORE_PMC_IDX_FREERUNNING + \
+ UNCORE_PMC_IDX_MAX_FREERUNNING)
#define UNCORE_PCI_DEV_FULL_DATA(dev, func, type, idx) \
((dev << 24) | (func << 16) | (type << 8) | idx)
@@ -35,6 +40,7 @@ struct intel_uncore_ops;
struct intel_uncore_pmu;
struct intel_uncore_box;
struct uncore_event_desc;
+struct freerunning_counters;
struct intel_uncore_type {
const char *name;
@@ -42,6 +48,7 @@ struct intel_uncore_type {
int num_boxes;
int perf_ctr_bits;
int fixed_ctr_bits;
+ int num_freerunning_types;
unsigned perf_ctr;
unsigned event_ctl;
unsigned event_mask;
@@ -59,6 +66,7 @@ struct intel_uncore_type {
struct intel_uncore_pmu *pmus;
struct intel_uncore_ops *ops;
struct uncore_event_desc *event_descs;
+ struct freerunning_counters *freerunning;
const struct attribute_group *attr_groups[4];
struct pmu *pmu; /* for custom pmu ops */
};
@@ -129,6 +137,14 @@ struct uncore_event_desc {
const char *config;
};
+struct freerunning_counters {
+ unsigned int counter_base;
+ unsigned int counter_offset;
+ unsigned int box_offset;
+ unsigned int num_counters;
+ unsigned int bits;
+};
+
struct pci2phy_map {
struct list_head list;
int segment;
@@ -157,6 +173,16 @@ static ssize_t __uncore_##_var##_show(struct kobject *kobj, \
static struct kobj_attribute format_attr_##_var = \
__ATTR(_name, 0444, __uncore_##_var##_show, NULL)
+static inline bool uncore_pmc_fixed(int idx)
+{
+ return idx == UNCORE_PMC_IDX_FIXED;
+}
+
+static inline bool uncore_pmc_freerunning(int idx)
+{
+ return idx == UNCORE_PMC_IDX_FREERUNNING;
+}
+
static inline unsigned uncore_pci_box_ctl(struct intel_uncore_box *box)
{
return box->pmu->type->box_ctl;
@@ -214,6 +240,60 @@ static inline unsigned uncore_msr_fixed_ctr(struct intel_uncore_box *box)
return box->pmu->type->fixed_ctr + uncore_msr_box_offset(box);
}
+
+/*
+ * In the uncore documentation, no event-code is assigned to the free running
+ * counters, so dedicated events must be defined to select them. These events
+ * are encoded as event-code + umask-code.
+ *
+ * The event-code for all free running counters is 0xff, which is the same as
+ * the fixed counters.
+ *
+ * The umask-code is used to distinguish a fixed counter and a free running
+ * counter, and different types of free running counters.
+ * - For fixed counters, the umask-code is 0x0X.
+ * X indicates the index of the fixed counter, which starts from 0.
+ * - For free running counters, the umask-code uses the rest of the space.
+ * It has the format 0xXY.
+ * X stands for the type of free running counter, which starts from 1.
+ * Y stands for the index of free running counters of the same type,
+ * which starts from 0.
+ *
+ * For example, there are three types of IIO free running counters on Skylake
+ * server, IO CLOCKS counters, BANDWIDTH counters and UTILIZATION counters.
+ * The event-code for all the free running counters is 0xff.
+ * 'ioclk' is the first counter of IO CLOCKS. IO CLOCKS is the first type,
+ * whose umask-code starts from 0x10.
+ * So 'ioclk' is encoded as event=0xff,umask=0x10.
+ * 'bw_in_port2' is the third of the BANDWIDTH counters. BANDWIDTH is the
+ * second type, whose umask-code starts from 0x20.
+ * So 'bw_in_port2' is encoded as event=0xff,umask=0x22.
+ */
+static inline unsigned int uncore_freerunning_idx(u64 config)
+{
+ return ((config >> 8) & 0xf);
+}
+
+#define UNCORE_FREERUNNING_UMASK_START 0x10
+
+static inline unsigned int uncore_freerunning_type(u64 config)
+{
+ return ((((config >> 8) - UNCORE_FREERUNNING_UMASK_START) >> 4) & 0xf);
+}
+
+static inline
+unsigned int uncore_freerunning_counter(struct intel_uncore_box *box,
+ struct perf_event *event)
+{
+ unsigned int type = uncore_freerunning_type(event->attr.config);
+ unsigned int idx = uncore_freerunning_idx(event->attr.config);
+ struct intel_uncore_pmu *pmu = box->pmu;
+
+ return pmu->type->freerunning[type].counter_base +
+ pmu->type->freerunning[type].counter_offset * idx +
+ pmu->type->freerunning[type].box_offset * pmu->pmu_idx;
+}
+
static inline
unsigned uncore_msr_event_ctl(struct intel_uncore_box *box, int idx)
{
@@ -276,11 +356,52 @@ static inline int uncore_fixed_ctr_bits(struct intel_uncore_box *box)
return box->pmu->type->fixed_ctr_bits;
}
+static inline
+unsigned int uncore_freerunning_bits(struct intel_uncore_box *box,
+ struct perf_event *event)
+{
+ unsigned int type = uncore_freerunning_type(event->attr.config);
+
+ return box->pmu->type->freerunning[type].bits;
+}
+
+static inline int uncore_num_freerunning(struct intel_uncore_box *box,
+ struct perf_event *event)
+{
+ unsigned int type = uncore_freerunning_type(event->attr.config);
+
+ return box->pmu->type->freerunning[type].num_counters;
+}
+
+static inline int uncore_num_freerunning_types(struct intel_uncore_box *box,
+ struct perf_event *event)
+{
+ return box->pmu->type->num_freerunning_types;
+}
+
+static inline bool check_valid_freerunning_event(struct intel_uncore_box *box,
+ struct perf_event *event)
+{
+ unsigned int type = uncore_freerunning_type(event->attr.config);
+ unsigned int idx = uncore_freerunning_idx(event->attr.config);
+
+ return (type < uncore_num_freerunning_types(box, event)) &&
+ (idx < uncore_num_freerunning(box, event));
+}
+
static inline int uncore_num_counters(struct intel_uncore_box *box)
{
return box->pmu->type->num_counters;
}
+static inline bool is_freerunning_event(struct perf_event *event)
+{
+ u64 cfg = event->attr.config;
+
+ return ((cfg & UNCORE_FIXED_EVENT) == UNCORE_FIXED_EVENT) &&
+ (((cfg >> 8) & 0xff) >= UNCORE_FREERUNNING_UMASK_START);
+}
+
static inline void uncore_disable_box(struct intel_uncore_box *box)
{
if (box->pmu->type->ops->disable_box)
@@ -346,6 +467,10 @@ struct intel_uncore_box *uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu
u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *event);
void uncore_pmu_start_hrtimer(struct intel_uncore_box *box);
void uncore_pmu_cancel_hrtimer(struct intel_uncore_box *box);
+void uncore_pmu_event_start(struct perf_event *event, int flags);
+void uncore_pmu_event_stop(struct perf_event *event, int flags);
+int uncore_pmu_event_add(struct perf_event *event, int flags);
+void uncore_pmu_event_del(struct perf_event *event, int flags);
void uncore_pmu_event_read(struct perf_event *event);
void uncore_perf_event_update(struct intel_uncore_box *box, struct perf_event *event);
struct event_constraint *
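To make the encoding comment above concrete, here is a worked decode of 'bw_in_port2' (event=0xff,umask=0x22, defined for the Skylake server IIO PMU later in this patch), using the same bit manipulation as uncore_freerunning_type()/uncore_freerunning_idx(); the MSR arithmetic follows the skx_iio_freerunning[] table (base 0xb00, counter offset 0x1, box offset 0x10). A standalone sketch, not kernel code:

```c
#include <stdio.h>

#define UNCORE_FREERUNNING_UMASK_START 0x10

int main(void)
{
	unsigned long long config = (0x22ULL << 8) | 0xff; /* event=0xff,umask=0x22 */
	unsigned int umask = (config >> 8) & 0xff;
	unsigned int type = ((umask - UNCORE_FREERUNNING_UMASK_START) >> 4) & 0xf;
	unsigned int idx = umask & 0xf;

	printf("type=%u idx=%u\n", type, idx);	 /* type=1 (BANDWIDTH), idx=2 */
	printf("msr=0x%x\n", 0xb00 + 0x1 * idx); /* 0xb02 for the first box */
	return 0;
}
```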
diff --git a/arch/x86/events/intel/uncore_nhmex.c b/arch/x86/events/intel/uncore_nhmex.c
index 93e7a8397cde..173e2674be6e 100644
--- a/arch/x86/events/intel/uncore_nhmex.c
+++ b/arch/x86/events/intel/uncore_nhmex.c
@@ -246,7 +246,7 @@ static void nhmex_uncore_msr_enable_event(struct intel_uncore_box *box, struct p
{
struct hw_perf_event *hwc = &event->hw;
- if (hwc->idx >= UNCORE_PMC_IDX_FIXED)
+ if (hwc->idx == UNCORE_PMC_IDX_FIXED)
wrmsrl(hwc->config_base, NHMEX_PMON_CTL_EN_BIT0);
else if (box->pmu->type->event_mask & NHMEX_PMON_CTL_EN_BIT0)
wrmsrl(hwc->config_base, hwc->config | NHMEX_PMON_CTL_EN_BIT22);
diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c
index aee5e8496be4..8527c3e1038b 100644
--- a/arch/x86/events/intel/uncore_snb.c
+++ b/arch/x86/events/intel/uncore_snb.c
@@ -285,6 +285,15 @@ static struct uncore_event_desc snb_uncore_imc_events[] = {
#define SNB_UNCORE_PCI_IMC_DATA_WRITES_BASE 0x5054
#define SNB_UNCORE_PCI_IMC_CTR_BASE SNB_UNCORE_PCI_IMC_DATA_READS_BASE
+enum perf_snb_uncore_imc_freerunning_types {
+ SNB_PCI_UNCORE_IMC_DATA = 0,
+ SNB_PCI_UNCORE_IMC_FREERUNNING_TYPE_MAX,
+};
+
+static struct freerunning_counters snb_uncore_imc_freerunning[] = {
+ [SNB_PCI_UNCORE_IMC_DATA] = { SNB_UNCORE_PCI_IMC_DATA_READS_BASE, 0x4, 0x0, 2, 32 },
+};
+
static struct attribute *snb_uncore_imc_formats_attr[] = {
&format_attr_event.attr,
NULL,
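Reading the table entry above against struct freerunning_counters from uncore.h makes the field meanings clear. An annotated sketch of the same initializer (the variable name is illustrative):

```c
static struct freerunning_counters snb_imc_data_annotated = {
	.counter_base   = SNB_UNCORE_PCI_IMC_DATA_READS_BASE,
	.counter_offset = 0x4,	/* the second counter (data writes) sits at
				 * base + 0x4, i.e. at
				 * SNB_UNCORE_PCI_IMC_DATA_WRITES_BASE */
	.box_offset     = 0x0,	/* single box, no per-box stride */
	.num_counters   = 2,	/* data reads + data writes */
	.bits           = 32,	/* counter width */
};
```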
@@ -341,9 +350,8 @@ static u64 snb_uncore_imc_read_counter(struct intel_uncore_box *box, struct perf
}
/*
- * custom event_init() function because we define our own fixed, free
- * running counters, so we do not want to conflict with generic uncore
- * logic. Also simplifies processing
+ * Keep the custom event_init() function compatible with old event
+ * encoding for free running counters.
*/
static int snb_uncore_imc_event_init(struct perf_event *event)
{
@@ -405,11 +413,11 @@ static int snb_uncore_imc_event_init(struct perf_event *event)
switch (cfg) {
case SNB_UNCORE_PCI_IMC_DATA_READS:
base = SNB_UNCORE_PCI_IMC_DATA_READS_BASE;
- idx = UNCORE_PMC_IDX_FIXED;
+ idx = UNCORE_PMC_IDX_FREERUNNING;
break;
case SNB_UNCORE_PCI_IMC_DATA_WRITES:
base = SNB_UNCORE_PCI_IMC_DATA_WRITES_BASE;
- idx = UNCORE_PMC_IDX_FIXED + 1;
+ idx = UNCORE_PMC_IDX_FREERUNNING;
break;
default:
return -EINVAL;
@@ -430,75 +438,6 @@ static int snb_uncore_imc_hw_config(struct intel_uncore_box *box, struct perf_ev
return 0;
}
-static void snb_uncore_imc_event_start(struct perf_event *event, int flags)
-{
- struct intel_uncore_box *box = uncore_event_to_box(event);
- u64 count;
-
- if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
- return;
-
- event->hw.state = 0;
- box->n_active++;
-
- list_add_tail(&event->active_entry, &box->active_list);
-
- count = snb_uncore_imc_read_counter(box, event);
- local64_set(&event->hw.prev_count, count);
-
- if (box->n_active == 1)
- uncore_pmu_start_hrtimer(box);
-}
-
-static void snb_uncore_imc_event_stop(struct perf_event *event, int flags)
-{
- struct intel_uncore_box *box = uncore_event_to_box(event);
- struct hw_perf_event *hwc = &event->hw;
-
- if (!(hwc->state & PERF_HES_STOPPED)) {
- box->n_active--;
-
- WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
- hwc->state |= PERF_HES_STOPPED;
-
- list_del(&event->active_entry);
-
- if (box->n_active == 0)
- uncore_pmu_cancel_hrtimer(box);
- }
-
- if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
- /*
- * Drain the remaining delta count out of a event
- * that we are disabling:
- */
- uncore_perf_event_update(box, event);
- hwc->state |= PERF_HES_UPTODATE;
- }
-}
-
-static int snb_uncore_imc_event_add(struct perf_event *event, int flags)
-{
- struct intel_uncore_box *box = uncore_event_to_box(event);
- struct hw_perf_event *hwc = &event->hw;
-
- if (!box)
- return -ENODEV;
-
- hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
- if (!(flags & PERF_EF_START))
- hwc->state |= PERF_HES_ARCH;
-
- snb_uncore_imc_event_start(event, 0);
-
- return 0;
-}
-
-static void snb_uncore_imc_event_del(struct perf_event *event, int flags)
-{
- snb_uncore_imc_event_stop(event, PERF_EF_UPDATE);
-}
-
int snb_pci2phy_map_init(int devid)
{
struct pci_dev *dev = NULL;
@@ -530,10 +469,10 @@ int snb_pci2phy_map_init(int devid)
static struct pmu snb_uncore_imc_pmu = {
.task_ctx_nr = perf_invalid_context,
.event_init = snb_uncore_imc_event_init,
- .add = snb_uncore_imc_event_add,
- .del = snb_uncore_imc_event_del,
- .start = snb_uncore_imc_event_start,
- .stop = snb_uncore_imc_event_stop,
+ .add = uncore_pmu_event_add,
+ .del = uncore_pmu_event_del,
+ .start = uncore_pmu_event_start,
+ .stop = uncore_pmu_event_stop,
.read = uncore_pmu_event_read,
};
@@ -552,12 +491,10 @@ static struct intel_uncore_type snb_uncore_imc = {
.name = "imc",
.num_counters = 2,
.num_boxes = 1,
- .fixed_ctr_bits = 32,
- .fixed_ctr = SNB_UNCORE_PCI_IMC_CTR_BASE,
+ .num_freerunning_types = SNB_PCI_UNCORE_IMC_FREERUNNING_TYPE_MAX,
+ .freerunning = snb_uncore_imc_freerunning,
.event_descs = snb_uncore_imc_events,
.format_group = &snb_uncore_imc_format_group,
- .perf_ctr = SNB_UNCORE_PCI_IMC_DATA_READS_BASE,
- .event_mask = SNB_UNCORE_PCI_IMC_EVENT_MASK,
.ops = &snb_uncore_imc_ops,
.pmu = &snb_uncore_imc_pmu,
};
diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c
index 77076a102e34..87dc0263a2e1 100644
--- a/arch/x86/events/intel/uncore_snbep.c
+++ b/arch/x86/events/intel/uncore_snbep.c
@@ -3522,6 +3522,87 @@ static struct intel_uncore_type skx_uncore_iio = {
.format_group = &skx_uncore_iio_format_group,
};
+enum perf_uncore_iio_freerunning_type_id {
+ SKX_IIO_MSR_IOCLK = 0,
+ SKX_IIO_MSR_BW = 1,
+ SKX_IIO_MSR_UTIL = 2,
+
+ SKX_IIO_FREERUNNING_TYPE_MAX,
+};
+
+static struct freerunning_counters skx_iio_freerunning[] = {
+ [SKX_IIO_MSR_IOCLK] = { 0xa45, 0x1, 0x20, 1, 36 },
+ [SKX_IIO_MSR_BW] = { 0xb00, 0x1, 0x10, 8, 36 },
+ [SKX_IIO_MSR_UTIL] = { 0xb08, 0x1, 0x10, 8, 36 },
+};
+
+static struct uncore_event_desc skx_uncore_iio_freerunning_events[] = {
+ /* Free-Running IO CLOCKS Counter */
+ INTEL_UNCORE_EVENT_DESC(ioclk, "event=0xff,umask=0x10"),
+ /* Free-Running IIO BANDWIDTH Counters */
+ INTEL_UNCORE_EVENT_DESC(bw_in_port0, "event=0xff,umask=0x20"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port0.scale, "3.814697266e-6"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port0.unit, "MiB"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port1, "event=0xff,umask=0x21"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port1.scale, "3.814697266e-6"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port1.unit, "MiB"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port2, "event=0xff,umask=0x22"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port2.scale, "3.814697266e-6"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port2.unit, "MiB"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port3, "event=0xff,umask=0x23"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port3.scale, "3.814697266e-6"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port3.unit, "MiB"),
+ INTEL_UNCORE_EVENT_DESC(bw_out_port0, "event=0xff,umask=0x24"),
+ INTEL_UNCORE_EVENT_DESC(bw_out_port0.scale, "3.814697266e-6"),
+ INTEL_UNCORE_EVENT_DESC(bw_out_port0.unit, "MiB"),
+ INTEL_UNCORE_EVENT_DESC(bw_out_port1, "event=0xff,umask=0x25"),
+ INTEL_UNCORE_EVENT_DESC(bw_out_port1.scale, "3.814697266e-6"),
+ INTEL_UNCORE_EVENT_DESC(bw_out_port1.unit, "MiB"),
+ INTEL_UNCORE_EVENT_DESC(bw_out_port2, "event=0xff,umask=0x26"),
+ INTEL_UNCORE_EVENT_DESC(bw_out_port2.scale, "3.814697266e-6"),
+ INTEL_UNCORE_EVENT_DESC(bw_out_port2.unit, "MiB"),
+ INTEL_UNCORE_EVENT_DESC(bw_out_port3, "event=0xff,umask=0x27"),
+ INTEL_UNCORE_EVENT_DESC(bw_out_port3.scale, "3.814697266e-6"),
+ INTEL_UNCORE_EVENT_DESC(bw_out_port3.unit, "MiB"),
+ /* Free-running IIO UTILIZATION Counters */
+ INTEL_UNCORE_EVENT_DESC(util_in_port0, "event=0xff,umask=0x30"),
+ INTEL_UNCORE_EVENT_DESC(util_out_port0, "event=0xff,umask=0x31"),
+ INTEL_UNCORE_EVENT_DESC(util_in_port1, "event=0xff,umask=0x32"),
+ INTEL_UNCORE_EVENT_DESC(util_out_port1, "event=0xff,umask=0x33"),
+ INTEL_UNCORE_EVENT_DESC(util_in_port2, "event=0xff,umask=0x34"),
+ INTEL_UNCORE_EVENT_DESC(util_out_port2, "event=0xff,umask=0x35"),
+ INTEL_UNCORE_EVENT_DESC(util_in_port3, "event=0xff,umask=0x36"),
+ INTEL_UNCORE_EVENT_DESC(util_out_port3, "event=0xff,umask=0x37"),
+ { /* end: all zeroes */ },
+};
+
+static struct intel_uncore_ops skx_uncore_iio_freerunning_ops = {
+ .read_counter = uncore_msr_read_counter,
+};
+
+static struct attribute *skx_uncore_iio_freerunning_formats_attr[] = {
+ &format_attr_event.attr,
+ &format_attr_umask.attr,
+ NULL,
+};
+
+static const struct attribute_group skx_uncore_iio_freerunning_format_group = {
+ .name = "format",
+ .attrs = skx_uncore_iio_freerunning_formats_attr,
+};
+
+static struct intel_uncore_type skx_uncore_iio_free_running = {
+ .name = "iio_free_running",
+ .num_counters = 17,
+ .num_boxes = 6,
+ .num_freerunning_types = SKX_IIO_FREERUNNING_TYPE_MAX,
+ .freerunning = skx_iio_freerunning,
+ .ops = &skx_uncore_iio_freerunning_ops,
+ .event_descs = skx_uncore_iio_freerunning_events,
+ .format_group = &skx_uncore_iio_freerunning_format_group,
+};
+
static struct attribute *skx_uncore_formats_attr[] = {
&format_attr_event.attr,
&format_attr_umask.attr,
@@ -3595,6 +3676,7 @@ static struct intel_uncore_type *skx_msr_uncores[] = {
&skx_uncore_ubox,
&skx_uncore_chabox,
&skx_uncore_iio,
+ &skx_uncore_iio_free_running,
&skx_uncore_irp,
&skx_uncore_pcu,
NULL,
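A hedged note on the .scale strings above: 4.0 / (1 << 20) equals 3.814697265625e-6, which matches the scale to the printed precision, so each raw bandwidth count appears to represent 4 bytes; perf multiplies raw counts by the scale to report MiB. A sketch of the conversion:

```c
/* Convert a raw bw_* count to MiB the way perf applies the scale. */
static double bw_counts_to_mib(unsigned long long raw_count)
{
	return (double)raw_count * 3.814697266e-6;	/* == raw * 4 / 2^20 */
}
```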
diff --git a/arch/x86/events/msr.c b/arch/x86/events/msr.c
index e7edf19e64c2..b4771a6ddbc1 100644
--- a/arch/x86/events/msr.c
+++ b/arch/x86/events/msr.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/perf_event.h>
+#include <linux/nospec.h>
#include <asm/intel-family.h>
enum perf_msr_id {
@@ -158,9 +159,6 @@ static int msr_event_init(struct perf_event *event)
if (event->attr.type != event->pmu->type)
return -ENOENT;
- if (cfg >= PERF_MSR_EVENT_MAX)
- return -EINVAL;
-
/* unsupported modes and filters */
if (event->attr.exclude_user ||
event->attr.exclude_kernel ||
@@ -171,6 +169,11 @@ static int msr_event_init(struct perf_event *event)
event->attr.sample_period) /* no sampling */
return -EINVAL;
+ if (cfg >= PERF_MSR_EVENT_MAX)
+ return -EINVAL;
+
+ cfg = array_index_nospec((unsigned long)cfg, PERF_MSR_EVENT_MAX);
+
if (!msr[cfg].attr)
return -EINVAL;
diff --git a/arch/x86/include/asm/cacheinfo.h b/arch/x86/include/asm/cacheinfo.h
new file mode 100644
index 000000000000..e958e28f7ab5
--- /dev/null
+++ b/arch/x86/include/asm/cacheinfo.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_CACHEINFO_H
+#define _ASM_X86_CACHEINFO_H
+
+void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, int cpu, u8 node_id);
+
+#endif /* _ASM_X86_CACHEINFO_H */
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index b27da9602a6d..aced6c9290d6 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -140,6 +140,20 @@ extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit);
#define setup_force_cpu_bug(bit) setup_force_cpu_cap(bit)
+#if defined(__clang__) && !defined(CC_HAVE_ASM_GOTO)
+
+/*
+ * Workaround for BPF compilation, which uses kernel headers: clang does
+ * not support ASM GOTO and would otherwise fail the build.
+ */
+#ifndef __BPF_TRACING__
+#warning "Compiler lacks ASM_GOTO support. Add -D __BPF_TRACING__ to your compiler arguments"
+#endif
+
+#define static_cpu_has(bit) boot_cpu_has(bit)
+
+#else
+
/*
* Static testing of CPU features. Used the same as boot_cpu_has().
* These will statically patch the target code for additional
@@ -195,6 +209,7 @@ t_no:
boot_cpu_has(bit) : \
_static_cpu_has(bit) \
)
+#endif
#define cpu_has_bug(c, bit) cpu_has(c, (bit))
#define set_cpu_bug(c, bit) set_cpu_cap(c, (bit))
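From the BPF side, the workaround amounts to defining __BPF_TRACING__ before any kernel header is pulled in (or passing -D__BPF_TRACING__ on the clang command line, as the warning text suggests), so that static_cpu_has() degrades to boot_cpu_has(). A sketch:

```c
#define __BPF_TRACING__		/* must precede all kernel includes */
#include <asm/cpufeature.h>	/* now selects the boot_cpu_has() fallback */
```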
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 578793e97431..fb00a2fca990 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -198,7 +198,6 @@
#define X86_FEATURE_CAT_L2 ( 7*32+ 5) /* Cache Allocation Technology L2 */
#define X86_FEATURE_CDP_L3 ( 7*32+ 6) /* Code and Data Prioritization L3 */
#define X86_FEATURE_INVPCID_SINGLE ( 7*32+ 7) /* Effectively INVPCID && CR4.PCIDE=1 */
-
#define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */
#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
#define X86_FEATURE_SME ( 7*32+10) /* AMD Secure Memory Encryption */
@@ -207,13 +206,19 @@
#define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* "" AMD Retpoline mitigation for Spectre variant 2 */
#define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */
#define X86_FEATURE_CDP_L2 ( 7*32+15) /* Code and Data Prioritization L2 */
-
+#define X86_FEATURE_MSR_SPEC_CTRL ( 7*32+16) /* "" MSR SPEC_CTRL is implemented */
+#define X86_FEATURE_SSBD ( 7*32+17) /* Speculative Store Bypass Disable */
#define X86_FEATURE_MBA ( 7*32+18) /* Memory Bandwidth Allocation */
#define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* "" Fill RSB on context switches */
#define X86_FEATURE_SEV ( 7*32+20) /* AMD Secure Encrypted Virtualization */
-
#define X86_FEATURE_USE_IBPB ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */
#define X86_FEATURE_USE_IBRS_FW ( 7*32+22) /* "" Use IBRS during runtime firmware calls */
+#define X86_FEATURE_SPEC_STORE_BYPASS_DISABLE ( 7*32+23) /* "" Disable Speculative Store Bypass. */
+#define X86_FEATURE_LS_CFG_SSBD ( 7*32+24) /* "" AMD SSBD implementation via LS_CFG MSR */
+#define X86_FEATURE_IBRS ( 7*32+25) /* Indirect Branch Restricted Speculation */
+#define X86_FEATURE_IBPB ( 7*32+26) /* Indirect Branch Prediction Barrier */
+#define X86_FEATURE_STIBP ( 7*32+27) /* Single Thread Indirect Branch Predictors */
+#define X86_FEATURE_ZEN ( 7*32+28) /* "" CPU is AMD family 0x17 (Zen) */
/* Virtualization flags: Linux defined, word 8 */
#define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */
@@ -274,9 +279,10 @@
#define X86_FEATURE_CLZERO (13*32+ 0) /* CLZERO instruction */
#define X86_FEATURE_IRPERF (13*32+ 1) /* Instructions Retired Count */
#define X86_FEATURE_XSAVEERPTR (13*32+ 2) /* Always save/restore FP error pointers */
-#define X86_FEATURE_IBPB (13*32+12) /* Indirect Branch Prediction Barrier */
-#define X86_FEATURE_IBRS (13*32+14) /* Indirect Branch Restricted Speculation */
-#define X86_FEATURE_STIBP (13*32+15) /* Single Thread Indirect Branch Predictors */
+#define X86_FEATURE_AMD_IBPB (13*32+12) /* "" Indirect Branch Prediction Barrier */
+#define X86_FEATURE_AMD_IBRS (13*32+14) /* "" Indirect Branch Restricted Speculation */
+#define X86_FEATURE_AMD_STIBP (13*32+15) /* "" Single Thread Indirect Branch Predictors */
+#define X86_FEATURE_VIRT_SSBD (13*32+25) /* Virtualized Speculative Store Bypass Disable */
/* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */
#define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */
@@ -334,6 +340,7 @@
#define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */
#define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */
#define X86_FEATURE_ARCH_CAPABILITIES (18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */
+#define X86_FEATURE_SPEC_CTRL_SSBD (18*32+31) /* "" Speculative Store Bypass Disable */
/*
* BUG word(s)
@@ -363,5 +370,6 @@
#define X86_BUG_CPU_MELTDOWN X86_BUG(14) /* CPU is affected by meltdown attack and needs kernel page table isolation */
#define X86_BUG_SPECTRE_V1 X86_BUG(15) /* CPU is affected by Spectre variant 1 attack with conditional branches */
#define X86_BUG_SPECTRE_V2 X86_BUG(16) /* CPU is affected by Spectre variant 2 attack with indirect branches */
+#define X86_BUG_SPEC_STORE_BYPASS X86_BUG(17) /* CPU is affected by speculative store bypass attack */
#endif /* _ASM_X86_CPUFEATURES_H */
diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h
index 89ce4bfd241f..ce4d176b3d13 100644
--- a/arch/x86/include/asm/dma-mapping.h
+++ b/arch/x86/include/asm/dma-mapping.h
@@ -30,10 +30,7 @@ static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus)
return dma_ops;
}
-int arch_dma_supported(struct device *dev, u64 mask);
-#define arch_dma_supported arch_dma_supported
-
-bool arch_dma_alloc_attrs(struct device **dev, gfp_t *gfp);
+bool arch_dma_alloc_attrs(struct device **dev);
#define arch_dma_alloc_attrs arch_dma_alloc_attrs
#endif
diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h
index 5ea2afd4c871..740a428acf1e 100644
--- a/arch/x86/include/asm/hardirq.h
+++ b/arch/x86/include/asm/hardirq.h
@@ -50,14 +50,6 @@ DECLARE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
#define inc_irq_stat(member) this_cpu_inc(irq_stat.member)
-#define local_softirq_pending() this_cpu_read(irq_stat.__softirq_pending)
-
-#define __ARCH_SET_SOFTIRQ_PENDING
-
-#define set_softirq_pending(x) \
- this_cpu_write(irq_stat.__softirq_pending, (x))
-#define or_softirq_pending(x) this_cpu_or(irq_stat.__softirq_pending, (x))
-
extern void ack_bad_irq(unsigned int irq);
extern u64 arch_irq_stat_cpu(unsigned int cpu);
diff --git a/arch/x86/include/asm/insn.h b/arch/x86/include/asm/insn.h
index b3e32b010ab1..c2c01f84df75 100644
--- a/arch/x86/include/asm/insn.h
+++ b/arch/x86/include/asm/insn.h
@@ -208,4 +208,22 @@ static inline int insn_offset_immediate(struct insn *insn)
return insn_offset_displacement(insn) + insn->displacement.nbytes;
}
+#define POP_SS_OPCODE 0x1f
+#define MOV_SREG_OPCODE 0x8e
+
+/*
+ * Intel SDM Vol.3A 6.8.3 states:
+ * "Any single-step trap that would be delivered following the MOV to SS
+ * instruction or POP to SS instruction (because EFLAGS.TF is 1) is
+ * suppressed."
+ * This function returns true if @insn is MOV SS or POP SS. On these
+ * instructions, single stepping is suppressed.
+ */
+static inline int insn_masking_exception(struct insn *insn)
+{
+ return insn->opcode.bytes[0] == POP_SS_OPCODE ||
+ (insn->opcode.bytes[0] == MOV_SREG_OPCODE &&
+ X86_MODRM_REG(insn->modrm.bytes[0]) == 2);
+}
+
#endif /* _ASM_X86_INSN_H */
diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
index f6e5b9375d8c..6de64840dd22 100644
--- a/arch/x86/include/asm/io.h
+++ b/arch/x86/include/asm/io.h
@@ -94,10 +94,10 @@ build_mmio_write(__writel, "l", unsigned int, "r", )
#ifdef CONFIG_X86_64
-build_mmio_read(readq, "q", unsigned long, "=r", :"memory")
-build_mmio_read(__readq, "q", unsigned long, "=r", )
-build_mmio_write(writeq, "q", unsigned long, "r", :"memory")
-build_mmio_write(__writeq, "q", unsigned long, "r", )
+build_mmio_read(readq, "q", u64, "=r", :"memory")
+build_mmio_read(__readq, "q", u64, "=r", )
+build_mmio_write(writeq, "q", u64, "r", :"memory")
+build_mmio_write(__writeq, "q", u64, "r", )
#define readq_relaxed(a) __readq(a)
#define writeq_relaxed(v, a) __writeq(v, a)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index c25775fad4ed..f4b2588865e9 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -924,7 +924,7 @@ struct kvm_x86_ops {
int (*hardware_setup)(void); /* __init */
void (*hardware_unsetup)(void); /* __exit */
bool (*cpu_has_accelerated_tpr)(void);
- bool (*cpu_has_high_real_mode_segbase)(void);
+ bool (*has_emulated_msr)(int index);
void (*cpuid_update)(struct kvm_vcpu *vcpu);
struct kvm *(*vm_alloc)(void);
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 57e3785d0d26..cf9911b5a53c 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -193,7 +193,7 @@ static inline int init_new_context(struct task_struct *tsk,
#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
if (cpu_feature_enabled(X86_FEATURE_OSPKE)) {
- /* pkey 0 is the default and always allocated */
+ /* pkey 0 is the default and allocated implicitly */
mm->context.pkey_allocation_map = 0x1;
/* -1 means unallocated or invalid */
mm->context.execute_only_pkey = -1;
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 53d5b1b9255e..fda2114197b3 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -42,6 +42,8 @@
#define MSR_IA32_SPEC_CTRL 0x00000048 /* Speculation Control */
#define SPEC_CTRL_IBRS (1 << 0) /* Indirect Branch Restricted Speculation */
#define SPEC_CTRL_STIBP (1 << 1) /* Single Thread Indirect Branch Predictors */
+#define SPEC_CTRL_SSBD_SHIFT 2 /* Speculative Store Bypass Disable bit */
+#define SPEC_CTRL_SSBD (1 << SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */
#define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */
#define PRED_CMD_IBPB (1 << 0) /* Indirect Branch Prediction Barrier */
@@ -68,6 +70,11 @@
#define MSR_IA32_ARCH_CAPABILITIES 0x0000010a
#define ARCH_CAP_RDCL_NO (1 << 0) /* Not susceptible to Meltdown */
#define ARCH_CAP_IBRS_ALL (1 << 1) /* Enhanced IBRS support */
+#define ARCH_CAP_SSB_NO (1 << 4) /*
+ * Not susceptible to Speculative Store Bypass
+ * attack, so no Speculative Store Bypass
+ * control required.
+ */
#define MSR_IA32_BBL_CR_CTL 0x00000119
#define MSR_IA32_BBL_CR_CTL3 0x0000011e
@@ -340,6 +347,8 @@
#define MSR_AMD64_SEV_ENABLED_BIT 0
#define MSR_AMD64_SEV_ENABLED BIT_ULL(MSR_AMD64_SEV_ENABLED_BIT)
+#define MSR_AMD64_VIRT_SPEC_CTRL 0xc001011f
+
/* Fam 17h MSRs */
#define MSR_F17H_IRPERF 0xc00000e9
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index f928ad9b143f..8b38df98548e 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -217,6 +217,14 @@ enum spectre_v2_mitigation {
SPECTRE_V2_IBRS,
};
+/* The Speculative Store Bypass disable variants */
+enum ssb_mitigation {
+ SPEC_STORE_BYPASS_NONE,
+ SPEC_STORE_BYPASS_DISABLE,
+ SPEC_STORE_BYPASS_PRCTL,
+ SPEC_STORE_BYPASS_SECCOMP,
+};
+
extern char __indirect_thunk_start[];
extern char __indirect_thunk_end[];
@@ -241,22 +249,27 @@ static inline void vmexit_fill_RSB(void)
#endif
}
-#define alternative_msr_write(_msr, _val, _feature) \
- asm volatile(ALTERNATIVE("", \
- "movl %[msr], %%ecx\n\t" \
- "movl %[val], %%eax\n\t" \
- "movl $0, %%edx\n\t" \
- "wrmsr", \
- _feature) \
- : : [msr] "i" (_msr), [val] "i" (_val) \
- : "eax", "ecx", "edx", "memory")
+static __always_inline
+void alternative_msr_write(unsigned int msr, u64 val, unsigned int feature)
+{
+ asm volatile(ALTERNATIVE("", "wrmsr", %c[feature])
+ : : "c" (msr),
+ "a" ((u32)val),
+ "d" ((u32)(val >> 32)),
+ [feature] "i" (feature)
+ : "memory");
+}
static inline void indirect_branch_prediction_barrier(void)
{
- alternative_msr_write(MSR_IA32_PRED_CMD, PRED_CMD_IBPB,
- X86_FEATURE_USE_IBPB);
+ u64 val = PRED_CMD_IBPB;
+
+ alternative_msr_write(MSR_IA32_PRED_CMD, val, X86_FEATURE_USE_IBPB);
}
+/* The Intel SPEC CTRL MSR base value cache */
+extern u64 x86_spec_ctrl_base;
+
/*
* With retpoline, we must use IBRS to restrict branch prediction
* before calling into firmware.
@@ -265,14 +278,18 @@ static inline void indirect_branch_prediction_barrier(void)
*/
#define firmware_restrict_branch_speculation_start() \
do { \
+ u64 val = x86_spec_ctrl_base | SPEC_CTRL_IBRS; \
+ \
preempt_disable(); \
- alternative_msr_write(MSR_IA32_SPEC_CTRL, SPEC_CTRL_IBRS, \
+ alternative_msr_write(MSR_IA32_SPEC_CTRL, val, \
X86_FEATURE_USE_IBRS_FW); \
} while (0)
#define firmware_restrict_branch_speculation_end() \
do { \
- alternative_msr_write(MSR_IA32_SPEC_CTRL, 0, \
+ u64 val = x86_spec_ctrl_base; \
+ \
+ alternative_msr_write(MSR_IA32_SPEC_CTRL, val, \
X86_FEATURE_USE_IBRS_FW); \
preempt_enable(); \
} while (0)
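Callers bracket firmware invocations with this pair so IBRS (on top of the cached x86_spec_ctrl_base value) is in force only for the duration of the call. A usage sketch; do_firmware_call() is a hypothetical stand-in:

```c
static int protected_firmware_call(void)
{
	int ret;

	firmware_restrict_branch_speculation_start();
	ret = do_firmware_call();	/* hypothetical firmware entry point */
	firmware_restrict_branch_speculation_end();

	return ret;
}
```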
diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h
index 2c5a966dc222..6afac386a434 100644
--- a/arch/x86/include/asm/page_64_types.h
+++ b/arch/x86/include/asm/page_64_types.h
@@ -53,7 +53,7 @@
#define __PHYSICAL_MASK_SHIFT 52
#ifdef CONFIG_X86_5LEVEL
-#define __VIRTUAL_MASK_SHIFT (pgtable_l5_enabled ? 56 : 47)
+#define __VIRTUAL_MASK_SHIFT (pgtable_l5_enabled() ? 56 : 47)
#else
#define __VIRTUAL_MASK_SHIFT 47
#endif
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 9be2bf13825b..d49bbf4bb5c8 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -574,14 +574,14 @@ static inline void __set_pgd(pgd_t *pgdp, pgd_t pgd)
}
#define set_pgd(pgdp, pgdval) do { \
- if (pgtable_l5_enabled) \
+ if (pgtable_l5_enabled()) \
__set_pgd(pgdp, pgdval); \
else \
set_p4d((p4d_t *)(pgdp), (p4d_t) { (pgdval).pgd }); \
} while (0)
#define pgd_clear(pgdp) do { \
- if (pgtable_l5_enabled) \
+ if (pgtable_l5_enabled()) \
set_pgd(pgdp, __pgd(0)); \
} while (0)
diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h
index d32175e30259..662963681ea6 100644
--- a/arch/x86/include/asm/pci.h
+++ b/arch/x86/include/asm/pci.h
@@ -117,9 +117,6 @@ void native_restore_msi_irqs(struct pci_dev *dev);
#define native_setup_msi_irqs NULL
#define native_teardown_msi_irq NULL
#endif
-
-#define PCI_DMA_BUS_IS_PHYS (dma_ops->is_phys)
-
#endif /* __KERNEL__ */
#ifdef CONFIG_X86_64
diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h
index 263c142a6a6c..ada6410fd2ec 100644
--- a/arch/x86/include/asm/pgalloc.h
+++ b/arch/x86/include/asm/pgalloc.h
@@ -167,7 +167,7 @@ static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud,
#if CONFIG_PGTABLE_LEVELS > 4
static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, p4d_t *p4d)
{
- if (!pgtable_l5_enabled)
+ if (!pgtable_l5_enabled())
return;
paravirt_alloc_p4d(mm, __pa(p4d) >> PAGE_SHIFT);
set_pgd(pgd, __pgd(_PAGE_TABLE | __pa(p4d)));
@@ -193,7 +193,7 @@ extern void ___p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d);
static inline void __p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d,
unsigned long address)
{
- if (pgtable_l5_enabled)
+ if (pgtable_l5_enabled())
___p4d_free_tlb(tlb, p4d);
}
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index f1633de5a675..99ecde23c3ec 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -65,7 +65,7 @@ extern pmdval_t early_pmd_flags;
#ifndef __PAGETABLE_P4D_FOLDED
#define set_pgd(pgdp, pgd) native_set_pgd(pgdp, pgd)
-#define pgd_clear(pgd) (pgtable_l5_enabled ? native_pgd_clear(pgd) : 0)
+#define pgd_clear(pgd) (pgtable_l5_enabled() ? native_pgd_clear(pgd) : 0)
#endif
#ifndef set_p4d
@@ -881,7 +881,7 @@ static inline unsigned long p4d_index(unsigned long address)
#if CONFIG_PGTABLE_LEVELS > 4
static inline int pgd_present(pgd_t pgd)
{
- if (!pgtable_l5_enabled)
+ if (!pgtable_l5_enabled())
return 1;
return pgd_flags(pgd) & _PAGE_PRESENT;
}
@@ -898,9 +898,9 @@ static inline unsigned long pgd_page_vaddr(pgd_t pgd)
#define pgd_page(pgd) pfn_to_page(pgd_pfn(pgd))
/* to find an entry in a page-table-directory. */
-static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address)
+static __always_inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address)
{
- if (!pgtable_l5_enabled)
+ if (!pgtable_l5_enabled())
return (p4d_t *)pgd;
return (p4d_t *)pgd_page_vaddr(*pgd) + p4d_index(address);
}
@@ -909,7 +909,7 @@ static inline int pgd_bad(pgd_t pgd)
{
unsigned long ignore_flags = _PAGE_USER;
- if (!pgtable_l5_enabled)
+ if (!pgtable_l5_enabled())
return 0;
if (IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION))
@@ -920,7 +920,7 @@ static inline int pgd_bad(pgd_t pgd)
static inline int pgd_none(pgd_t pgd)
{
- if (!pgtable_l5_enabled)
+ if (!pgtable_l5_enabled())
return 0;
/*
* There is no need to do a workaround for the KNL stray
diff --git a/arch/x86/include/asm/pgtable_32_types.h b/arch/x86/include/asm/pgtable_32_types.h
index e3225e83db7d..d9a001a4a872 100644
--- a/arch/x86/include/asm/pgtable_32_types.h
+++ b/arch/x86/include/asm/pgtable_32_types.h
@@ -15,7 +15,7 @@
# include <asm/pgtable-2level_types.h>
#endif
-#define pgtable_l5_enabled 0
+#define pgtable_l5_enabled() 0
#define PGDIR_SIZE (1UL << PGDIR_SHIFT)
#define PGDIR_MASK (~(PGDIR_SIZE - 1))
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index 877bc27718ae..3c5385f9a88f 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -220,7 +220,7 @@ static inline void native_set_p4d(p4d_t *p4dp, p4d_t p4d)
{
pgd_t pgd;
- if (pgtable_l5_enabled || !IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION)) {
+ if (pgtable_l5_enabled() || !IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION)) {
*p4dp = p4d;
return;
}
diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
index adb47552e6bb..054765ab2da2 100644
--- a/arch/x86/include/asm/pgtable_64_types.h
+++ b/arch/x86/include/asm/pgtable_64_types.h
@@ -22,12 +22,23 @@ typedef struct { pteval_t pte; } pte_t;
#ifdef CONFIG_X86_5LEVEL
extern unsigned int __pgtable_l5_enabled;
-#ifndef pgtable_l5_enabled
-#define pgtable_l5_enabled cpu_feature_enabled(X86_FEATURE_LA57)
-#endif
+
+#ifdef USE_EARLY_PGTABLE_L5
+/*
+ * cpu_feature_enabled() is not available in early boot code.
+ * Use the variable directly instead.
+ */
+static inline bool pgtable_l5_enabled(void)
+{
+ return __pgtable_l5_enabled;
+}
#else
-#define pgtable_l5_enabled 0
-#endif
+#define pgtable_l5_enabled() cpu_feature_enabled(X86_FEATURE_LA57)
+#endif /* USE_EARLY_PGTABLE_L5 */
+
+#else
+#define pgtable_l5_enabled() 0
+#endif /* CONFIG_X86_5LEVEL */
extern unsigned int pgdir_shift;
extern unsigned int ptrs_per_p4d;
@@ -102,7 +113,7 @@ extern unsigned int ptrs_per_p4d;
#define LDT_PGD_ENTRY_L4 -3UL
#define LDT_PGD_ENTRY_L5 -112UL
-#define LDT_PGD_ENTRY (pgtable_l5_enabled ? LDT_PGD_ENTRY_L5 : LDT_PGD_ENTRY_L4)
+#define LDT_PGD_ENTRY (pgtable_l5_enabled() ? LDT_PGD_ENTRY_L5 : LDT_PGD_ENTRY_L4)
#define LDT_BASE_ADDR (LDT_PGD_ENTRY << PGDIR_SHIFT)
#define __VMALLOC_BASE_L4 0xffffc90000000000UL
@@ -116,7 +127,7 @@ extern unsigned int ptrs_per_p4d;
#ifdef CONFIG_DYNAMIC_MEMORY_LAYOUT
# define VMALLOC_START vmalloc_base
-# define VMALLOC_SIZE_TB (pgtable_l5_enabled ? VMALLOC_SIZE_TB_L5 : VMALLOC_SIZE_TB_L4)
+# define VMALLOC_SIZE_TB (pgtable_l5_enabled() ? VMALLOC_SIZE_TB_L5 : VMALLOC_SIZE_TB_L4)
# define VMEMMAP_START vmemmap_base
#else
# define VMALLOC_START __VMALLOC_BASE_L4
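Per the comment above, early boot code that runs before cpu_feature_enabled() works opts in to the variable-based check by defining USE_EARLY_PGTABLE_L5 ahead of the includes. A sketch of a hypothetical early-boot user:

```c
#define USE_EARLY_PGTABLE_L5		/* must precede the #includes */
#include <asm/pgtable_64_types.h>

static void early_example(void)
{
	/* Reads __pgtable_l5_enabled directly; safe before alternatives. */
	if (pgtable_l5_enabled())
		;	/* 5-level specific early setup would go here */
}
```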
diff --git a/arch/x86/include/asm/pkeys.h b/arch/x86/include/asm/pkeys.h
index a0ba1ffda0df..851c04b7a092 100644
--- a/arch/x86/include/asm/pkeys.h
+++ b/arch/x86/include/asm/pkeys.h
@@ -2,6 +2,8 @@
#ifndef _ASM_X86_PKEYS_H
#define _ASM_X86_PKEYS_H
+#define ARCH_DEFAULT_PKEY 0
+
#define arch_max_pkey() (boot_cpu_has(X86_FEATURE_OSPKE) ? 16 : 1)
extern int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
@@ -15,7 +17,7 @@ extern int __execute_only_pkey(struct mm_struct *mm);
static inline int execute_only_pkey(struct mm_struct *mm)
{
if (!boot_cpu_has(X86_FEATURE_OSPKE))
- return 0;
+ return ARCH_DEFAULT_PKEY;
return __execute_only_pkey(mm);
}
@@ -49,13 +51,21 @@ bool mm_pkey_is_allocated(struct mm_struct *mm, int pkey)
{
/*
* "Allocated" pkeys are those that have been returned
- * from pkey_alloc(). pkey 0 is special, and never
- * returned from pkey_alloc().
+ * from pkey_alloc(), or pkey 0, which is allocated
+ * implicitly when the mm is created.
*/
- if (pkey <= 0)
+ if (pkey < 0)
return false;
if (pkey >= arch_max_pkey())
return false;
+ /*
+ * The exec-only pkey is set in the allocation map, but
+ * is not available to any of the user interfaces like
+ * mprotect_pkey().
+ */
+ if (pkey == mm->context.execute_only_pkey)
+ return false;
+
return mm_pkey_allocation_map(mm) & (1U << pkey);
}
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 21a114914ba4..e28add6b791f 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -186,15 +186,6 @@ extern void identify_boot_cpu(void);
extern void identify_secondary_cpu(struct cpuinfo_x86 *);
extern void print_cpu_info(struct cpuinfo_x86 *);
void print_cpu_msr(struct cpuinfo_x86 *);
-extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c);
-extern u32 get_scattered_cpuid_leaf(unsigned int level,
- unsigned int sub_leaf,
- enum cpuid_regs_idx reg);
-extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c);
-extern void init_amd_cacheinfo(struct cpuinfo_x86 *c);
-
-extern void detect_extended_topology(struct cpuinfo_x86 *c);
-extern void detect_ht(struct cpuinfo_x86 *c);
#ifdef CONFIG_X86_32
extern int have_cpuid_p(void);
diff --git a/arch/x86/include/asm/qspinlock.h b/arch/x86/include/asm/qspinlock.h
index 5e16b5d40d32..3e70bed8a978 100644
--- a/arch/x86/include/asm/qspinlock.h
+++ b/arch/x86/include/asm/qspinlock.h
@@ -7,6 +7,14 @@
#include <asm-generic/qspinlock_types.h>
#include <asm/paravirt.h>
+#define _Q_PENDING_LOOPS (1 << 9)
+
+#ifdef CONFIG_PARAVIRT_SPINLOCKS
+extern void native_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val);
+extern void __pv_init_lock_hash(void);
+extern void __pv_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val);
+extern void __raw_callee_save___pv_queued_spin_unlock(struct qspinlock *lock);
+
#define queued_spin_unlock queued_spin_unlock
/**
* queued_spin_unlock - release a queued spinlock
@@ -16,15 +24,9 @@
*/
static inline void native_queued_spin_unlock(struct qspinlock *lock)
{
- smp_store_release((u8 *)lock, 0);
+ smp_store_release(&lock->locked, 0);
}
-#ifdef CONFIG_PARAVIRT_SPINLOCKS
-extern void native_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val);
-extern void __pv_init_lock_hash(void);
-extern void __pv_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val);
-extern void __raw_callee_save___pv_queued_spin_unlock(struct qspinlock *lock);
-
static inline void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
{
pv_queued_spin_lock_slowpath(lock, val);
@@ -40,11 +42,6 @@ static inline bool vcpu_is_preempted(long cpu)
{
return pv_vcpu_is_preempted(cpu);
}
-#else
-static inline void queued_spin_unlock(struct qspinlock *lock)
-{
- native_queued_spin_unlock(lock);
-}
#endif
#ifdef CONFIG_PARAVIRT
diff --git a/arch/x86/include/asm/qspinlock_paravirt.h b/arch/x86/include/asm/qspinlock_paravirt.h
index 923307ea11c7..9ef5ee03d2d7 100644
--- a/arch/x86/include/asm/qspinlock_paravirt.h
+++ b/arch/x86/include/asm/qspinlock_paravirt.h
@@ -22,8 +22,7 @@ PV_CALLEE_SAVE_REGS_THUNK(__pv_queued_spin_unlock_slowpath);
*
* void __pv_queued_spin_unlock(struct qspinlock *lock)
* {
- * struct __qspinlock *l = (void *)lock;
- * u8 lockval = cmpxchg(&l->locked, _Q_LOCKED_VAL, 0);
+ * u8 lockval = cmpxchg(&lock->locked, _Q_LOCKED_VAL, 0);
*
* if (likely(lockval == _Q_LOCKED_VAL))
* return;
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index f75bff8f9d82..547c4fe50711 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -171,7 +171,6 @@ static inline int wbinvd_on_all_cpus(void)
wbinvd();
return 0;
}
-#define smp_num_siblings 1
#endif /* CONFIG_SMP */
extern unsigned disabled_cpus;
diff --git a/arch/x86/include/asm/sparsemem.h b/arch/x86/include/asm/sparsemem.h
index 4617a2bf123c..199218719a86 100644
--- a/arch/x86/include/asm/sparsemem.h
+++ b/arch/x86/include/asm/sparsemem.h
@@ -27,8 +27,8 @@
# endif
#else /* CONFIG_X86_32 */
# define SECTION_SIZE_BITS 27 /* matt - 128 is convenient right now */
-# define MAX_PHYSADDR_BITS (pgtable_l5_enabled ? 52 : 44)
-# define MAX_PHYSMEM_BITS (pgtable_l5_enabled ? 52 : 46)
+# define MAX_PHYSADDR_BITS (pgtable_l5_enabled() ? 52 : 44)
+# define MAX_PHYSMEM_BITS (pgtable_l5_enabled() ? 52 : 46)
#endif
#endif /* CONFIG_SPARSEMEM */
diff --git a/arch/x86/include/asm/spec-ctrl.h b/arch/x86/include/asm/spec-ctrl.h
new file mode 100644
index 000000000000..ae7c2c5cd7f0
--- /dev/null
+++ b/arch/x86/include/asm/spec-ctrl.h
@@ -0,0 +1,80 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_SPECCTRL_H_
+#define _ASM_X86_SPECCTRL_H_
+
+#include <linux/thread_info.h>
+#include <asm/nospec-branch.h>
+
+/*
+ * On VMENTER we must preserve whatever view of the SPEC_CTRL MSR
+ * the guest has, while on VMEXIT we restore the host view. This
+ * would be easier if SPEC_CTRL were architecturally maskable or
+ * shadowable for guests, but this is not (currently) the case.
+ * Takes the guest view of the SPEC_CTRL MSR as a parameter, along with
+ * the guest's version of VIRT_SPEC_CTRL, if emulated.
+ */
+extern void x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool guest);
+
+/**
+ * x86_spec_ctrl_set_guest - Set speculation control registers for the guest
+ * @guest_spec_ctrl: The guest content of MSR_SPEC_CTRL
+ * @guest_virt_spec_ctrl: The guest controlled bits of MSR_VIRT_SPEC_CTRL
+ * (may get translated to MSR_AMD64_LS_CFG bits)
+ *
+ * Avoids writing to the MSR if the content/bits are the same
+ */
+static inline
+void x86_spec_ctrl_set_guest(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl)
+{
+ x86_virt_spec_ctrl(guest_spec_ctrl, guest_virt_spec_ctrl, true);
+}
+
+/**
+ * x86_spec_ctrl_restore_host - Restore host speculation control registers
+ * @guest_spec_ctrl: The guest content of MSR_SPEC_CTRL
+ * @guest_virt_spec_ctrl: The guest controlled bits of MSR_VIRT_SPEC_CTRL
+ * (may get translated to MSR_AMD64_LS_CFG bits)
+ *
+ * Avoids writing to the MSR if the content/bits are the same
+ */
+static inline
+void x86_spec_ctrl_restore_host(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl)
+{
+ x86_virt_spec_ctrl(guest_spec_ctrl, guest_virt_spec_ctrl, false);
+}
+
+/* AMD specific Speculative Store Bypass MSR data */
+extern u64 x86_amd_ls_cfg_base;
+extern u64 x86_amd_ls_cfg_ssbd_mask;
+
+static inline u64 ssbd_tif_to_spec_ctrl(u64 tifn)
+{
+ BUILD_BUG_ON(TIF_SSBD < SPEC_CTRL_SSBD_SHIFT);
+ return (tifn & _TIF_SSBD) >> (TIF_SSBD - SPEC_CTRL_SSBD_SHIFT);
+}
+
+static inline unsigned long ssbd_spec_ctrl_to_tif(u64 spec_ctrl)
+{
+ BUILD_BUG_ON(TIF_SSBD < SPEC_CTRL_SSBD_SHIFT);
+ return (spec_ctrl & SPEC_CTRL_SSBD) << (TIF_SSBD - SPEC_CTRL_SSBD_SHIFT);
+}
+
+static inline u64 ssbd_tif_to_amd_ls_cfg(u64 tifn)
+{
+ return (tifn & _TIF_SSBD) ? x86_amd_ls_cfg_ssbd_mask : 0ULL;
+}
+
+#ifdef CONFIG_SMP
+extern void speculative_store_bypass_ht_init(void);
+#else
+static inline void speculative_store_bypass_ht_init(void) { }
+#endif
+
+extern void speculative_store_bypass_update(unsigned long tif);
+
+static inline void speculative_store_bypass_update_current(void)
+{
+ speculative_store_bypass_update(current_thread_info()->flags);
+}
+
+#endif
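A worked example of the TIF to SPEC_CTRL bit translation above, using TIF_SSBD == 5 (thread_info.h) and SPEC_CTRL_SSBD_SHIFT == 2 (msr-index.h) from elsewhere in this patch; the conversion is simply a right shift by 3. A standalone sketch:

```c
#include <assert.h>

#define TIF_SSBD		5	/* from thread_info.h in this patch */
#define _TIF_SSBD		(1UL << TIF_SSBD)
#define SPEC_CTRL_SSBD_SHIFT	2	/* from msr-index.h in this patch */
#define SPEC_CTRL_SSBD		(1UL << SPEC_CTRL_SSBD_SHIFT)

int main(void)
{
	unsigned long tifn = _TIF_SSBD;	/* 0x20 */

	/* 0x20 >> (5 - 2) == 0x4 == SPEC_CTRL_SSBD */
	assert(((tifn & _TIF_SSBD) >> (TIF_SSBD - SPEC_CTRL_SSBD_SHIFT))
	       == SPEC_CTRL_SSBD);
	return 0;
}
```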
diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h
index 133d9425fced..b6dc698f992a 100644
--- a/arch/x86/include/asm/stacktrace.h
+++ b/arch/x86/include/asm/stacktrace.h
@@ -111,4 +111,6 @@ static inline unsigned long caller_frame_pointer(void)
return (unsigned long)frame;
}
+void show_opcodes(u8 *rip, const char *loglvl);
+void show_ip(struct pt_regs *regs, const char *loglvl);
#endif /* _ASM_X86_STACKTRACE_H */
diff --git a/arch/x86/include/asm/string_64.h b/arch/x86/include/asm/string_64.h
index 533f74c300c2..d33f92b9fa22 100644
--- a/arch/x86/include/asm/string_64.h
+++ b/arch/x86/include/asm/string_64.h
@@ -116,7 +116,8 @@ int strcmp(const char *cs, const char *ct);
#endif
#define __HAVE_ARCH_MEMCPY_MCSAFE 1
-__must_check int memcpy_mcsafe_unrolled(void *dst, const void *src, size_t cnt);
+__must_check unsigned long __memcpy_mcsafe(void *dst, const void *src,
+ size_t cnt);
DECLARE_STATIC_KEY_FALSE(mcsafe_key);
/**
@@ -131,14 +132,15 @@ DECLARE_STATIC_KEY_FALSE(mcsafe_key);
* actually do machine check recovery. Everyone else can just
* use memcpy().
*
- * Return 0 for success, -EFAULT for fail
+ * Return 0 for success, or number of bytes not copied if there was an
+ * exception.
*/
-static __always_inline __must_check int
+static __always_inline __must_check unsigned long
memcpy_mcsafe(void *dst, const void *src, size_t cnt)
{
#ifdef CONFIG_X86_MCE
if (static_branch_unlikely(&mcsafe_key))
- return memcpy_mcsafe_unrolled(dst, src, cnt);
+ return __memcpy_mcsafe(dst, src, cnt);
else
#endif
memcpy(dst, src, cnt);
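With the new return convention, memcpy_mcsafe() reports the number of bytes not copied (0 on full success), so a caller can compute how far the copy got before the exception. A usage sketch (copy_some() is illustrative):

```c
static size_t copy_some(void *dst, const void *src, size_t len)
{
	unsigned long rem = memcpy_mcsafe(dst, src, len);

	return len - rem;	/* bytes actually copied */
}
```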
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index a5d9521bb2cb..2ff2a30a264f 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -79,6 +79,7 @@ struct thread_info {
#define TIF_SIGPENDING 2 /* signal pending */
#define TIF_NEED_RESCHED 3 /* rescheduling necessary */
#define TIF_SINGLESTEP 4 /* reenable singlestep on user return*/
+#define TIF_SSBD 5 /* Reduced data speculation */
#define TIF_SYSCALL_EMU 6 /* syscall emulation active */
#define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */
#define TIF_SECCOMP 8 /* secure computing */
@@ -105,6 +106,7 @@ struct thread_info {
#define _TIF_SIGPENDING (1 << TIF_SIGPENDING)
#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)
#define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP)
+#define _TIF_SSBD (1 << TIF_SSBD)
#define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU)
#define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
#define _TIF_SECCOMP (1 << TIF_SECCOMP)
@@ -144,7 +146,7 @@ struct thread_info {
/* flags to check in __switch_to() */
#define _TIF_WORK_CTXSW \
- (_TIF_IO_BITMAP|_TIF_NOCPUID|_TIF_NOTSC|_TIF_BLOCKSTEP)
+ (_TIF_IO_BITMAP|_TIF_NOCPUID|_TIF_NOTSC|_TIF_BLOCKSTEP|_TIF_SSBD)
#define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY)
#define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW)
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
index 62546b3a398e..62acb613114b 100644
--- a/arch/x86/include/asm/uaccess_64.h
+++ b/arch/x86/include/asm/uaccess_64.h
@@ -47,6 +47,17 @@ copy_user_generic(void *to, const void *from, unsigned len)
}
static __always_inline __must_check unsigned long
+copy_to_user_mcsafe(void *to, const void *from, unsigned len)
+{
+ unsigned long ret;
+
+ __uaccess_begin();
+ ret = memcpy_mcsafe(to, from, len);
+ __uaccess_end();
+ return ret;
+}
+
+static __always_inline __must_check unsigned long
raw_copy_from_user(void *dst, const void __user *src, unsigned long size)
{
int ret = 0;
@@ -194,4 +205,7 @@ __copy_from_user_flushcache(void *dst, const void __user *src, unsigned size)
unsigned long
copy_user_handle_tail(char *to, char *from, unsigned len);
+unsigned long
+mcsafe_handle_tail(char *to, char *from, unsigned len);
+
#endif /* _ASM_X86_UACCESS_64_H */
diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h
index 4c851ebb3ceb..0ede697c3961 100644
--- a/arch/x86/include/uapi/asm/kvm_para.h
+++ b/arch/x86/include/uapi/asm/kvm_para.h
@@ -29,7 +29,7 @@
#define KVM_FEATURE_PV_TLB_FLUSH 9
#define KVM_FEATURE_ASYNC_PF_VMEXIT 10
-#define KVM_HINTS_DEDICATED 0
+#define KVM_HINTS_REALTIME 0
/* The last 8 bits are used to indicate how to interpret the flags field
* in pvclock structure. If no bits are set, all flags are ignored.
diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c
index c88e0b127810..b481b95bd8f6 100644
--- a/arch/x86/kernel/amd_nb.c
+++ b/arch/x86/kernel/amd_nb.c
@@ -14,8 +14,11 @@
#include <asm/amd_nb.h>
#define PCI_DEVICE_ID_AMD_17H_ROOT 0x1450
+#define PCI_DEVICE_ID_AMD_17H_M10H_ROOT 0x15d0
#define PCI_DEVICE_ID_AMD_17H_DF_F3 0x1463
#define PCI_DEVICE_ID_AMD_17H_DF_F4 0x1464
+#define PCI_DEVICE_ID_AMD_17H_M10H_DF_F3 0x15eb
+#define PCI_DEVICE_ID_AMD_17H_M10H_DF_F4 0x15ec
/* Protect the PCI config register pairs used for SMN and DF indirect access. */
static DEFINE_MUTEX(smn_mutex);
@@ -24,6 +27,7 @@ static u32 *flush_words;
static const struct pci_device_id amd_root_ids[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_ROOT) },
+ { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M10H_ROOT) },
{}
};
@@ -39,6 +43,7 @@ const struct pci_device_id amd_nb_misc_ids[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F3) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_M30H_NB_F3) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_DF_F3) },
+ { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M10H_DF_F3) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CNB17H_F3) },
{}
};
@@ -51,6 +56,7 @@ static const struct pci_device_id amd_nb_link_ids[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F4) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_M30H_NB_F4) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_DF_F4) },
+ { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M10H_DF_F4) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CNB17H_F4) },
{}
};
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c
index 8b04234e010b..7685444a106b 100644
--- a/arch/x86/kernel/apic/x2apic_cluster.c
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -116,6 +116,7 @@ static void init_x2apic_ldr(void)
goto update;
}
cmsk = cluster_hotplug_mask;
+ cmsk->clusterid = cluster;
cluster_hotplug_mask = NULL;
update:
this_cpu_write(cluster_masks, cmsk);
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index dfcbe6924eaf..5d0de79fdab0 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -1715,19 +1715,6 @@ static int proc_apm_show(struct seq_file *m, void *v)
return 0;
}
-static int proc_apm_open(struct inode *inode, struct file *file)
-{
- return single_open(file, proc_apm_show, NULL);
-}
-
-static const struct file_operations apm_file_ops = {
- .owner = THIS_MODULE,
- .open = proc_apm_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
static int apm(void *unused)
{
unsigned short bx;
@@ -2360,7 +2347,7 @@ static int __init apm_init(void)
set_desc_base(&gdt[APM_DS >> 3],
(unsigned long)__va((unsigned long)apm_info.bios.dseg << 4));
- proc_create("apm", 0, NULL, &apm_file_ops);
+ proc_create_single("apm", 0, NULL, proc_apm_show);
kapmd_task = kthread_create(apm, NULL, "kapmd");
if (IS_ERR(kapmd_task)) {
@@ -2446,7 +2433,7 @@ MODULE_PARM_DESC(idle_threshold,
"System idle percentage above which to make APM BIOS idle calls");
module_param(idle_period, int, 0444);
MODULE_PARM_DESC(idle_period,
- "Period (in sec/100) over which to caculate the idle percentage");
+ "Period (in sec/100) over which to calculate the idle percentage");
module_param(smp, bool, 0444);
MODULE_PARM_DESC(smp,
"Set this to enable APM use on an SMP platform. Use with caution on older systems");
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index a66229f51b12..7a40196967cb 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -17,7 +17,7 @@ KCOV_INSTRUMENT_perf_event.o := n
nostackp := $(call cc-option, -fno-stack-protector)
CFLAGS_common.o := $(nostackp)
-obj-y := intel_cacheinfo.o scattered.o topology.o
+obj-y := cacheinfo.o scattered.o topology.o
obj-y += common.o
obj-y += rdrand.o
obj-y += match.o
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 12bc0a1139da..082d7875cef8 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -9,7 +9,9 @@
#include <linux/random.h>
#include <asm/processor.h>
#include <asm/apic.h>
+#include <asm/cacheinfo.h>
#include <asm/cpu.h>
+#include <asm/spec-ctrl.h>
#include <asm/smp.h>
#include <asm/pci-direct.h>
#include <asm/delay.h>
@@ -297,7 +299,6 @@ static int nearby_node(int apicid)
}
#endif
-#ifdef CONFIG_SMP
/*
* Fix up cpu_core_id for pre-F17h systems to be in the
* [0 .. cores_per_node - 1] range. Not really needed but
@@ -327,6 +328,7 @@ static void amd_get_topology(struct cpuinfo_x86 *c)
/* get information required for multi-node processors */
if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
+ int err;
u32 eax, ebx, ecx, edx;
cpuid(0x8000001e, &eax, &ebx, &ecx, &edx);
@@ -345,21 +347,15 @@ static void amd_get_topology(struct cpuinfo_x86 *c)
}
/*
- * We may have multiple LLCs if L3 caches exist, so check if we
- * have an L3 cache by looking at the L3 cache CPUID leaf.
+ * If CPUID leaf 0xB is available, use it to derive
+ * topology information.
*/
- if (cpuid_edx(0x80000006)) {
- if (c->x86 == 0x17) {
- /*
- * LLC is at the core complex level.
- * Core complex id is ApicId[3].
- */
- per_cpu(cpu_llc_id, cpu) = c->apicid >> 3;
- } else {
- /* LLC is at the node level. */
- per_cpu(cpu_llc_id, cpu) = node_id;
- }
- }
+ err = detect_extended_topology(c);
+ if (!err)
+ c->x86_coreid_bits = get_count_order(c->x86_max_cores);
+
+ cacheinfo_amd_init_llc_id(c, cpu, node_id);
+
} else if (cpu_has(c, X86_FEATURE_NODEID_MSR)) {
u64 value;
@@ -375,7 +371,6 @@ static void amd_get_topology(struct cpuinfo_x86 *c)
legacy_fixup_core_id(c);
}
}
-#endif
/*
 * On an AMD dual core setup the lower bits of the APIC id distinguish the cores.
@@ -383,7 +378,6 @@ static void amd_get_topology(struct cpuinfo_x86 *c)
*/
static void amd_detect_cmp(struct cpuinfo_x86 *c)
{
-#ifdef CONFIG_SMP
unsigned bits;
int cpu = smp_processor_id();
@@ -394,17 +388,11 @@ static void amd_detect_cmp(struct cpuinfo_x86 *c)
c->phys_proc_id = c->initial_apicid >> bits;
/* use socket ID also for last level cache */
per_cpu(cpu_llc_id, cpu) = c->phys_proc_id;
- amd_get_topology(c);
-#endif
}
u16 amd_get_nb_id(int cpu)
{
- u16 id = 0;
-#ifdef CONFIG_SMP
- id = per_cpu(cpu_llc_id, cpu);
-#endif
- return id;
+ return per_cpu(cpu_llc_id, cpu);
}
EXPORT_SYMBOL_GPL(amd_get_nb_id);
@@ -554,6 +542,26 @@ static void bsp_init_amd(struct cpuinfo_x86 *c)
rdmsrl(MSR_FAM10H_NODE_ID, value);
nodes_per_socket = ((value >> 3) & 7) + 1;
}
+
+ if (c->x86 >= 0x15 && c->x86 <= 0x17) {
+ unsigned int bit;
+
+ switch (c->x86) {
+ case 0x15: bit = 54; break;
+ case 0x16: bit = 33; break;
+ case 0x17: bit = 10; break;
+ default: return;
+ }
+ /*
+ * Try to cache the base value so further operations can
+ * avoid RMW. If that faults, do not enable SSBD.
+ */
+ if (!rdmsrl_safe(MSR_AMD64_LS_CFG, &x86_amd_ls_cfg_base)) {
+ setup_force_cpu_cap(X86_FEATURE_LS_CFG_SSBD);
+ setup_force_cpu_cap(X86_FEATURE_SSBD);
+ x86_amd_ls_cfg_ssbd_mask = 1ULL << bit;
+ }
+ }
}
static void early_detect_mem_encrypt(struct cpuinfo_x86 *c)
@@ -791,6 +799,7 @@ static void init_amd_bd(struct cpuinfo_x86 *c)
static void init_amd_zn(struct cpuinfo_x86 *c)
{
+ set_cpu_cap(c, X86_FEATURE_ZEN);
/*
* Fix erratum 1076: CPB feature bit not being set in CPUID. It affects
* all up to and including B1.
@@ -842,6 +851,7 @@ static void init_amd(struct cpuinfo_x86 *c)
/* Multi core CPU? */
if (c->extended_cpuid_level >= 0x80000008) {
amd_detect_cmp(c);
+ amd_get_topology(c);
srat_detect_node(c);
}
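
In the TOPOEXT path above, x86_coreid_bits is now derived from x86_max_cores with get_count_order() (from <linux/bitops.h>), which returns the number of bits needed to index the next power of two. A worked example with an assumed core count:

	/* get_count_order(12) == 4: the next power of two is 16 == 1 << 4,
	 * so the core ID on a hypothetical 12-core part occupies 4
	 * APIC-ID bits. */
	c->x86_max_cores   = 12;
	c->x86_coreid_bits = get_count_order(c->x86_max_cores);	/* 4 */
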
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index bfca937bdcc3..7416fc206b4a 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -12,8 +12,10 @@
#include <linux/utsname.h>
#include <linux/cpu.h>
#include <linux/module.h>
+#include <linux/nospec.h>
+#include <linux/prctl.h>
-#include <asm/nospec-branch.h>
+#include <asm/spec-ctrl.h>
#include <asm/cmdline.h>
#include <asm/bugs.h>
#include <asm/processor.h>
@@ -27,6 +29,27 @@
#include <asm/intel-family.h>
static void __init spectre_v2_select_mitigation(void);
+static void __init ssb_select_mitigation(void);
+
+/*
+ * Our boot-time value of the SPEC_CTRL MSR. We read it once so that any
+ * writes to SPEC_CTRL contain whatever reserved bits have been set.
+ */
+u64 __ro_after_init x86_spec_ctrl_base;
+EXPORT_SYMBOL_GPL(x86_spec_ctrl_base);
+
+/*
+ * The vendor and possibly platform specific bits which can be modified in
+ * x86_spec_ctrl_base.
+ */
+static u64 __ro_after_init x86_spec_ctrl_mask = SPEC_CTRL_IBRS;
+
+/*
+ * AMD specific MSR info for Speculative Store Bypass control.
+ * x86_amd_ls_cfg_ssbd_mask is initialized in identify_boot_cpu().
+ */
+u64 __ro_after_init x86_amd_ls_cfg_base;
+u64 __ro_after_init x86_amd_ls_cfg_ssbd_mask;
void __init check_bugs(void)
{
@@ -37,9 +60,27 @@ void __init check_bugs(void)
print_cpu_info(&boot_cpu_data);
}
+ /*
+ * Read the SPEC_CTRL MSR to account for reserved bits which may
+ * have unknown values. AMD64_LS_CFG MSR is cached in the early AMD
+ * init code as it is not enumerated and depends on the family.
+ */
+ if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL))
+ rdmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
+
+ /* Allow STIBP in MSR_SPEC_CTRL if supported */
+ if (boot_cpu_has(X86_FEATURE_STIBP))
+ x86_spec_ctrl_mask |= SPEC_CTRL_STIBP;
+
/* Select the proper spectre mitigation before patching alternatives */
spectre_v2_select_mitigation();
+ /*
+ * Select proper mitigation for any exposure to the Speculative Store
+ * Bypass vulnerability.
+ */
+ ssb_select_mitigation();
+
#ifdef CONFIG_X86_32
/*
* Check whether we are able to run this kernel safely on SMP.
@@ -93,7 +134,76 @@ static const char *spectre_v2_strings[] = {
#undef pr_fmt
#define pr_fmt(fmt) "Spectre V2 : " fmt
-static enum spectre_v2_mitigation spectre_v2_enabled = SPECTRE_V2_NONE;
+static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init =
+ SPECTRE_V2_NONE;
+
+void
+x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest)
+{
+ u64 msrval, guestval, hostval = x86_spec_ctrl_base;
+ struct thread_info *ti = current_thread_info();
+
+ /* Is MSR_SPEC_CTRL implemented ? */
+ if (static_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) {
+ /*
+ * Restrict guest_spec_ctrl to supported values. Clear the
+ * modifiable bits in the host base value and or the
+ * modifiable bits from the guest value.
+ */
+ guestval = hostval & ~x86_spec_ctrl_mask;
+ guestval |= guest_spec_ctrl & x86_spec_ctrl_mask;
+
+ /* SSBD controlled in MSR_SPEC_CTRL */
+ if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD))
+ hostval |= ssbd_tif_to_spec_ctrl(ti->flags);
+
+ if (hostval != guestval) {
+ msrval = setguest ? guestval : hostval;
+ wrmsrl(MSR_IA32_SPEC_CTRL, msrval);
+ }
+ }
+
+ /*
+ * If SSBD is not handled in MSR_SPEC_CTRL on AMD, update
+ * MSR_AMD64_LS_CFG or MSR_VIRT_SPEC_CTRL if supported.
+ */
+ if (!static_cpu_has(X86_FEATURE_LS_CFG_SSBD) &&
+ !static_cpu_has(X86_FEATURE_VIRT_SSBD))
+ return;
+
+ /*
+ * If the host has SSBD mitigation enabled, force it in the host's
+ * virtual MSR value. If it's not permanently enabled, evaluate
+ * current's TIF_SSBD thread flag.
+ */
+ if (static_cpu_has(X86_FEATURE_SPEC_STORE_BYPASS_DISABLE))
+ hostval = SPEC_CTRL_SSBD;
+ else
+ hostval = ssbd_tif_to_spec_ctrl(ti->flags);
+
+ /* Sanitize the guest value */
+ guestval = guest_virt_spec_ctrl & SPEC_CTRL_SSBD;
+
+ if (hostval != guestval) {
+ unsigned long tif;
+
+ tif = setguest ? ssbd_spec_ctrl_to_tif(guestval) :
+ ssbd_spec_ctrl_to_tif(hostval);
+
+ speculative_store_bypass_update(tif);
+ }
+}
+EXPORT_SYMBOL_GPL(x86_virt_spec_ctrl);
+
+static void x86_amd_ssb_disable(void)
+{
+ u64 msrval = x86_amd_ls_cfg_base | x86_amd_ls_cfg_ssbd_mask;
+
+ if (boot_cpu_has(X86_FEATURE_VIRT_SSBD))
+ wrmsrl(MSR_AMD64_VIRT_SPEC_CTRL, SPEC_CTRL_SSBD);
+ else if (boot_cpu_has(X86_FEATURE_LS_CFG_SSBD))
+ wrmsrl(MSR_AMD64_LS_CFG, msrval);
+}
#ifdef RETPOLINE
static bool spectre_v2_bad_module;
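
The masking logic in x86_virt_spec_ctrl() above only lets a guest flip the bits the host declared modifiable in x86_spec_ctrl_mask. A worked example, assuming the usual SPEC_CTRL layout (IBRS = bit 0, STIBP = bit 1, SSBD = bit 2) and illustrative values:

	/* hostval = 0x0, x86_spec_ctrl_mask = IBRS|STIBP|SSBD = 0x7;
	 * a guest asking for 0xffffffff is clamped to the permitted bits: */
	guestval  = hostval & ~x86_spec_ctrl_mask;	/* 0x0 */
	guestval |= 0xffffffff & x86_spec_ctrl_mask;	/* 0x7 */
	/* MSR_IA32_SPEC_CTRL is only rewritten when guestval != hostval. */
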
@@ -312,32 +422,289 @@ retpoline_auto:
}
#undef pr_fmt
+#define pr_fmt(fmt) "Speculative Store Bypass: " fmt
+
+static enum ssb_mitigation ssb_mode __ro_after_init = SPEC_STORE_BYPASS_NONE;
+
+/* The kernel command line selection */
+enum ssb_mitigation_cmd {
+ SPEC_STORE_BYPASS_CMD_NONE,
+ SPEC_STORE_BYPASS_CMD_AUTO,
+ SPEC_STORE_BYPASS_CMD_ON,
+ SPEC_STORE_BYPASS_CMD_PRCTL,
+ SPEC_STORE_BYPASS_CMD_SECCOMP,
+};
+
+static const char *ssb_strings[] = {
+ [SPEC_STORE_BYPASS_NONE] = "Vulnerable",
+ [SPEC_STORE_BYPASS_DISABLE] = "Mitigation: Speculative Store Bypass disabled",
+ [SPEC_STORE_BYPASS_PRCTL] = "Mitigation: Speculative Store Bypass disabled via prctl",
+ [SPEC_STORE_BYPASS_SECCOMP] = "Mitigation: Speculative Store Bypass disabled via prctl and seccomp",
+};
+
+static const struct {
+ const char *option;
+ enum ssb_mitigation_cmd cmd;
+} ssb_mitigation_options[] = {
+ { "auto", SPEC_STORE_BYPASS_CMD_AUTO }, /* Platform decides */
+ { "on", SPEC_STORE_BYPASS_CMD_ON }, /* Disable Speculative Store Bypass */
+ { "off", SPEC_STORE_BYPASS_CMD_NONE }, /* Don't touch Speculative Store Bypass */
+ { "prctl", SPEC_STORE_BYPASS_CMD_PRCTL }, /* Disable Speculative Store Bypass via prctl */
+ { "seccomp", SPEC_STORE_BYPASS_CMD_SECCOMP }, /* Disable Speculative Store Bypass via prctl and seccomp */
+};
+
+static enum ssb_mitigation_cmd __init ssb_parse_cmdline(void)
+{
+ enum ssb_mitigation_cmd cmd = SPEC_STORE_BYPASS_CMD_AUTO;
+ char arg[20];
+ int ret, i;
+
+ if (cmdline_find_option_bool(boot_command_line, "nospec_store_bypass_disable")) {
+ return SPEC_STORE_BYPASS_CMD_NONE;
+ } else {
+ ret = cmdline_find_option(boot_command_line, "spec_store_bypass_disable",
+ arg, sizeof(arg));
+ if (ret < 0)
+ return SPEC_STORE_BYPASS_CMD_AUTO;
+
+ for (i = 0; i < ARRAY_SIZE(ssb_mitigation_options); i++) {
+ if (!match_option(arg, ret, ssb_mitigation_options[i].option))
+ continue;
+
+ cmd = ssb_mitigation_options[i].cmd;
+ break;
+ }
+
+ if (i >= ARRAY_SIZE(ssb_mitigation_options)) {
+ pr_err("unknown option (%s). Switching to AUTO select\n", arg);
+ return SPEC_STORE_BYPASS_CMD_AUTO;
+ }
+ }
+
+ return cmd;
+}
+
+static enum ssb_mitigation __init __ssb_select_mitigation(void)
+{
+ enum ssb_mitigation mode = SPEC_STORE_BYPASS_NONE;
+ enum ssb_mitigation_cmd cmd;
+
+ if (!boot_cpu_has(X86_FEATURE_SSBD))
+ return mode;
+
+ cmd = ssb_parse_cmdline();
+ if (!boot_cpu_has_bug(X86_BUG_SPEC_STORE_BYPASS) &&
+ (cmd == SPEC_STORE_BYPASS_CMD_NONE ||
+ cmd == SPEC_STORE_BYPASS_CMD_AUTO))
+ return mode;
+
+ switch (cmd) {
+ case SPEC_STORE_BYPASS_CMD_AUTO:
+ case SPEC_STORE_BYPASS_CMD_SECCOMP:
+ /*
+ * Choose prctl+seccomp as the default mode if seccomp is
+ * enabled.
+ */
+ if (IS_ENABLED(CONFIG_SECCOMP))
+ mode = SPEC_STORE_BYPASS_SECCOMP;
+ else
+ mode = SPEC_STORE_BYPASS_PRCTL;
+ break;
+ case SPEC_STORE_BYPASS_CMD_ON:
+ mode = SPEC_STORE_BYPASS_DISABLE;
+ break;
+ case SPEC_STORE_BYPASS_CMD_PRCTL:
+ mode = SPEC_STORE_BYPASS_PRCTL;
+ break;
+ case SPEC_STORE_BYPASS_CMD_NONE:
+ break;
+ }
+
+ /*
+ * We have three CPU feature flags that are in play here:
+ * - X86_BUG_SPEC_STORE_BYPASS - CPU is susceptible.
+ * - X86_FEATURE_SSBD - CPU is able to turn off speculative store bypass
+ * - X86_FEATURE_SPEC_STORE_BYPASS_DISABLE - engage the mitigation
+ */
+ if (mode == SPEC_STORE_BYPASS_DISABLE) {
+ setup_force_cpu_cap(X86_FEATURE_SPEC_STORE_BYPASS_DISABLE);
+ /*
+ * Intel uses the SPEC CTRL MSR Bit(2) for this, while AMD uses
+ * a completely different MSR and bit dependent on family.
+ */
+ switch (boot_cpu_data.x86_vendor) {
+ case X86_VENDOR_INTEL:
+ x86_spec_ctrl_base |= SPEC_CTRL_SSBD;
+ x86_spec_ctrl_mask |= SPEC_CTRL_SSBD;
+ wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
+ break;
+ case X86_VENDOR_AMD:
+ x86_amd_ssb_disable();
+ break;
+ }
+ }
+
+ return mode;
+}
+
+static void ssb_select_mitigation(void)
+{
+ ssb_mode = __ssb_select_mitigation();
+
+ if (boot_cpu_has_bug(X86_BUG_SPEC_STORE_BYPASS))
+ pr_info("%s\n", ssb_strings[ssb_mode]);
+}
+
+#undef pr_fmt
+#define pr_fmt(fmt) "Speculation prctl: " fmt
+
+static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl)
+{
+ bool update;
+
+ if (ssb_mode != SPEC_STORE_BYPASS_PRCTL &&
+ ssb_mode != SPEC_STORE_BYPASS_SECCOMP)
+ return -ENXIO;
+
+ switch (ctrl) {
+ case PR_SPEC_ENABLE:
+ /* If speculation is force disabled, enable is not allowed */
+ if (task_spec_ssb_force_disable(task))
+ return -EPERM;
+ task_clear_spec_ssb_disable(task);
+ update = test_and_clear_tsk_thread_flag(task, TIF_SSBD);
+ break;
+ case PR_SPEC_DISABLE:
+ task_set_spec_ssb_disable(task);
+ update = !test_and_set_tsk_thread_flag(task, TIF_SSBD);
+ break;
+ case PR_SPEC_FORCE_DISABLE:
+ task_set_spec_ssb_disable(task);
+ task_set_spec_ssb_force_disable(task);
+ update = !test_and_set_tsk_thread_flag(task, TIF_SSBD);
+ break;
+ default:
+ return -ERANGE;
+ }
+
+ /*
+ * If being set on non-current task, delay setting the CPU
+ * mitigation until it is next scheduled.
+ */
+ if (task == current && update)
+ speculative_store_bypass_update_current();
+
+ return 0;
+}
+
+int arch_prctl_spec_ctrl_set(struct task_struct *task, unsigned long which,
+ unsigned long ctrl)
+{
+ switch (which) {
+ case PR_SPEC_STORE_BYPASS:
+ return ssb_prctl_set(task, ctrl);
+ default:
+ return -ENODEV;
+ }
+}
+
+#ifdef CONFIG_SECCOMP
+void arch_seccomp_spec_mitigate(struct task_struct *task)
+{
+ if (ssb_mode == SPEC_STORE_BYPASS_SECCOMP)
+ ssb_prctl_set(task, PR_SPEC_FORCE_DISABLE);
+}
+#endif
+
+static int ssb_prctl_get(struct task_struct *task)
+{
+ switch (ssb_mode) {
+ case SPEC_STORE_BYPASS_DISABLE:
+ return PR_SPEC_DISABLE;
+ case SPEC_STORE_BYPASS_SECCOMP:
+ case SPEC_STORE_BYPASS_PRCTL:
+ if (task_spec_ssb_force_disable(task))
+ return PR_SPEC_PRCTL | PR_SPEC_FORCE_DISABLE;
+ if (task_spec_ssb_disable(task))
+ return PR_SPEC_PRCTL | PR_SPEC_DISABLE;
+ return PR_SPEC_PRCTL | PR_SPEC_ENABLE;
+ default:
+ if (boot_cpu_has_bug(X86_BUG_SPEC_STORE_BYPASS))
+ return PR_SPEC_ENABLE;
+ return PR_SPEC_NOT_AFFECTED;
+ }
+}
+
+int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which)
+{
+ switch (which) {
+ case PR_SPEC_STORE_BYPASS:
+ return ssb_prctl_get(task);
+ default:
+ return -ENODEV;
+ }
+}
+
+void x86_spec_ctrl_setup_ap(void)
+{
+ if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL))
+ wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
+
+ if (ssb_mode == SPEC_STORE_BYPASS_DISABLE)
+ x86_amd_ssb_disable();
+}
#ifdef CONFIG_SYSFS
-ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf)
+
+static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr,
+ char *buf, unsigned int bug)
{
- if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN))
+ if (!boot_cpu_has_bug(bug))
return sprintf(buf, "Not affected\n");
- if (boot_cpu_has(X86_FEATURE_PTI))
- return sprintf(buf, "Mitigation: PTI\n");
+
+ switch (bug) {
+ case X86_BUG_CPU_MELTDOWN:
+ if (boot_cpu_has(X86_FEATURE_PTI))
+ return sprintf(buf, "Mitigation: PTI\n");
+
+ break;
+
+ case X86_BUG_SPECTRE_V1:
+ return sprintf(buf, "Mitigation: __user pointer sanitization\n");
+
+ case X86_BUG_SPECTRE_V2:
+ return sprintf(buf, "%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled],
+ boot_cpu_has(X86_FEATURE_USE_IBPB) ? ", IBPB" : "",
+ boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "",
+ spectre_v2_module_string());
+
+ case X86_BUG_SPEC_STORE_BYPASS:
+ return sprintf(buf, "%s\n", ssb_strings[ssb_mode]);
+
+ default:
+ break;
+ }
+
return sprintf(buf, "Vulnerable\n");
}
+ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ return cpu_show_common(dev, attr, buf, X86_BUG_CPU_MELTDOWN);
+}
+
ssize_t cpu_show_spectre_v1(struct device *dev, struct device_attribute *attr, char *buf)
{
- if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V1))
- return sprintf(buf, "Not affected\n");
- return sprintf(buf, "Mitigation: __user pointer sanitization\n");
+ return cpu_show_common(dev, attr, buf, X86_BUG_SPECTRE_V1);
}
ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, char *buf)
{
- if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
- return sprintf(buf, "Not affected\n");
+ return cpu_show_common(dev, attr, buf, X86_BUG_SPECTRE_V2);
+}
- return sprintf(buf, "%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled],
- boot_cpu_has(X86_FEATURE_USE_IBPB) ? ", IBPB" : "",
- boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "",
- spectre_v2_module_string());
+ssize_t cpu_show_spec_store_bypass(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ return cpu_show_common(dev, attr, buf, X86_BUG_SPEC_STORE_BYPASS);
}
#endif
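
The prctl plumbing added above is driven from userspace through PR_SET_SPECULATION_CTRL / PR_GET_SPECULATION_CTRL. A hedged usage sketch, assuming the PR_SPEC_* constants this series adds to include/uapi/linux/prctl.h:

	#include <stdio.h>
	#include <sys/prctl.h>
	#include <linux/prctl.h>

	int main(void)
	{
		/* Opt this task out of speculative store bypass. */
		if (prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS,
			  PR_SPEC_DISABLE, 0, 0))
			perror("PR_SET_SPECULATION_CTRL");

		/* In prctl or seccomp mode this should now report
		 * PR_SPEC_PRCTL | PR_SPEC_DISABLE. */
		printf("state: %ld\n",
		       prctl(PR_GET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS,
			     0, 0, 0));
		return 0;
	}
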
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/cacheinfo.c
index 54d04d574148..38354c66df81 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/cacheinfo.c
@@ -20,6 +20,8 @@
#include <asm/amd_nb.h>
#include <asm/smp.h>
+#include "cpu.h"
+
#define LVL_1_INST 1
#define LVL_1_DATA 2
#define LVL_2 3
@@ -637,6 +639,45 @@ static int find_num_cache_leaves(struct cpuinfo_x86 *c)
return i;
}
+void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, int cpu, u8 node_id)
+{
+ /*
+ * We may have multiple LLCs if L3 caches exist, so check if we
+ * have an L3 cache by looking at the L3 cache CPUID leaf.
+ */
+ if (!cpuid_edx(0x80000006))
+ return;
+
+ if (c->x86 < 0x17) {
+ /* LLC is at the node level. */
+ per_cpu(cpu_llc_id, cpu) = node_id;
+ } else if (c->x86 == 0x17 &&
+ c->x86_model >= 0 && c->x86_model <= 0x1F) {
+ /*
+ * LLC is at the core complex level.
+ * Core complex ID is ApicId[3] for these processors.
+ */
+ per_cpu(cpu_llc_id, cpu) = c->apicid >> 3;
+ } else {
+ /*
+ * LLC ID is calculated from the number of threads sharing the
+ * cache.
+ */
+ u32 eax, ebx, ecx, edx, num_sharing_cache = 0;
+ u32 llc_index = find_num_cache_leaves(c) - 1;
+
+ cpuid_count(0x8000001d, llc_index, &eax, &ebx, &ecx, &edx);
+ if (eax)
+ num_sharing_cache = ((eax >> 14) & 0xfff) + 1;
+
+ if (num_sharing_cache) {
+ int bits = get_count_order(num_sharing_cache) - 1;
+
+ per_cpu(cpu_llc_id, cpu) = c->apicid >> bits;
+ }
+ }
+}
+
void init_amd_cacheinfo(struct cpuinfo_x86 *c)
{
@@ -650,7 +691,7 @@ void init_amd_cacheinfo(struct cpuinfo_x86 *c)
}
}
-unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c)
+void init_intel_cacheinfo(struct cpuinfo_x86 *c)
{
/* Cache sizes */
unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0;
@@ -802,7 +843,8 @@ unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c)
c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d));
- return l2;
+ if (!l2)
+ cpu_detect_cache_sizes(c);
}
static int __cache_amd_cpumap_setup(unsigned int cpu, int index,
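
For the generic branch of cacheinfo_amd_init_llc_id() above, the shift width falls out of the sharing count reported by CPUID 0x8000001d. Taking the code at face value with an assumed leaf value:

	/* EAX[25:14] = 15 -> num_sharing_cache = 16 threads share the LLC;
	 * get_count_order(16) == 4, so bits == 3 and the LLC ID becomes
	 * apicid >> 3 -- the same grouping the Model 00h-1Fh case hardcodes. */
	num_sharing_cache = ((eax >> 14) & 0xfff) + 1;	/* 16 */
	bits = get_count_order(num_sharing_cache) - 1;	/* 3  */
	per_cpu(cpu_llc_id, cpu) = c->apicid >> bits;
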
diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c
index e5ec0f11c0de..14433ff5b828 100644
--- a/arch/x86/kernel/cpu/centaur.c
+++ b/arch/x86/kernel/cpu/centaur.c
@@ -18,6 +18,13 @@
#define RNG_ENABLED (1 << 3)
#define RNG_ENABLE (1 << 6) /* MSR_VIA_RNG */
+#define X86_VMX_FEATURE_PROC_CTLS_TPR_SHADOW 0x00200000
+#define X86_VMX_FEATURE_PROC_CTLS_VNMI 0x00400000
+#define X86_VMX_FEATURE_PROC_CTLS_2ND_CTLS 0x80000000
+#define X86_VMX_FEATURE_PROC_CTLS2_VIRT_APIC 0x00000001
+#define X86_VMX_FEATURE_PROC_CTLS2_EPT 0x00000002
+#define X86_VMX_FEATURE_PROC_CTLS2_VPID 0x00000020
+
static void init_c3(struct cpuinfo_x86 *c)
{
u32 lo, hi;
@@ -112,6 +119,31 @@ static void early_init_centaur(struct cpuinfo_x86 *c)
}
}
+static void centaur_detect_vmx_virtcap(struct cpuinfo_x86 *c)
+{
+ u32 vmx_msr_low, vmx_msr_high, msr_ctl, msr_ctl2;
+
+ rdmsr(MSR_IA32_VMX_PROCBASED_CTLS, vmx_msr_low, vmx_msr_high);
+ msr_ctl = vmx_msr_high | vmx_msr_low;
+
+ if (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_TPR_SHADOW)
+ set_cpu_cap(c, X86_FEATURE_TPR_SHADOW);
+ if (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_VNMI)
+ set_cpu_cap(c, X86_FEATURE_VNMI);
+ if (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_2ND_CTLS) {
+ rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2,
+ vmx_msr_low, vmx_msr_high);
+ msr_ctl2 = vmx_msr_high | vmx_msr_low;
+ if ((msr_ctl2 & X86_VMX_FEATURE_PROC_CTLS2_VIRT_APIC) &&
+ (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_TPR_SHADOW))
+ set_cpu_cap(c, X86_FEATURE_FLEXPRIORITY);
+ if (msr_ctl2 & X86_VMX_FEATURE_PROC_CTLS2_EPT)
+ set_cpu_cap(c, X86_FEATURE_EPT);
+ if (msr_ctl2 & X86_VMX_FEATURE_PROC_CTLS2_VPID)
+ set_cpu_cap(c, X86_FEATURE_VPID);
+ }
+}
+
static void init_centaur(struct cpuinfo_x86 *c)
{
#ifdef CONFIG_X86_32
@@ -128,6 +160,24 @@ static void init_centaur(struct cpuinfo_x86 *c)
clear_cpu_cap(c, 0*32+31);
#endif
early_init_centaur(c);
+ init_intel_cacheinfo(c);
+ detect_num_cpu_cores(c);
+#ifdef CONFIG_X86_32
+ detect_ht(c);
+#endif
+
+ if (c->cpuid_level > 9) {
+ unsigned int eax = cpuid_eax(10);
+
+ /*
+ * Check for version and the number of counters
+ * Version(eax[7:0]) can't be 0;
+ * Counters(eax[15:8]) should be greater than 1;
+ */
+ if ((eax & 0xff) && (((eax >> 8) & 0xff) > 1))
+ set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON);
+ }
+
switch (c->x86) {
#ifdef CONFIG_X86_32
case 5:
@@ -199,6 +249,9 @@ static void init_centaur(struct cpuinfo_x86 *c)
#ifdef CONFIG_X86_64
set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
#endif
+
+ if (cpu_has(c, X86_FEATURE_VMX))
+ centaur_detect_vmx_virtcap(c);
}
#ifdef CONFIG_X86_32
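
The cpuid_eax(10) check added to init_centaur() mirrors the long-standing Intel one: architectural perfmon is only advertised when the version field is non-zero and more than one counter is reported. Decoding a hypothetical leaf value:

	unsigned int eax = 0x07300403;	/* illustrative CPUID.0xA:EAX */
	/* version = eax & 0xff = 3 (non-zero), counters =
	 * (eax >> 8) & 0xff = 4 > 1, so the feature bit is set. */
	if ((eax & 0xff) && (((eax >> 8) & 0xff) > 1))
		set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON);
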
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 8a5b185735e1..95c8e507580d 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -66,6 +66,13 @@ cpumask_var_t cpu_callin_mask;
/* representing cpus for which sibling maps can be computed */
cpumask_var_t cpu_sibling_setup_mask;
+/* Number of siblings per CPU package */
+int smp_num_siblings = 1;
+EXPORT_SYMBOL(smp_num_siblings);
+
+/* Last level cache ID of each logical CPU */
+DEFINE_PER_CPU_READ_MOSTLY(u16, cpu_llc_id) = BAD_APICID;
+
/* correctly size the local cpu masks */
void __init setup_cpu_local_masks(void)
{
@@ -577,6 +584,19 @@ static void get_model_name(struct cpuinfo_x86 *c)
*(s + 1) = '\0';
}
+void detect_num_cpu_cores(struct cpuinfo_x86 *c)
+{
+ unsigned int eax, ebx, ecx, edx;
+
+ c->x86_max_cores = 1;
+ if (!IS_ENABLED(CONFIG_SMP) || c->cpuid_level < 4)
+ return;
+
+ cpuid_count(4, 0, &eax, &ebx, &ecx, &edx);
+ if (eax & 0x1f)
+ c->x86_max_cores = (eax >> 26) + 1;
+}
+
void cpu_detect_cache_sizes(struct cpuinfo_x86 *c)
{
unsigned int n, dummy, ebx, ecx, edx, l2size;
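
detect_num_cpu_cores() above hoists the former Intel-only helper into common code: leaf 4, sub-leaf 0 is only trusted when its cache-type field (EAX[4:0]) is non-zero, and the core count lives in EAX[31:26] plus one. With an assumed leaf value:

	/* eax = 0x1c004121: EAX[4:0] = 1 (valid entry), EAX[31:26] = 7,
	 * so x86_max_cores = 7 + 1 = 8. */
	cpuid_count(4, 0, &eax, &ebx, &ecx, &edx);
	if (eax & 0x1f)
		c->x86_max_cores = (eax >> 26) + 1;
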
@@ -757,17 +777,32 @@ static void init_speculation_control(struct cpuinfo_x86 *c)
* and they also have a different bit for STIBP support. Also,
* a hypervisor might have set the individual AMD bits even on
* Intel CPUs, for finer-grained selection of what's available.
- *
- * We use the AMD bits in 0x8000_0008 EBX as the generic hardware
- * features, which are visible in /proc/cpuinfo and used by the
- * kernel. So set those accordingly from the Intel bits.
*/
if (cpu_has(c, X86_FEATURE_SPEC_CTRL)) {
set_cpu_cap(c, X86_FEATURE_IBRS);
set_cpu_cap(c, X86_FEATURE_IBPB);
+ set_cpu_cap(c, X86_FEATURE_MSR_SPEC_CTRL);
}
+
if (cpu_has(c, X86_FEATURE_INTEL_STIBP))
set_cpu_cap(c, X86_FEATURE_STIBP);
+
+ if (cpu_has(c, X86_FEATURE_SPEC_CTRL_SSBD) ||
+ cpu_has(c, X86_FEATURE_VIRT_SSBD))
+ set_cpu_cap(c, X86_FEATURE_SSBD);
+
+ if (cpu_has(c, X86_FEATURE_AMD_IBRS)) {
+ set_cpu_cap(c, X86_FEATURE_IBRS);
+ set_cpu_cap(c, X86_FEATURE_MSR_SPEC_CTRL);
+ }
+
+ if (cpu_has(c, X86_FEATURE_AMD_IBPB))
+ set_cpu_cap(c, X86_FEATURE_IBPB);
+
+ if (cpu_has(c, X86_FEATURE_AMD_STIBP)) {
+ set_cpu_cap(c, X86_FEATURE_STIBP);
+ set_cpu_cap(c, X86_FEATURE_MSR_SPEC_CTRL);
+ }
}
void get_cpu_cap(struct cpuinfo_x86 *c)
@@ -848,6 +883,11 @@ void get_cpu_cap(struct cpuinfo_x86 *c)
c->x86_power = edx;
}
+ if (c->extended_cpuid_level >= 0x80000008) {
+ cpuid(0x80000008, &eax, &ebx, &ecx, &edx);
+ c->x86_capability[CPUID_8000_0008_EBX] = ebx;
+ }
+
if (c->extended_cpuid_level >= 0x8000000a)
c->x86_capability[CPUID_8000_000A_EDX] = cpuid_edx(0x8000000a);
@@ -871,7 +911,6 @@ static void get_cpu_address_sizes(struct cpuinfo_x86 *c)
c->x86_virt_bits = (eax >> 8) & 0xff;
c->x86_phys_bits = eax & 0xff;
- c->x86_capability[CPUID_8000_0008_EBX] = ebx;
}
#ifdef CONFIG_X86_32
else if (cpu_has(c, X86_FEATURE_PAE) || cpu_has(c, X86_FEATURE_PSE36))
@@ -923,21 +962,47 @@ static const __initconst struct x86_cpu_id cpu_no_meltdown[] = {
{}
};
-static bool __init cpu_vulnerable_to_meltdown(struct cpuinfo_x86 *c)
+/* Only list CPUs which speculate but are not susceptible to SSB */
+static const __initconst struct x86_cpu_id cpu_no_spec_store_bypass[] = {
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT1 },
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_AIRMONT },
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT2 },
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_MERRIFIELD },
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_CORE_YONAH },
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_XEON_PHI_KNL },
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_XEON_PHI_KNM },
+ { X86_VENDOR_AMD, 0x12, },
+ { X86_VENDOR_AMD, 0x11, },
+ { X86_VENDOR_AMD, 0x10, },
+ { X86_VENDOR_AMD, 0xf, },
+ {}
+};
+
+static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
{
u64 ia32_cap = 0;
- if (x86_match_cpu(cpu_no_meltdown))
- return false;
+ if (x86_match_cpu(cpu_no_speculation))
+ return;
+
+ setup_force_cpu_bug(X86_BUG_SPECTRE_V1);
+ setup_force_cpu_bug(X86_BUG_SPECTRE_V2);
if (cpu_has(c, X86_FEATURE_ARCH_CAPABILITIES))
rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap);
+ if (!x86_match_cpu(cpu_no_spec_store_bypass) &&
+ !(ia32_cap & ARCH_CAP_SSB_NO))
+ setup_force_cpu_bug(X86_BUG_SPEC_STORE_BYPASS);
+
+ if (x86_match_cpu(cpu_no_meltdown))
+ return;
+
/* Rogue Data Cache Load? No! */
if (ia32_cap & ARCH_CAP_RDCL_NO)
- return false;
+ return;
- return true;
+ setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN);
}
/*
@@ -988,12 +1053,7 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
setup_force_cpu_cap(X86_FEATURE_ALWAYS);
- if (!x86_match_cpu(cpu_no_speculation)) {
- if (cpu_vulnerable_to_meltdown(c))
- setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN);
- setup_force_cpu_bug(X86_BUG_SPECTRE_V1);
- setup_force_cpu_bug(X86_BUG_SPECTRE_V2);
- }
+ cpu_set_bug_bits(c);
fpu__init_system(c);
@@ -1004,6 +1064,21 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
*/
setup_clear_cpu_cap(X86_FEATURE_PCID);
#endif
+
+ /*
+ * Later in the boot process pgtable_l5_enabled() relies on
+ * cpu_feature_enabled(X86_FEATURE_LA57). If 5-level paging is not
+ * enabled by this point we need to clear the feature bit to avoid
+ * false-positives at the later stage.
+ *
+ * pgtable_l5_enabled() can be false here for several reasons:
+ * - 5-level paging is disabled at compile time;
+ * - it's a 32-bit kernel;
+ * - machine doesn't support 5-level paging;
+ * - user specified 'no5lvl' in kernel command line.
+ */
+ if (!pgtable_l5_enabled())
+ setup_clear_cpu_cap(X86_FEATURE_LA57);
}
void __init early_cpu_init(void)
@@ -1355,6 +1430,7 @@ void identify_secondary_cpu(struct cpuinfo_x86 *c)
#endif
mtrr_ap_init();
validate_apic_and_package_id(c);
+ x86_spec_ctrl_setup_ap();
}
static __init int setup_noclflush(char *arg)
diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h
index e806b11a99af..38216f678fc3 100644
--- a/arch/x86/kernel/cpu/cpu.h
+++ b/arch/x86/kernel/cpu/cpu.h
@@ -47,7 +47,19 @@ extern const struct cpu_dev *const __x86_cpu_dev_start[],
extern void get_cpu_cap(struct cpuinfo_x86 *c);
extern void cpu_detect_cache_sizes(struct cpuinfo_x86 *c);
+extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c);
+extern u32 get_scattered_cpuid_leaf(unsigned int level,
+ unsigned int sub_leaf,
+ enum cpuid_regs_idx reg);
+extern void init_intel_cacheinfo(struct cpuinfo_x86 *c);
+extern void init_amd_cacheinfo(struct cpuinfo_x86 *c);
+
+extern void detect_num_cpu_cores(struct cpuinfo_x86 *c);
+extern int detect_extended_topology(struct cpuinfo_x86 *c);
+extern void detect_ht(struct cpuinfo_x86 *c);
unsigned int aperfmperf_get_khz(int cpu);
+extern void x86_spec_ctrl_setup_ap(void);
+
#endif /* ARCH_X86_CPU_H */
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 60d1897041da..eb75564f2d25 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -188,7 +188,10 @@ static void early_init_intel(struct cpuinfo_x86 *c)
setup_clear_cpu_cap(X86_FEATURE_IBPB);
setup_clear_cpu_cap(X86_FEATURE_STIBP);
setup_clear_cpu_cap(X86_FEATURE_SPEC_CTRL);
+ setup_clear_cpu_cap(X86_FEATURE_MSR_SPEC_CTRL);
setup_clear_cpu_cap(X86_FEATURE_INTEL_STIBP);
+ setup_clear_cpu_cap(X86_FEATURE_SSBD);
+ setup_clear_cpu_cap(X86_FEATURE_SPEC_CTRL_SSBD);
}
/*
@@ -453,24 +456,6 @@ static void srat_detect_node(struct cpuinfo_x86 *c)
#endif
}
-/*
- * find out the number of processor cores on the die
- */
-static int intel_num_cpu_cores(struct cpuinfo_x86 *c)
-{
- unsigned int eax, ebx, ecx, edx;
-
- if (!IS_ENABLED(CONFIG_SMP) || c->cpuid_level < 4)
- return 1;
-
- /* Intel has a non-standard dependency on %ecx for this CPUID level. */
- cpuid_count(4, 0, &eax, &ebx, &ecx, &edx);
- if (eax & 0x1f)
- return (eax >> 26) + 1;
- else
- return 1;
-}
-
static void detect_vmx_virtcap(struct cpuinfo_x86 *c)
{
/* Intel VMX MSR indicated features */
@@ -653,8 +638,6 @@ static void init_intel_misc_features(struct cpuinfo_x86 *c)
static void init_intel(struct cpuinfo_x86 *c)
{
- unsigned int l2 = 0;
-
early_init_intel(c);
intel_workarounds(c);
@@ -671,19 +654,13 @@ static void init_intel(struct cpuinfo_x86 *c)
* let's use the legacy cpuid vector 0x1 and 0x4 for topology
* detection.
*/
- c->x86_max_cores = intel_num_cpu_cores(c);
+ detect_num_cpu_cores(c);
#ifdef CONFIG_X86_32
detect_ht(c);
#endif
}
- l2 = init_intel_cacheinfo(c);
-
- /* Detect legacy cache sizes if init_intel_cacheinfo did not */
- if (l2 == 0) {
- cpu_detect_cache_sizes(c);
- l2 = c->x86_cache_size;
- }
+ init_intel_cacheinfo(c);
if (c->cpuid_level > 9) {
unsigned eax = cpuid_eax(10);
@@ -696,7 +673,8 @@ static void init_intel(struct cpuinfo_x86 *c)
set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
if (boot_cpu_has(X86_FEATURE_DS)) {
- unsigned int l1;
+ unsigned int l1, l2;
+
rdmsr(MSR_IA32_MISC_ENABLE, l1, l2);
if (!(l1 & (1<<11)))
set_cpu_cap(c, X86_FEATURE_BTS);
@@ -724,6 +702,7 @@ static void init_intel(struct cpuinfo_x86 *c)
* Dixon is NOT a Celeron.
*/
if (c->x86 == 6) {
+ unsigned int l2 = c->x86_cache_size;
char *p = NULL;
switch (c->x86_model) {
diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c
index 475cb4f5f14f..c805a06e14c3 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-inject.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c
@@ -48,7 +48,7 @@ static struct dentry *dfs_inj;
static u8 n_banks;
-#define MAX_FLAG_OPT_SIZE 3
+#define MAX_FLAG_OPT_SIZE 4
#define NBCFG 0x44
enum injection_type {
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 42cf2880d0ed..cd76380af79f 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -1727,6 +1727,21 @@ static void __mcheck_cpu_init_early(struct cpuinfo_x86 *c)
}
}
+static void mce_centaur_feature_init(struct cpuinfo_x86 *c)
+{
+ struct mca_config *cfg = &mca_cfg;
+
+ /*
+ * All newer Centaur CPUs support MCE broadcasting. Enable
+ * synchronization with a one second timeout.
+ */
+ if ((c->x86 == 6 && c->x86_model == 0xf && c->x86_stepping >= 0xe) ||
+ c->x86 > 6) {
+ if (cfg->monarch_timeout < 0)
+ cfg->monarch_timeout = USEC_PER_SEC;
+ }
+}
+
static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
{
switch (c->x86_vendor) {
@@ -1739,6 +1754,9 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
mce_amd_feature_init(c);
break;
}
+ case X86_VENDOR_CENTAUR:
+ mce_centaur_feature_init(c);
+ break;
default:
break;
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index f7666eef4a87..f591b01930db 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -94,6 +94,11 @@ static struct smca_bank_name smca_names[] = {
[SMCA_SMU] = { "smu", "System Management Unit" },
};
+static u32 smca_bank_addrs[MAX_NR_BANKS][NR_BLOCKS] __ro_after_init =
+{
+ [0 ... MAX_NR_BANKS - 1] = { [0 ... NR_BLOCKS - 1] = -1 }
+};
+
const char *smca_get_name(enum smca_bank_types t)
{
if (t >= N_SMCA_BANK_TYPES)
@@ -431,8 +436,7 @@ static void deferred_error_interrupt_enable(struct cpuinfo_x86 *c)
wrmsr(MSR_CU_DEF_ERR, low, high);
}
-static u32 smca_get_block_address(unsigned int cpu, unsigned int bank,
- unsigned int block)
+static u32 smca_get_block_address(unsigned int bank, unsigned int block)
{
u32 low, high;
u32 addr = 0;
@@ -443,24 +447,30 @@ static u32 smca_get_block_address(unsigned int cpu, unsigned int bank,
if (!block)
return MSR_AMD64_SMCA_MCx_MISC(bank);
+ /* Check our cache first: */
+ if (smca_bank_addrs[bank][block] != -1)
+ return smca_bank_addrs[bank][block];
+
/*
* For SMCA enabled processors, BLKPTR field of the first MISC register
* (MCx_MISC0) indicates presence of additional MISC regs set (MISC1-4).
*/
- if (rdmsr_safe_on_cpu(cpu, MSR_AMD64_SMCA_MCx_CONFIG(bank), &low, &high))
- return addr;
+ if (rdmsr_safe(MSR_AMD64_SMCA_MCx_CONFIG(bank), &low, &high))
+ goto out;
if (!(low & MCI_CONFIG_MCAX))
- return addr;
+ goto out;
- if (!rdmsr_safe_on_cpu(cpu, MSR_AMD64_SMCA_MCx_MISC(bank), &low, &high) &&
+ if (!rdmsr_safe(MSR_AMD64_SMCA_MCx_MISC(bank), &low, &high) &&
(low & MASK_BLKPTR_LO))
- return MSR_AMD64_SMCA_MCx_MISCy(bank, block - 1);
+ addr = MSR_AMD64_SMCA_MCx_MISCy(bank, block - 1);
+out:
+ smca_bank_addrs[bank][block] = addr;
return addr;
}
-static u32 get_block_address(unsigned int cpu, u32 current_addr, u32 low, u32 high,
+static u32 get_block_address(u32 current_addr, u32 low, u32 high,
unsigned int bank, unsigned int block)
{
u32 addr = 0, offset = 0;
@@ -468,20 +478,8 @@ static u32 get_block_address(unsigned int cpu, u32 current_addr, u32 low, u32 hi
if ((bank >= mca_cfg.banks) || (block >= NR_BLOCKS))
return addr;
- /* Get address from already initialized block. */
- if (per_cpu(threshold_banks, cpu)) {
- struct threshold_bank *bankp = per_cpu(threshold_banks, cpu)[bank];
-
- if (bankp && bankp->blocks) {
- struct threshold_block *blockp = &bankp->blocks[block];
-
- if (blockp)
- return blockp->address;
- }
- }
-
if (mce_flags.smca)
- return smca_get_block_address(cpu, bank, block);
+ return smca_get_block_address(bank, block);
/* Fall back to method we used for older processors: */
switch (block) {
@@ -559,7 +557,7 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
smca_configure(bank, cpu);
for (block = 0; block < NR_BLOCKS; ++block) {
- address = get_block_address(cpu, address, low, high, bank, block);
+ address = get_block_address(address, low, high, bank, block);
if (!address)
break;
@@ -1176,7 +1174,7 @@ static int allocate_threshold_blocks(unsigned int cpu, unsigned int bank,
if (err)
goto out_free;
recurse:
- address = get_block_address(cpu, address, low, high, bank, ++block);
+ address = get_block_address(address, low, high, bank, ++block);
if (!address)
return 0;
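
smca_get_block_address() above swaps rdmsr_safe_on_cpu() for plain rdmsr_safe() and memoizes the result in smca_bank_addrs[], where -1 marks a slot as not yet computed. The shape of the pattern, with hypothetical names:

	/* First call computes and caches; later calls -- including from
	 * contexts where a cross-CPU MSR read would be unsafe -- just
	 * return the cached address. */
	if (cache[bank][block] != -1)
		return cache[bank][block];
	addr = compute_block_address(bank, block);	/* hypothetical helper */
	cache[bank][block] = addr;
	return addr;
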
diff --git a/arch/x86/kernel/cpu/mtrr/Makefile b/arch/x86/kernel/cpu/mtrr/Makefile
index ad9e5ed81181..2ad9107ee980 100644
--- a/arch/x86/kernel/cpu/mtrr/Makefile
+++ b/arch/x86/kernel/cpu/mtrr/Makefile
@@ -1,3 +1,3 @@
-obj-y := main.o if.o generic.o cleanup.o
+obj-y := mtrr.o if.o generic.o cleanup.o
obj-$(CONFIG_X86_32) += amd.o cyrix.o centaur.o
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/mtrr.c
index 7468de429087..9a19c800fe40 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/mtrr.c
@@ -46,6 +46,7 @@
#include <linux/pci.h>
#include <linux/smp.h>
#include <linux/syscore_ops.h>
+#include <linux/rcupdate.h>
#include <asm/cpufeature.h>
#include <asm/e820/api.h>
@@ -100,7 +101,7 @@ static int have_wrcomb(void)
if (dev->vendor == PCI_VENDOR_ID_SERVERWORKS &&
dev->device == PCI_DEVICE_ID_SERVERWORKS_LE &&
dev->revision <= 5) {
- pr_info("mtrr: Serverworks LE rev < 6 detected. Write-combining disabled.\n");
+ pr_info("Serverworks LE rev < 6 detected. Write-combining disabled.\n");
pci_dev_put(dev);
return 0;
}
@@ -110,7 +111,7 @@ static int have_wrcomb(void)
*/
if (dev->vendor == PCI_VENDOR_ID_INTEL &&
dev->device == PCI_DEVICE_ID_INTEL_82451NX) {
- pr_info("mtrr: Intel 450NX MMC detected. Write-combining disabled.\n");
+ pr_info("Intel 450NX MMC detected. Write-combining disabled.\n");
pci_dev_put(dev);
return 0;
}
@@ -312,24 +313,24 @@ int mtrr_add_page(unsigned long base, unsigned long size,
return error;
if (type >= MTRR_NUM_TYPES) {
- pr_warn("mtrr: type: %u invalid\n", type);
+ pr_warn("type: %u invalid\n", type);
return -EINVAL;
}
/* If the type is WC, check that this processor supports it */
if ((type == MTRR_TYPE_WRCOMB) && !have_wrcomb()) {
- pr_warn("mtrr: your processor doesn't support write-combining\n");
+ pr_warn("your processor doesn't support write-combining\n");
return -ENOSYS;
}
if (!size) {
- pr_warn("mtrr: zero sized request\n");
+ pr_warn("zero sized request\n");
return -EINVAL;
}
if ((base | (base + size - 1)) >>
(boot_cpu_data.x86_phys_bits - PAGE_SHIFT)) {
- pr_warn("mtrr: base or size exceeds the MTRR width\n");
+ pr_warn("base or size exceeds the MTRR width\n");
return -EINVAL;
}
@@ -360,8 +361,7 @@ int mtrr_add_page(unsigned long base, unsigned long size,
} else if (types_compatible(type, ltype))
continue;
}
- pr_warn("mtrr: 0x%lx000,0x%lx000 overlaps existing"
- " 0x%lx000,0x%lx000\n", base, size, lbase,
+ pr_warn("0x%lx000,0x%lx000 overlaps existing 0x%lx000,0x%lx000\n", base, size, lbase,
lsize);
goto out;
}
@@ -369,7 +369,7 @@ int mtrr_add_page(unsigned long base, unsigned long size,
if (ltype != type) {
if (types_compatible(type, ltype))
continue;
- pr_warn("mtrr: type mismatch for %lx000,%lx000 old: %s new: %s\n",
+ pr_warn("type mismatch for %lx000,%lx000 old: %s new: %s\n",
base, size, mtrr_attrib_to_str(ltype),
mtrr_attrib_to_str(type));
goto out;
@@ -395,7 +395,7 @@ int mtrr_add_page(unsigned long base, unsigned long size,
}
}
} else {
- pr_info("mtrr: no more MTRRs available\n");
+ pr_info("no more MTRRs available\n");
}
error = i;
out:
@@ -407,8 +407,8 @@ int mtrr_add_page(unsigned long base, unsigned long size,
static int mtrr_check(unsigned long base, unsigned long size)
{
if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) {
- pr_warn("mtrr: size and base must be multiples of 4 kiB\n");
- pr_debug("mtrr: size: 0x%lx base: 0x%lx\n", size, base);
+ pr_warn("size and base must be multiples of 4 kiB\n");
+ pr_debug("size: 0x%lx base: 0x%lx\n", size, base);
dump_stack();
return -1;
}
@@ -499,22 +499,22 @@ int mtrr_del_page(int reg, unsigned long base, unsigned long size)
}
}
if (reg < 0) {
- pr_debug("mtrr: no MTRR for %lx000,%lx000 found\n",
+ pr_debug("no MTRR for %lx000,%lx000 found\n",
base, size);
goto out;
}
}
if (reg >= max) {
- pr_warn("mtrr: register: %d too big\n", reg);
+ pr_warn("register: %d too big\n", reg);
goto out;
}
mtrr_if->get(reg, &lbase, &lsize, &ltype);
if (lsize < 1) {
- pr_warn("mtrr: MTRR %d not used\n", reg);
+ pr_warn("MTRR %d not used\n", reg);
goto out;
}
if (mtrr_usage_table[reg] < 1) {
- pr_warn("mtrr: reg: %d has count=0\n", reg);
+ pr_warn("reg: %d has count=0\n", reg);
goto out;
}
if (--mtrr_usage_table[reg] < 1)
@@ -775,7 +775,7 @@ void __init mtrr_bp_init(void)
}
if (!mtrr_enabled()) {
- pr_info("MTRR: Disabled\n");
+ pr_info("Disabled\n");
/*
* PAT initialization relies on MTRR's rendezvous handler.
@@ -793,6 +793,9 @@ void mtrr_ap_init(void)
if (!use_intel() || mtrr_aps_delayed_init)
return;
+
+ rcu_cpu_starting(smp_processor_id());
+
/*
* Ideally we should hold mtrr_mutex here to avoid mtrr entries
* changed, but this routine will be called in cpu boot time,
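
The string cleanups in mtrr.c work because the hand-rolled "mtrr: " prefixes are now supplied centrally by a pr_fmt definition at the top of the file (not visible in these hunks). The mechanism, as a sketch of what the file is assumed to define before its #includes:

	#define pr_fmt(fmt) "mtrr: " fmt

	/* ...after which every pr_warn()/pr_info() in the file is
	 * prefixed automatically: */
	pr_warn("zero sized request\n");	/* logs "mtrr: zero sized request" */
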
diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c
index b099024d339c..81c0afb39d0a 100644
--- a/arch/x86/kernel/cpu/topology.c
+++ b/arch/x86/kernel/cpu/topology.c
@@ -27,7 +27,7 @@
* exists, use it for populating initial_apicid and cpu topology
* detection.
*/
-void detect_extended_topology(struct cpuinfo_x86 *c)
+int detect_extended_topology(struct cpuinfo_x86 *c)
{
#ifdef CONFIG_SMP
unsigned int eax, ebx, ecx, edx, sub_index;
@@ -36,7 +36,7 @@ void detect_extended_topology(struct cpuinfo_x86 *c)
static bool printed;
if (c->cpuid_level < 0xb)
- return;
+ return -1;
cpuid_count(0xb, SMT_LEVEL, &eax, &ebx, &ecx, &edx);
@@ -44,7 +44,7 @@ void detect_extended_topology(struct cpuinfo_x86 *c)
* check if the cpuid leaf 0xb is actually implemented.
*/
if (ebx == 0 || (LEAFB_SUBTYPE(ecx) != SMT_TYPE))
- return;
+ return -1;
set_cpu_cap(c, X86_FEATURE_XTOPOLOGY);
@@ -95,6 +95,6 @@ void detect_extended_topology(struct cpuinfo_x86 *c)
c->cpu_core_id);
printed = 1;
}
- return;
#endif
+ return 0;
}
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index 18fa9d74c182..666a284116ac 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -22,11 +22,14 @@
#include <asm/stacktrace.h>
#include <asm/unwind.h>
+#define OPCODE_BUFSIZE 64
+
int panic_on_unrecovered_nmi;
int panic_on_io_nmi;
-static unsigned int code_bytes = 64;
static int die_counter;
+static struct pt_regs exec_summary_regs;
+
bool in_task_stack(unsigned long *stack, struct task_struct *task,
struct stack_info *info)
{
@@ -69,9 +72,62 @@ static void printk_stack_address(unsigned long address, int reliable,
printk("%s %s%pB\n", log_lvl, reliable ? "" : "? ", (void *)address);
}
+/*
+ * There are a couple of reasons for the 2/3rd prologue, courtesy of Linus:
+ *
+ * In case where we don't have the exact kernel image (which, if we did, we can
+ * simply disassemble and navigate to the RIP), the purpose of the bigger
+ * prologue is to have more context and to be able to correlate the code from
+ * the different toolchains better.
+ *
+ * In addition, it helps in recreating the register allocation of the failing
+ * kernel and thus make sense of the register dump.
+ *
+ * What is more, the additional complication of a variable length insn arch like
+ * x86 warrants having longer byte sequence before rIP so that the disassembler
+ * can "sync" up properly and find instruction boundaries when decoding the
+ * opcode bytes.
+ *
+ * Thus, the 2/3rds prologue and 64-byte OPCODE_BUFSIZE are just a random
+ * guesstimate in an attempt to achieve all of the above.
+ */
+void show_opcodes(u8 *rip, const char *loglvl)
+{
+ unsigned int code_prologue = OPCODE_BUFSIZE * 2 / 3;
+ u8 opcodes[OPCODE_BUFSIZE];
+ u8 *ip;
+ int i;
+
+ printk("%sCode: ", loglvl);
+
+ ip = (u8 *)rip - code_prologue;
+ if (probe_kernel_read(opcodes, ip, OPCODE_BUFSIZE)) {
+ pr_cont("Bad RIP value.\n");
+ return;
+ }
+
+ for (i = 0; i < OPCODE_BUFSIZE; i++, ip++) {
+ if (ip == rip)
+ pr_cont("<%02x> ", opcodes[i]);
+ else
+ pr_cont("%02x ", opcodes[i]);
+ }
+ pr_cont("\n");
+}
+
+void show_ip(struct pt_regs *regs, const char *loglvl)
+{
+#ifdef CONFIG_X86_32
+ printk("%sEIP: %pS\n", loglvl, (void *)regs->ip);
+#else
+ printk("%sRIP: %04x:%pS\n", loglvl, (int)regs->cs, (void *)regs->ip);
+#endif
+ show_opcodes((u8 *)regs->ip, loglvl);
+}
+
void show_iret_regs(struct pt_regs *regs)
{
- printk(KERN_DEFAULT "RIP: %04x:%pS\n", (int)regs->cs, (void *)regs->ip);
+ show_ip(regs, KERN_DEFAULT);
printk(KERN_DEFAULT "RSP: %04x:%016lx EFLAGS: %08lx", (int)regs->ss,
regs->sp, regs->flags);
}
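
Concretely, with OPCODE_BUFSIZE = 64 the prologue is 64 * 2 / 3 = 42 bytes, so show_opcodes() dumps the 42 bytes preceding the faulting instruction, the instruction's first byte in angle brackets, and the remaining 21 bytes after it. An illustrative (made-up) output line:

	Code: 8b 05 12 34 56 78 48 89 e5 ... <0f> 0b 5d c3 66 90 ...
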
@@ -267,7 +323,6 @@ unsigned long oops_begin(void)
bust_spinlocks(1);
return flags;
}
-EXPORT_SYMBOL_GPL(oops_begin);
NOKPROBE_SYMBOL(oops_begin);
void __noreturn rewind_stack_do_exit(int signr);
@@ -287,6 +342,9 @@ void oops_end(unsigned long flags, struct pt_regs *regs, int signr)
raw_local_irq_restore(flags);
oops_exit();
+ /* Executive summary in case the oops scrolled away */
+ __show_regs(&exec_summary_regs, true);
+
if (!signr)
return;
if (in_interrupt())
@@ -305,10 +363,10 @@ NOKPROBE_SYMBOL(oops_end);
int __die(const char *str, struct pt_regs *regs, long err)
{
-#ifdef CONFIG_X86_32
- unsigned short ss;
- unsigned long sp;
-#endif
+ /* Save the regs of the first oops for the executive summary later. */
+ if (!die_counter)
+ exec_summary_regs = *regs;
+
printk(KERN_DEFAULT
"%s: %04lx [#%d]%s%s%s%s%s\n", str, err & 0xffff, ++die_counter,
IS_ENABLED(CONFIG_PREEMPT) ? " PREEMPT" : "",
@@ -318,26 +376,13 @@ int __die(const char *str, struct pt_regs *regs, long err)
IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION) ?
(boot_cpu_has(X86_FEATURE_PTI) ? " PTI" : " NOPTI") : "");
+ show_regs(regs);
+ print_modules();
+
if (notify_die(DIE_OOPS, str, regs, err,
current->thread.trap_nr, SIGSEGV) == NOTIFY_STOP)
return 1;
- print_modules();
- show_regs(regs);
-#ifdef CONFIG_X86_32
- if (user_mode(regs)) {
- sp = regs->sp;
- ss = regs->ss;
- } else {
- sp = kernel_stack_pointer(regs);
- savesegment(ss, ss);
- }
- printk(KERN_EMERG "EIP: %pS SS:ESP: %04x:%08lx\n",
- (void *)regs->ip, ss, sp);
-#else
- /* Executive summary in case the oops scrolled away */
- printk(KERN_ALERT "RIP: %pS RSP: %016lx\n", (void *)regs->ip, regs->sp);
-#endif
return 0;
}
NOKPROBE_SYMBOL(__die);
@@ -356,30 +401,9 @@ void die(const char *str, struct pt_regs *regs, long err)
oops_end(flags, regs, sig);
}
-static int __init code_bytes_setup(char *s)
-{
- ssize_t ret;
- unsigned long val;
-
- if (!s)
- return -EINVAL;
-
- ret = kstrtoul(s, 0, &val);
- if (ret)
- return ret;
-
- code_bytes = val;
- if (code_bytes > 8192)
- code_bytes = 8192;
-
- return 1;
-}
-__setup("code_bytes=", code_bytes_setup);
-
void show_regs(struct pt_regs *regs)
{
bool all = true;
- int i;
show_regs_print_info(KERN_DEFAULT);
@@ -389,36 +413,8 @@ void show_regs(struct pt_regs *regs)
__show_regs(regs, all);
/*
- * When in-kernel, we also print out the stack and code at the
- * time of the fault..
+ * When in-kernel, we also print out the stack at the time of the fault.
*/
- if (!user_mode(regs)) {
- unsigned int code_prologue = code_bytes * 43 / 64;
- unsigned int code_len = code_bytes;
- unsigned char c;
- u8 *ip;
-
+ if (!user_mode(regs))
show_trace_log_lvl(current, regs, NULL, KERN_DEFAULT);
-
- printk(KERN_DEFAULT "Code: ");
-
- ip = (u8 *)regs->ip - code_prologue;
- if (ip < (u8 *)PAGE_OFFSET || probe_kernel_address(ip, c)) {
- /* try starting at IP */
- ip = (u8 *)regs->ip;
- code_len = code_len - code_prologue + 1;
- }
- for (i = 0; i < code_len; i++, ip++) {
- if (ip < (u8 *)PAGE_OFFSET ||
- probe_kernel_address(ip, c)) {
- pr_cont(" Bad RIP value.");
- break;
- }
- if (ip == (u8 *)regs->ip)
- pr_cont("<%02x> ", c);
- else
- pr_cont("%02x ", c);
- }
- }
- pr_cont("\n");
}
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 6a2cb1442e05..d1f25c831447 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -155,7 +155,8 @@ static void __init __e820__range_add(struct e820_table *table, u64 start, u64 si
int x = table->nr_entries;
if (x >= ARRAY_SIZE(table->entries)) {
- pr_err("e820: too many entries; ignoring [mem %#010llx-%#010llx]\n", start, start + size - 1);
+ pr_err("too many entries; ignoring [mem %#010llx-%#010llx]\n",
+ start, start + size - 1);
return;
}
@@ -190,9 +191,10 @@ void __init e820__print_table(char *who)
int i;
for (i = 0; i < e820_table->nr_entries; i++) {
- pr_info("%s: [mem %#018Lx-%#018Lx] ", who,
- e820_table->entries[i].addr,
- e820_table->entries[i].addr + e820_table->entries[i].size - 1);
+ pr_info("%s: [mem %#018Lx-%#018Lx] ",
+ who,
+ e820_table->entries[i].addr,
+ e820_table->entries[i].addr + e820_table->entries[i].size - 1);
e820_print_type(e820_table->entries[i].type);
pr_cont("\n");
@@ -574,7 +576,7 @@ void __init e820__update_table_print(void)
if (e820__update_table(e820_table))
return;
- pr_info("e820: modified physical RAM map:\n");
+ pr_info("modified physical RAM map:\n");
e820__print_table("modified");
}
@@ -636,9 +638,8 @@ __init void e820__setup_pci_gap(void)
if (!found) {
#ifdef CONFIG_X86_64
gapstart = (max_pfn << PAGE_SHIFT) + 1024*1024;
- pr_err(
- "e820: Cannot find an available gap in the 32-bit address range\n"
- "e820: PCI devices with unassigned 32-bit BARs may not work!\n");
+ pr_err("Cannot find an available gap in the 32-bit address range\n");
+ pr_err("PCI devices with unassigned 32-bit BARs may not work!\n");
#else
gapstart = 0x10000000;
#endif
@@ -649,7 +650,8 @@ __init void e820__setup_pci_gap(void)
*/
pci_mem_start = gapstart;
- pr_info("e820: [mem %#010lx-%#010lx] available for PCI devices\n", gapstart, gapstart + gapsize - 1);
+ pr_info("[mem %#010lx-%#010lx] available for PCI devices\n",
+ gapstart, gapstart + gapsize - 1);
}
/*
@@ -711,7 +713,7 @@ void __init e820__memory_setup_extended(u64 phys_addr, u32 data_len)
memcpy(e820_table_firmware, e820_table, sizeof(*e820_table_firmware));
early_memunmap(sdata, data_len);
- pr_info("e820: extended physical RAM map:\n");
+ pr_info("extended physical RAM map:\n");
e820__print_table("extended");
}
@@ -780,7 +782,7 @@ u64 __init e820__memblock_alloc_reserved(u64 size, u64 align)
addr = __memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ACCESSIBLE);
if (addr) {
e820__range_update_kexec(addr, size, E820_TYPE_RAM, E820_TYPE_RESERVED);
- pr_info("e820: update e820_table_kexec for e820__memblock_alloc_reserved()\n");
+ pr_info("update e820_table_kexec for e820__memblock_alloc_reserved()\n");
e820__update_table_kexec();
}
@@ -830,8 +832,8 @@ static unsigned long __init e820_end_pfn(unsigned long limit_pfn, enum e820_type
if (last_pfn > max_arch_pfn)
last_pfn = max_arch_pfn;
- pr_info("e820: last_pfn = %#lx max_arch_pfn = %#lx\n",
- last_pfn, max_arch_pfn);
+ pr_info("last_pfn = %#lx max_arch_pfn = %#lx\n",
+ last_pfn, max_arch_pfn);
return last_pfn;
}
@@ -1005,7 +1007,7 @@ void __init e820__finish_early_params(void)
if (e820__update_table(e820_table) < 0)
early_panic("Invalid user supplied memory map");
- pr_info("e820: user-defined physical RAM map:\n");
+ pr_info("user-defined physical RAM map:\n");
e820__print_table("user");
}
}
@@ -1238,7 +1240,7 @@ void __init e820__memory_setup(void)
memcpy(e820_table_kexec, e820_table, sizeof(*e820_table_kexec));
memcpy(e820_table_firmware, e820_table, sizeof(*e820_table_firmware));
- pr_info("e820: BIOS-provided physical RAM map:\n");
+ pr_info("BIOS-provided physical RAM map:\n");
e820__print_table(who);
}
diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c
index bae0d32e327b..da5d8ac60062 100644
--- a/arch/x86/kernel/early-quirks.c
+++ b/arch/x86/kernel/early-quirks.c
@@ -28,8 +28,6 @@
#include <asm/irq_remapping.h>
#include <asm/early_ioremap.h>
-#define dev_err(msg) pr_err("pci 0000:%02x:%02x.%d: %s", bus, slot, func, msg)
-
static void __init fix_hypertransport_config(int num, int slot, int func)
{
u32 htcfg;
@@ -617,7 +615,8 @@ static void __init apple_airport_reset(int bus, int slot, int func)
pmcsr = read_pci_config_16(bus, slot, func, BCM4331_PM_CAP + PCI_PM_CTRL);
if ((pmcsr & PCI_PM_CTRL_STATE_MASK) != PCI_D0) {
- dev_err("Cannot power up Apple AirPort card\n");
+ pr_err("pci 0000:%02x:%02x.%d: Cannot power up Apple AirPort card\n",
+ bus, slot, func);
return;
}
}
@@ -628,7 +627,8 @@ static void __init apple_airport_reset(int bus, int slot, int func)
mmio = early_ioremap(addr, BCM4331_MMIO_SIZE);
if (!mmio) {
- dev_err("Cannot iomap Apple AirPort card\n");
+ pr_err("pci 0000:%02x:%02x.%d: Cannot iomap Apple AirPort card\n",
+ bus, slot, func);
return;
}
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 0c408f8c4ed4..a21d6ace648e 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -6,6 +6,10 @@
*/
#define DISABLE_BRANCH_PROFILING
+
+/* cpu_feature_enabled() cannot be used this early */
+#define USE_EARLY_PGTABLE_L5
+
#include <linux/init.h>
#include <linux/linkage.h>
#include <linux/types.h>
@@ -32,11 +36,6 @@
#include <asm/microcode.h>
#include <asm/kasan.h>
-#ifdef CONFIG_X86_5LEVEL
-#undef pgtable_l5_enabled
-#define pgtable_l5_enabled __pgtable_l5_enabled
-#endif
-
/*
* Manage page tables very early on.
*/
@@ -45,8 +44,7 @@ static unsigned int __initdata next_early_pgt;
pmdval_t early_pmd_flags = __PAGE_KERNEL_LARGE & ~(_PAGE_GLOBAL | _PAGE_NX);
#ifdef CONFIG_X86_5LEVEL
-unsigned int __pgtable_l5_enabled __ro_after_init;
-EXPORT_SYMBOL(__pgtable_l5_enabled);
+unsigned int __pgtable_l5_enabled __initdata;
unsigned int pgdir_shift __ro_after_init = 39;
EXPORT_SYMBOL(pgdir_shift);
unsigned int ptrs_per_p4d __ro_after_init = 1;
@@ -82,13 +80,14 @@ static unsigned int __head *fixup_int(void *ptr, unsigned long physaddr)
static bool __head check_la57_support(unsigned long physaddr)
{
- if (native_cpuid_eax(0) < 7)
- return false;
-
- if (!(native_cpuid_ecx(7) & (1 << (X86_FEATURE_LA57 & 31))))
+ /*
+ * 5-level paging is detected and enabled at kernel decompression
+ * stage. Only check if it has been enabled there.
+ */
+ if (!(native_read_cr4() & X86_CR4_LA57))
return false;
- *fixup_int(&pgtable_l5_enabled, physaddr) = 1;
+ *fixup_int(&__pgtable_l5_enabled, physaddr) = 1;
*fixup_int(&pgdir_shift, physaddr) = 48;
*fixup_int(&ptrs_per_p4d, physaddr) = 512;
*fixup_long(&page_offset_base, physaddr) = __PAGE_OFFSET_BASE_L5;
@@ -104,6 +103,12 @@ static bool __head check_la57_support(unsigned long physaddr)
}
#endif
+/* Code in __startup_64() can be relocated during execution, but the compiler
+ * doesn't have to generate PC-relative relocations when accessing globals from
+ * that function. Clang actually does not generate them, which leads to
+ * boot-time crashes. To work around this problem, every global pointer must
+ * be adjusted using fixup_pointer().
+ */
unsigned long __head __startup_64(unsigned long physaddr,
struct boot_params *bp)
{
@@ -113,6 +118,7 @@ unsigned long __head __startup_64(unsigned long physaddr,
p4dval_t *p4d;
pudval_t *pud;
pmdval_t *pmd, pmd_entry;
+ pteval_t *mask_ptr;
bool la57;
int i;
unsigned int *next_pgt_ptr;
@@ -196,7 +202,8 @@ unsigned long __head __startup_64(unsigned long physaddr,
pmd_entry = __PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL;
/* Filter out unsupported __PAGE_KERNEL_* bits: */
- pmd_entry &= __supported_pte_mask;
+ mask_ptr = fixup_pointer(&__supported_pte_mask, physaddr);
+ pmd_entry &= *mask_ptr;
pmd_entry += sme_get_me_mask();
pmd_entry += physaddr;
@@ -273,7 +280,7 @@ again:
* critical -- __PAGE_OFFSET would point us back into the dynamic
* range and we might end up looping forever...
*/
- if (!pgtable_l5_enabled)
+ if (!pgtable_l5_enabled())
p4d_p = pgd_p;
else if (pgd)
p4d_p = (p4dval_t *)((pgd & PTE_PFN_MASK) + __START_KERNEL_map - phys_base);
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 8ce4212e2b8d..b6be34ee88e9 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -975,8 +975,7 @@ int __init hpet_enable(void)
cfg &= ~(HPET_CFG_ENABLE | HPET_CFG_LEGACY);
hpet_writel(cfg, HPET_CFG);
if (cfg)
- pr_warn("HPET: Unrecognized bits %#x set in global cfg\n",
- cfg);
+ pr_warn("Unrecognized bits %#x set in global cfg\n", cfg);
for (i = 0; i <= last; ++i) {
cfg = hpet_readl(HPET_Tn_CFG(i));
@@ -988,7 +987,7 @@ int __init hpet_enable(void)
| HPET_TN_64BIT_CAP | HPET_TN_32BIT | HPET_TN_ROUTE
| HPET_TN_FSB | HPET_TN_FSB_CAP);
if (cfg)
- pr_warn("HPET: Unrecognized bits %#x set in cfg#%u\n",
+ pr_warn("Unrecognized bits %#x set in cfg#%u\n",
cfg, i);
}
hpet_print_config();
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index 0715f827607c..6f4d42377fe5 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -370,6 +370,10 @@ int __copy_instruction(u8 *dest, u8 *src, u8 *real, struct insn *insn)
if (insn->opcode.bytes[0] == BREAKPOINT_INSTRUCTION)
return 0;
+ /* We should not singlestep on the exception masking instructions */
+ if (insn_masking_exception(insn))
+ return 0;
+
#ifdef CONFIG_X86_64
/* Only x86_64 has RIP relative instructions */
if (insn_rip_relative(insn)) {
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 7867417cfaff..5b2300b818af 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -457,7 +457,7 @@ static void __init sev_map_percpu_data(void)
static void __init kvm_smp_prepare_cpus(unsigned int max_cpus)
{
native_smp_prepare_cpus(max_cpus);
- if (kvm_para_has_hint(KVM_HINTS_DEDICATED))
+ if (kvm_para_has_hint(KVM_HINTS_REALTIME))
static_branch_disable(&virt_spin_lock_key);
}
@@ -553,7 +553,7 @@ static void __init kvm_guest_init(void)
}
if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH) &&
- !kvm_para_has_hint(KVM_HINTS_DEDICATED) &&
+ !kvm_para_has_hint(KVM_HINTS_REALTIME) &&
kvm_para_has_feature(KVM_FEATURE_STEAL_TIME))
pv_mmu_ops.flush_tlb_others = kvm_flush_tlb_others;
@@ -649,7 +649,7 @@ static __init int kvm_setup_pv_tlb_flush(void)
int cpu;
if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH) &&
- !kvm_para_has_hint(KVM_HINTS_DEDICATED) &&
+ !kvm_para_has_hint(KVM_HINTS_REALTIME) &&
kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) {
for_each_possible_cpu(cpu) {
zalloc_cpumask_var_node(per_cpu_ptr(&__pv_tlb_mask, cpu),
@@ -745,7 +745,7 @@ void __init kvm_spinlock_init(void)
if (!kvm_para_has_feature(KVM_FEATURE_PV_UNHALT))
return;
- if (kvm_para_has_hint(KVM_HINTS_DEDICATED))
+ if (kvm_para_has_hint(KVM_HINTS_REALTIME))
return;
__pv_init_lock_hash();
diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c
index 60cdec6628b0..d1ab07ec8c9a 100644
--- a/arch/x86/kernel/machine_kexec_32.c
+++ b/arch/x86/kernel/machine_kexec_32.c
@@ -57,12 +57,17 @@ static void load_segments(void)
static void machine_kexec_free_page_tables(struct kimage *image)
{
free_page((unsigned long)image->arch.pgd);
+ image->arch.pgd = NULL;
#ifdef CONFIG_X86_PAE
free_page((unsigned long)image->arch.pmd0);
+ image->arch.pmd0 = NULL;
free_page((unsigned long)image->arch.pmd1);
+ image->arch.pmd1 = NULL;
#endif
free_page((unsigned long)image->arch.pte0);
+ image->arch.pte0 = NULL;
free_page((unsigned long)image->arch.pte1);
+ image->arch.pte1 = NULL;
}
static int machine_kexec_alloc_page_tables(struct kimage *image)
@@ -79,7 +84,6 @@ static int machine_kexec_alloc_page_tables(struct kimage *image)
!image->arch.pmd0 || !image->arch.pmd1 ||
#endif
!image->arch.pte0 || !image->arch.pte1) {
- machine_kexec_free_page_tables(image);
return -ENOMEM;
}
return 0;
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index a5e55d832d0a..4c8acdfdc5a7 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -39,9 +39,13 @@ const struct kexec_file_ops * const kexec_file_loaders[] = {
static void free_transition_pgtable(struct kimage *image)
{
free_page((unsigned long)image->arch.p4d);
+ image->arch.p4d = NULL;
free_page((unsigned long)image->arch.pud);
+ image->arch.pud = NULL;
free_page((unsigned long)image->arch.pmd);
+ image->arch.pmd = NULL;
free_page((unsigned long)image->arch.pte);
+ image->arch.pte = NULL;
}
static int init_transition_pgtable(struct kimage *image, pgd_t *pgd)
@@ -91,7 +95,6 @@ static int init_transition_pgtable(struct kimage *image, pgd_t *pgd)
set_pte(pte, pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL_EXEC_NOENC));
return 0;
err:
- free_transition_pgtable(image);
return result;
}
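
The kexec hunks above (both the 32-bit and 64-bit variants) share one idea: make the page-table teardown idempotent instead of freeing on each allocator's error path. free_pages() ignores a zero address, so NULLing every pointer right after free_page() lets the caller run the teardown unconditionally after a partial allocation failure. A minimal sketch of the pattern (struct kexec_like_ctx, setup() and teardown() are illustrative names, not kernel API):

struct kexec_like_ctx {
	void *pgd;
	void *pte;
};

static void teardown(struct kexec_like_ctx *c)
{
	free_page((unsigned long)c->pgd);
	c->pgd = NULL;			/* free_page(0) is a no-op, so this
					 * function is safe to call twice */
	free_page((unsigned long)c->pte);
	c->pte = NULL;
}

static int setup(struct kexec_like_ctx *c)
{
	c->pgd = (void *)get_zeroed_page(GFP_KERNEL);
	c->pte = (void *)get_zeroed_page(GFP_KERNEL);
	if (!c->pgd || !c->pte)
		return -ENOMEM;		/* caller invokes teardown() itself */
	return 0;
}
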
@@ -351,7 +354,8 @@ void arch_crash_save_vmcoreinfo(void)
{
VMCOREINFO_NUMBER(phys_base);
VMCOREINFO_SYMBOL(init_top_pgt);
- VMCOREINFO_NUMBER(pgtable_l5_enabled);
+ vmcoreinfo_append_str("NUMBER(pgtable_l5_enabled)=%d\n",
+ pgtable_l5_enabled());
#ifdef CONFIG_NUMA
VMCOREINFO_SYMBOL(node_data);
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index 77625b60a510..ab5d9dd668d2 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -15,13 +15,11 @@
#include <asm/x86_init.h>
#include <asm/iommu_table.h>
-static int forbid_dac __read_mostly;
+static bool disable_dac_quirk __read_mostly;
const struct dma_map_ops *dma_ops = &dma_direct_ops;
EXPORT_SYMBOL(dma_ops);
-static int iommu_sac_force __read_mostly;
-
#ifdef CONFIG_IOMMU_DEBUG
int panic_on_overflow __read_mostly = 1;
int force_iommu __read_mostly = 1;
@@ -55,9 +53,6 @@ struct device x86_dma_fallback_dev = {
};
EXPORT_SYMBOL(x86_dma_fallback_dev);
-/* Number of entries preallocated for DMA-API debugging */
-#define PREALLOC_DMA_DEBUG_ENTRIES 65536
-
void __init pci_iommu_alloc(void)
{
struct iommu_table_entry *p;
@@ -76,7 +71,7 @@ void __init pci_iommu_alloc(void)
}
}
-bool arch_dma_alloc_attrs(struct device **dev, gfp_t *gfp)
+bool arch_dma_alloc_attrs(struct device **dev)
{
if (!*dev)
*dev = &x86_dma_fallback_dev;
@@ -125,13 +120,13 @@ static __init int iommu_setup(char *p)
if (!strncmp(p, "nomerge", 7))
iommu_merge = 0;
if (!strncmp(p, "forcesac", 8))
- iommu_sac_force = 1;
+ pr_warn("forcesac option ignored.\n");
if (!strncmp(p, "allowdac", 8))
- forbid_dac = 0;
+ pr_warn("allowdac option ignored.\n");
if (!strncmp(p, "nodac", 5))
- forbid_dac = 1;
+ pr_warn("nodac option ignored.\n");
if (!strncmp(p, "usedac", 6)) {
- forbid_dac = -1;
+ disable_dac_quirk = true;
return 1;
}
#ifdef CONFIG_SWIOTLB
@@ -156,40 +151,9 @@ static __init int iommu_setup(char *p)
}
early_param("iommu", iommu_setup);
-int arch_dma_supported(struct device *dev, u64 mask)
-{
-#ifdef CONFIG_PCI
- if (mask > 0xffffffff && forbid_dac > 0) {
- dev_info(dev, "PCI: Disallowing DAC for device\n");
- return 0;
- }
-#endif
-
- /* Tell the device to use SAC when IOMMU force is on. This
- allows the driver to use cheaper accesses in some cases.
-
- Problem with this is that if we overflow the IOMMU area and
- return DAC as fallback address the device may not handle it
- correctly.
-
- As a special case some controllers have a 39bit address
- mode that is as efficient as 32bit (aic79xx). Don't force
- SAC for these. Assume all masks <= 40 bits are of this
- type. Normally this doesn't make any difference, but gives
- more gentle handling of IOMMU overflow. */
- if (iommu_sac_force && (mask >= DMA_BIT_MASK(40))) {
- dev_info(dev, "Force SAC with mask %Lx\n", mask);
- return 0;
- }
-
- return 1;
-}
-EXPORT_SYMBOL(arch_dma_supported);
-
static int __init pci_iommu_init(void)
{
struct iommu_table_entry *p;
- dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES);
#ifdef CONFIG_PCI
dma_debug_add_bus(&pci_bus_type);
@@ -209,11 +173,17 @@ rootfs_initcall(pci_iommu_init);
#ifdef CONFIG_PCI
/* Many VIA bridges seem to corrupt data for DAC. Disable it here */
+static int via_no_dac_cb(struct pci_dev *pdev, void *data)
+{
+ pdev->dev.dma_32bit_limit = true;
+ return 0;
+}
+
static void via_no_dac(struct pci_dev *dev)
{
- if (forbid_dac == 0) {
+ if (!disable_dac_quirk) {
dev_info(&dev->dev, "disabling DAC on VIA PCI bridge\n");
- forbid_dac = 1;
+ pci_walk_bus(dev->subordinate, via_no_dac_cb, NULL);
}
}
DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_VENDOR_ID_VIA, PCI_ANY_ID,
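
Replacing the global forbid_dac flag, the quirk now records the limitation on each device sitting below the affected bridge. The general shape of that pattern, as a sketch (apply_bridge_quirk() and flag_32bit_limit() are illustrative names): pci_walk_bus() invokes the callback for every device on the given bus and below, stopping early only if the callback returns non-zero.

static int flag_32bit_limit(struct pci_dev *pdev, void *data)
{
	pdev->dev.dma_32bit_limit = true;	/* per-device, not global */
	return 0;				/* 0 == keep walking the bus */
}

static void apply_bridge_quirk(struct pci_dev *bridge)
{
	if (bridge->subordinate)		/* only bridges have one */
		pci_walk_bus(bridge->subordinate, flag_32bit_limit, NULL);
}
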
diff --git a/arch/x86/kernel/perf_regs.c b/arch/x86/kernel/perf_regs.c
index e47b2dbbdef3..c06c4c16c6b6 100644
--- a/arch/x86/kernel/perf_regs.c
+++ b/arch/x86/kernel/perf_regs.c
@@ -151,17 +151,19 @@ void perf_get_regs_user(struct perf_regs *regs_user,
regs_user_copy->sp = user_regs->sp;
regs_user_copy->cs = user_regs->cs;
regs_user_copy->ss = user_regs->ss;
-
/*
- * Most system calls don't save these registers, don't report them.
+ * Store user space frame-pointer value on sample
+ * to facilitate stack unwinding for cases when
+ * user space executable code has such support
+ * enabled at compile time:
*/
+ regs_user_copy->bp = user_regs->bp;
+
regs_user_copy->bx = -1;
- regs_user_copy->bp = -1;
regs_user_copy->r12 = -1;
regs_user_copy->r13 = -1;
regs_user_copy->r14 = -1;
regs_user_copy->r15 = -1;
-
/*
* For this to be at all useful, we need a reasonable guess for
* the ABI. Be careful: we're in NMI context, and we're
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 03408b942adb..30ca2d1a9231 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -38,6 +38,7 @@
#include <asm/switch_to.h>
#include <asm/desc.h>
#include <asm/prctl.h>
+#include <asm/spec-ctrl.h>
/*
* per-CPU TSS segments. Threads are completely 'soft' on Linux,
@@ -278,6 +279,148 @@ static inline void switch_to_bitmap(struct tss_struct *tss,
}
}
+#ifdef CONFIG_SMP
+
+struct ssb_state {
+ struct ssb_state *shared_state;
+ raw_spinlock_t lock;
+ unsigned int disable_state;
+ unsigned long local_state;
+};
+
+#define LSTATE_SSB 0
+
+static DEFINE_PER_CPU(struct ssb_state, ssb_state);
+
+void speculative_store_bypass_ht_init(void)
+{
+ struct ssb_state *st = this_cpu_ptr(&ssb_state);
+ unsigned int this_cpu = smp_processor_id();
+ unsigned int cpu;
+
+ st->local_state = 0;
+
+ /*
+ * Shared state setup happens once on the first bringup
+ * of the CPU. It's not destroyed on CPU hotunplug.
+ */
+ if (st->shared_state)
+ return;
+
+ raw_spin_lock_init(&st->lock);
+
+ /*
+ * Go over HT siblings and check whether one of them has set up the
+ * shared state pointer already.
+ */
+ for_each_cpu(cpu, topology_sibling_cpumask(this_cpu)) {
+ if (cpu == this_cpu)
+ continue;
+
+ if (!per_cpu(ssb_state, cpu).shared_state)
+ continue;
+
+ /* Link it to the state of the sibling: */
+ st->shared_state = per_cpu(ssb_state, cpu).shared_state;
+ return;
+ }
+
+ /*
+ * First HT sibling to come up on the core. Link shared state of
+ * the first HT sibling to itself. The siblings on the same core
+ * which come up later will see the shared state pointer and link
+ * themselves to the state of this CPU.
+ */
+ st->shared_state = st;
+}
+
+/*
+ * Logic is: the first HT sibling to enable SSBD enables it for both
+ * siblings in the core, and the last sibling to disable it disables it
+ * for the whole core. This is how
+ * MSR_SPEC_CTRL works in "hardware":
+ *
+ * CORE_SPEC_CTRL = THREAD0_SPEC_CTRL | THREAD1_SPEC_CTRL
+ */
+static __always_inline void amd_set_core_ssb_state(unsigned long tifn)
+{
+ struct ssb_state *st = this_cpu_ptr(&ssb_state);
+ u64 msr = x86_amd_ls_cfg_base;
+
+ if (!static_cpu_has(X86_FEATURE_ZEN)) {
+ msr |= ssbd_tif_to_amd_ls_cfg(tifn);
+ wrmsrl(MSR_AMD64_LS_CFG, msr);
+ return;
+ }
+
+ if (tifn & _TIF_SSBD) {
+ /*
+ * Since this can race with prctl(), block reentry on the
+ * same CPU.
+ */
+ if (__test_and_set_bit(LSTATE_SSB, &st->local_state))
+ return;
+
+ msr |= x86_amd_ls_cfg_ssbd_mask;
+
+ raw_spin_lock(&st->shared_state->lock);
+ /* First sibling enables SSBD: */
+ if (!st->shared_state->disable_state)
+ wrmsrl(MSR_AMD64_LS_CFG, msr);
+ st->shared_state->disable_state++;
+ raw_spin_unlock(&st->shared_state->lock);
+ } else {
+ if (!__test_and_clear_bit(LSTATE_SSB, &st->local_state))
+ return;
+
+ raw_spin_lock(&st->shared_state->lock);
+ st->shared_state->disable_state--;
+ if (!st->shared_state->disable_state)
+ wrmsrl(MSR_AMD64_LS_CFG, msr);
+ raw_spin_unlock(&st->shared_state->lock);
+ }
+}
+#else
+static __always_inline void amd_set_core_ssb_state(unsigned long tifn)
+{
+ u64 msr = x86_amd_ls_cfg_base | ssbd_tif_to_amd_ls_cfg(tifn);
+
+ wrmsrl(MSR_AMD64_LS_CFG, msr);
+}
+#endif
+
+static __always_inline void amd_set_ssb_virt_state(unsigned long tifn)
+{
+ /*
+ * SSBD has the same definition in SPEC_CTRL and VIRT_SPEC_CTRL,
+ * so ssbd_tif_to_spec_ctrl() just works.
+ */
+ wrmsrl(MSR_AMD64_VIRT_SPEC_CTRL, ssbd_tif_to_spec_ctrl(tifn));
+}
+
+static __always_inline void intel_set_ssb_state(unsigned long tifn)
+{
+ u64 msr = x86_spec_ctrl_base | ssbd_tif_to_spec_ctrl(tifn);
+
+ wrmsrl(MSR_IA32_SPEC_CTRL, msr);
+}
+
+static __always_inline void __speculative_store_bypass_update(unsigned long tifn)
+{
+ if (static_cpu_has(X86_FEATURE_VIRT_SSBD))
+ amd_set_ssb_virt_state(tifn);
+ else if (static_cpu_has(X86_FEATURE_LS_CFG_SSBD))
+ amd_set_core_ssb_state(tifn);
+ else
+ intel_set_ssb_state(tifn);
+}
+
+void speculative_store_bypass_update(unsigned long tif)
+{
+ preempt_disable();
+ __speculative_store_bypass_update(tif);
+ preempt_enable();
+}
+
void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
struct tss_struct *tss)
{
@@ -309,6 +452,9 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
if ((tifp ^ tifn) & _TIF_NOCPUID)
set_cpuid_faulting(!!(tifn & _TIF_NOCPUID));
+
+ if ((tifp ^ tifn) & _TIF_SSBD)
+ __speculative_store_bypass_update(tifn);
}
/*
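
Stripped of the TIF plumbing, the per-core logic in amd_set_core_ssb_state() above is an enable/disable count under a spinlock: only the 0 -> 1 and 1 -> 0 transitions touch the MSR, which is exactly the CORE = THREAD0 | THREAD1 behaviour described in its comment. A stand-alone sketch (core_state, ssb_enable() and ssb_disable() are illustrative names):

struct core_state {
	raw_spinlock_t	lock;
	unsigned int	disable_count;	/* how many siblings want SSBD */
};

static void ssb_enable(struct core_state *core, u64 base, u64 ssbd_mask)
{
	raw_spin_lock(&core->lock);
	if (!core->disable_count++)	/* first sibling to enable */
		wrmsrl(MSR_AMD64_LS_CFG, base | ssbd_mask);
	raw_spin_unlock(&core->lock);
}

static void ssb_disable(struct core_state *core, u64 base)
{
	raw_spin_lock(&core->lock);
	if (!--core->disable_count)	/* last sibling to disable */
		wrmsrl(MSR_AMD64_LS_CFG, base);
	raw_spin_unlock(&core->lock);
}
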
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 5224c6099184..0ae659de21eb 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -76,16 +76,14 @@ void __show_regs(struct pt_regs *regs, int all)
savesegment(gs, gs);
}
- printk(KERN_DEFAULT "EIP: %pS\n", (void *)regs->ip);
- printk(KERN_DEFAULT "EFLAGS: %08lx CPU: %d\n", regs->flags,
- raw_smp_processor_id());
+ show_ip(regs, KERN_DEFAULT);
printk(KERN_DEFAULT "EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n",
regs->ax, regs->bx, regs->cx, regs->dx);
printk(KERN_DEFAULT "ESI: %08lx EDI: %08lx EBP: %08lx ESP: %08lx\n",
regs->si, regs->di, regs->bp, sp);
- printk(KERN_DEFAULT " DS: %04x ES: %04x FS: %04x GS: %04x SS: %04x\n",
- (u16)regs->ds, (u16)regs->es, (u16)regs->fs, gs, ss);
+ printk(KERN_DEFAULT "DS: %04x ES: %04x FS: %04x GS: %04x SS: %04x EFLAGS: %08lx\n",
+ (u16)regs->ds, (u16)regs->es, (u16)regs->fs, gs, ss, regs->flags);
if (!all)
return;
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 4b100fe0f508..12bb445fb98d 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -542,6 +542,7 @@ void set_personality_64bit(void)
clear_thread_flag(TIF_X32);
/* Pretend that this comes from a 64bit execve */
task_pt_regs(current)->orig_ax = __NR_execve;
+ current_thread_info()->status &= ~TS_COMPAT;
/* Ensure the corresponding mm is not marked. */
if (current->mm)
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index ed5c4cdf0a34..e2ee403865eb 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -1377,7 +1377,6 @@ static void fill_sigtrap_info(struct task_struct *tsk,
tsk->thread.trap_nr = X86_TRAP_DB;
tsk->thread.error_code = error_code;
- memset(info, 0, sizeof(*info));
info->si_signo = SIGTRAP;
info->si_code = si_code;
info->si_addr = user_mode(regs) ? (void __user *)regs->ip : NULL;
@@ -1395,6 +1394,7 @@ void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs,
{
struct siginfo info;
+ clear_siginfo(&info);
fill_sigtrap_info(tsk, regs, error_code, si_code, &info);
/* Send us the fake SIGTRAP */
force_sig_info(SIGTRAP, &info, tsk);
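
This clear_siginfo() call, like the ones added in mmu.c, traps.c, umip.c and fault.c below, exists because struct siginfo is a union with padding: whatever stack bytes the unused members hold would otherwise be copied out to user space along with the signal. The recurring shape, as a sketch (send_trap_sketch() is an illustrative name):

static void send_trap_sketch(struct task_struct *tsk, void __user *addr)
{
	struct siginfo info;

	clear_siginfo(&info);	/* zero padding and unused union members */
	info.si_signo = SIGTRAP;
	info.si_errno = 0;
	info.si_code  = TRAP_BRKPT;
	info.si_addr  = addr;
	force_sig_info(SIGTRAP, &info, tsk);
}
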
diff --git a/arch/x86/kernel/signal_compat.c b/arch/x86/kernel/signal_compat.c
index 14c057f29979..9ccbf0576cd0 100644
--- a/arch/x86/kernel/signal_compat.c
+++ b/arch/x86/kernel/signal_compat.c
@@ -29,7 +29,7 @@ static inline void signal_compat_build_tests(void)
BUILD_BUG_ON(NSIGFPE != 15);
BUILD_BUG_ON(NSIGSEGV != 7);
BUILD_BUG_ON(NSIGBUS != 5);
- BUILD_BUG_ON(NSIGTRAP != 4);
+ BUILD_BUG_ON(NSIGTRAP != 5);
BUILD_BUG_ON(NSIGCHLD != 6);
BUILD_BUG_ON(NSIGSYS != 1);
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 0f1cbb042f49..c2f7d1d2a5c3 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -79,13 +79,7 @@
#include <asm/qspinlock.h>
#include <asm/intel-family.h>
#include <asm/cpu_device_id.h>
-
-/* Number of siblings per CPU package */
-int smp_num_siblings = 1;
-EXPORT_SYMBOL(smp_num_siblings);
-
-/* Last level cache ID of each logical CPU */
-DEFINE_PER_CPU_READ_MOSTLY(u16, cpu_llc_id) = BAD_APICID;
+#include <asm/spec-ctrl.h>
/* representing HT siblings of each logical CPU */
DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_sibling_map);
@@ -244,6 +238,8 @@ static void notrace start_secondary(void *unused)
*/
check_tsc_sync_target();
+ speculative_store_bypass_ht_init();
+
/*
* Lock vector_lock, set CPU online and bring the vector
* allocator online. Online must be set with vector_lock held
@@ -1292,6 +1288,8 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
set_mtrr_aps_delayed_init();
smp_quirk_init_udelay();
+
+ speculative_store_bypass_ht_init();
}
void arch_enable_nonboot_cpus_begin(void)
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 03f3d7695dac..a535dd64de63 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -299,6 +299,7 @@ static void do_error_trap(struct pt_regs *regs, long error_code, char *str,
if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) !=
NOTIFY_STOP) {
cond_local_irq_enable(regs);
+ clear_siginfo(&info);
do_trap(trapnr, signr, str, regs, error_code,
fill_trap_info(regs, signr, trapnr, &info));
}
@@ -854,6 +855,7 @@ static void math_error(struct pt_regs *regs, int error_code, int trapnr)
task->thread.trap_nr = trapnr;
task->thread.error_code = error_code;
+ clear_siginfo(&info);
info.si_signo = SIGFPE;
info.si_errno = 0;
info.si_addr = (void __user *)uprobe_get_trap_addr(regs);
@@ -929,6 +931,7 @@ dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code)
RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
local_irq_enable();
+ clear_siginfo(&info);
info.si_signo = SIGILL;
info.si_errno = 0;
info.si_code = ILL_BADSTK;
diff --git a/arch/x86/kernel/umip.c b/arch/x86/kernel/umip.c
index f44ce0fb3583..ff20b35e98dd 100644
--- a/arch/x86/kernel/umip.c
+++ b/arch/x86/kernel/umip.c
@@ -278,6 +278,7 @@ static void force_sig_info_umip_fault(void __user *addr, struct pt_regs *regs)
tsk->thread.error_code = X86_PF_USER | X86_PF_WRITE;
tsk->thread.trap_nr = X86_TRAP_PF;
+ clear_siginfo(&info);
info.si_signo = SIGSEGV;
info.si_errno = 0;
info.si_code = SEGV_MAPERR;
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index 85c7ef23d99f..58d8d800875d 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -299,6 +299,10 @@ static int uprobe_init_insn(struct arch_uprobe *auprobe, struct insn *insn, bool
if (is_prefix_bad(insn))
return -ENOTSUPP;
+ /* We should not singlestep on the exception masking instructions */
+ if (insn_masking_exception(insn))
+ return -ENOTSUPP;
+
if (x86_64)
good_insns = good_insns_64;
else
@@ -1079,8 +1083,8 @@ arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr, struct pt_regs
return orig_ret_vaddr;
if (nleft != rasize) {
- pr_err("uprobe: return address clobbered: pid=%d, %%sp=%#lx, "
- "%%ip=%#lx\n", current->pid, regs->sp, regs->ip);
+ pr_err("return address clobbered: pid=%d, %%sp=%#lx, %%ip=%#lx\n",
+ current->pid, regs->sp, regs->ip);
force_sig_info(SIGSEGV, SEND_SIG_FORCED, current);
}
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 795f3a80e576..5e1458f609a1 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -117,11 +117,11 @@ SECTIONS
#ifdef CONFIG_X86_64
. = ALIGN(PAGE_SIZE);
- VMLINUX_SYMBOL(__entry_trampoline_start) = .;
+ __entry_trampoline_start = .;
_entry_trampoline = .;
*(.entry_trampoline)
. = ALIGN(PAGE_SIZE);
- VMLINUX_SYMBOL(__entry_trampoline_end) = .;
+ __entry_trampoline_end = .;
ASSERT(. - _entry_trampoline == PAGE_SIZE, "entry trampoline is too big");
#endif
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 82055b90a8b3..92bf2f2e7cdd 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -379,7 +379,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
/* cpuid 0x80000008.ebx */
const u32 kvm_cpuid_8000_0008_ebx_x86_features =
- F(IBPB) | F(IBRS);
+ F(AMD_IBPB) | F(AMD_IBRS) | F(VIRT_SSBD);
/* cpuid 0xC0000001.edx */
const u32 kvm_cpuid_C000_0001_edx_x86_features =
@@ -408,7 +408,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
/* cpuid 7.0.edx*/
const u32 kvm_cpuid_7_0_edx_x86_features =
F(AVX512_4VNNIW) | F(AVX512_4FMAPS) | F(SPEC_CTRL) |
- F(ARCH_CAPABILITIES);
+ F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES);
/* all calls to cpuid_count() should be made on the same cpu */
get_cpu();
@@ -495,6 +495,11 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
entry->ecx &= ~F(PKU);
entry->edx &= kvm_cpuid_7_0_edx_x86_features;
cpuid_mask(&entry->edx, CPUID_7_EDX);
+ /*
+ * We emulate ARCH_CAPABILITIES in software even
+ * if the host doesn't support it.
+ */
+ entry->edx |= F(ARCH_CAPABILITIES);
} else {
entry->ebx = 0;
entry->ecx = 0;
@@ -647,13 +652,20 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
g_phys_as = phys_as;
entry->eax = g_phys_as | (virt_as << 8);
entry->edx = 0;
- /* IBRS and IBPB aren't necessarily present in hardware cpuid */
- if (boot_cpu_has(X86_FEATURE_IBPB))
- entry->ebx |= F(IBPB);
- if (boot_cpu_has(X86_FEATURE_IBRS))
- entry->ebx |= F(IBRS);
+ /*
+ * IBRS, IBPB and VIRT_SSBD aren't necessarily present in
+ * hardware cpuid
+ */
+ if (boot_cpu_has(X86_FEATURE_AMD_IBPB))
+ entry->ebx |= F(AMD_IBPB);
+ if (boot_cpu_has(X86_FEATURE_AMD_IBRS))
+ entry->ebx |= F(AMD_IBRS);
+ if (boot_cpu_has(X86_FEATURE_VIRT_SSBD))
+ entry->ebx |= F(VIRT_SSBD);
entry->ebx &= kvm_cpuid_8000_0008_ebx_x86_features;
cpuid_mask(&entry->ebx, CPUID_8000_0008_EBX);
+ if (boot_cpu_has(X86_FEATURE_LS_CFG_SSBD))
+ entry->ebx |= F(VIRT_SSBD);
break;
}
case 0x80000019:
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 98618e397342..46ff64da44ca 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -1260,12 +1260,16 @@ static void kvm_hv_hypercall_set_result(struct kvm_vcpu *vcpu, u64 result)
}
}
-static int kvm_hv_hypercall_complete_userspace(struct kvm_vcpu *vcpu)
+static int kvm_hv_hypercall_complete(struct kvm_vcpu *vcpu, u64 result)
{
- struct kvm_run *run = vcpu->run;
+ kvm_hv_hypercall_set_result(vcpu, result);
+ ++vcpu->stat.hypercalls;
+ return kvm_skip_emulated_instruction(vcpu);
+}
- kvm_hv_hypercall_set_result(vcpu, run->hyperv.u.hcall.result);
- return 1;
+static int kvm_hv_hypercall_complete_userspace(struct kvm_vcpu *vcpu)
+{
+ return kvm_hv_hypercall_complete(vcpu, vcpu->run->hyperv.u.hcall.result);
}
static u16 kvm_hvcall_signal_event(struct kvm_vcpu *vcpu, bool fast, u64 param)
@@ -1296,8 +1300,10 @@ static u16 kvm_hvcall_signal_event(struct kvm_vcpu *vcpu, bool fast, u64 param)
if (param & ~KVM_HYPERV_CONN_ID_MASK)
return HV_STATUS_INVALID_HYPERCALL_INPUT;
- /* conn_to_evt is protected by vcpu->kvm->srcu */
+ /* the eventfd is protected by vcpu->kvm->srcu, but conn_to_evt isn't */
+ rcu_read_lock();
eventfd = idr_find(&vcpu->kvm->arch.hyperv.conn_to_evt, param);
+ rcu_read_unlock();
if (!eventfd)
return HV_STATUS_INVALID_PORT_ID;
@@ -1348,7 +1354,7 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
/* Hypercall continuation is not supported yet */
if (rep_cnt || rep_idx) {
ret = HV_STATUS_INVALID_HYPERCALL_CODE;
- goto set_result;
+ goto out;
}
switch (code) {
@@ -1379,9 +1385,8 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
break;
}
-set_result:
- kvm_hv_hypercall_set_result(vcpu, ret);
- return 1;
+out:
+ return kvm_hv_hypercall_complete(vcpu, ret);
}
void kvm_hv_init_vm(struct kvm *kvm)
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 70dcb5548022..3773c4625114 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1463,23 +1463,6 @@ static void start_sw_tscdeadline(struct kvm_lapic *apic)
local_irq_restore(flags);
}
-static void start_sw_period(struct kvm_lapic *apic)
-{
- if (!apic->lapic_timer.period)
- return;
-
- if (apic_lvtt_oneshot(apic) &&
- ktime_after(ktime_get(),
- apic->lapic_timer.target_expiration)) {
- apic_timer_expired(apic);
- return;
- }
-
- hrtimer_start(&apic->lapic_timer.timer,
- apic->lapic_timer.target_expiration,
- HRTIMER_MODE_ABS_PINNED);
-}
-
static void update_target_expiration(struct kvm_lapic *apic, uint32_t old_divisor)
{
ktime_t now, remaining;
@@ -1539,11 +1522,43 @@ static bool set_target_expiration(struct kvm_lapic *apic)
static void advance_periodic_target_expiration(struct kvm_lapic *apic)
{
- apic->lapic_timer.tscdeadline +=
- nsec_to_cycles(apic->vcpu, apic->lapic_timer.period);
+ ktime_t now = ktime_get();
+ u64 tscl = rdtsc();
+ ktime_t delta;
+
+ /*
+ * Synchronize both deadlines to the same time source or
+ * differences in the periods (caused by differences in the
+ * underlying clocks or numerical approximation errors) will
+ * cause the two to drift apart over time as the errors
+ * accumulate.
+ */
apic->lapic_timer.target_expiration =
ktime_add_ns(apic->lapic_timer.target_expiration,
apic->lapic_timer.period);
+ delta = ktime_sub(apic->lapic_timer.target_expiration, now);
+ apic->lapic_timer.tscdeadline = kvm_read_l1_tsc(apic->vcpu, tscl) +
+ nsec_to_cycles(apic->vcpu, delta);
+}
+
+static void start_sw_period(struct kvm_lapic *apic)
+{
+ if (!apic->lapic_timer.period)
+ return;
+
+ if (ktime_after(ktime_get(),
+ apic->lapic_timer.target_expiration)) {
+ apic_timer_expired(apic);
+
+ if (apic_lvtt_oneshot(apic))
+ return;
+
+ advance_periodic_target_expiration(apic);
+ }
+
+ hrtimer_start(&apic->lapic_timer.timer,
+ apic->lapic_timer.target_expiration,
+ HRTIMER_MODE_ABS_PINNED);
}
bool kvm_lapic_hv_timer_in_use(struct kvm_vcpu *vcpu)
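
A concrete drift example for the hunk above, with hypothetical numbers: on a 1 ms period, nsec_to_cycles() rounds each period to a slightly different cycle count than the exact ktime period, so advancing the two deadlines independently lets the error grow by a few cycles every tick. Re-deriving the TSC deadline from the ktime one at a single (now, tscl) snapshot pins both to the same instant, as this restatement of the hunk as a stand-alone helper shows:

static void resync_deadlines(struct kvm_lapic *apic, ktime_t now, u64 tscl)
{
	struct kvm_timer *t = &apic->lapic_timer;

	/* The ktime deadline is authoritative; advance it by one period. */
	t->target_expiration = ktime_add_ns(t->target_expiration, t->period);

	/* Derive the TSC deadline from it, anchored to the same snapshot. */
	t->tscdeadline = kvm_read_l1_tsc(apic->vcpu, tscl) +
			 nsec_to_cycles(apic->vcpu,
					ktime_sub(t->target_expiration, now));
}
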
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 8494dbae41b9..d634f0332c0f 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3007,6 +3007,7 @@ static void kvm_send_hwpoison_signal(unsigned long address, struct task_struct *
{
siginfo_t info;
+ clear_siginfo(&info);
info.si_signo = SIGBUS;
info.si_errno = 0;
info.si_code = BUS_MCEERR_AR;
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 1fc05e428aba..26110c202b19 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -49,7 +49,7 @@
#include <asm/debugreg.h>
#include <asm/kvm_para.h>
#include <asm/irq_remapping.h>
-#include <asm/nospec-branch.h>
+#include <asm/spec-ctrl.h>
#include <asm/virtext.h>
#include "trace.h"
@@ -213,6 +213,12 @@ struct vcpu_svm {
} host;
u64 spec_ctrl;
+ /*
+ * Contains guest-controlled bits of VIRT_SPEC_CTRL, which will be
+ * translated into the appropriate L2_CFG bits on the host to
+ * perform speculative control.
+ */
+ u64 virt_spec_ctrl;
u32 *msrpm;
@@ -2060,6 +2066,7 @@ static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
vcpu->arch.microcode_version = 0x01000065;
svm->spec_ctrl = 0;
+ svm->virt_spec_ctrl = 0;
if (!init_event) {
svm->vcpu.arch.apic_base = APIC_DEFAULT_PHYS_BASE |
@@ -4108,11 +4115,18 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
break;
case MSR_IA32_SPEC_CTRL:
if (!msr_info->host_initiated &&
- !guest_cpuid_has(vcpu, X86_FEATURE_IBRS))
+ !guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBRS))
return 1;
msr_info->data = svm->spec_ctrl;
break;
+ case MSR_AMD64_VIRT_SPEC_CTRL:
+ if (!msr_info->host_initiated &&
+ !guest_cpuid_has(vcpu, X86_FEATURE_VIRT_SSBD))
+ return 1;
+
+ msr_info->data = svm->virt_spec_ctrl;
+ break;
case MSR_F15H_IC_CFG: {
int family, model;
@@ -4203,7 +4217,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
break;
case MSR_IA32_SPEC_CTRL:
if (!msr->host_initiated &&
- !guest_cpuid_has(vcpu, X86_FEATURE_IBRS))
+ !guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBRS))
return 1;
/* The STIBP bit doesn't fault even if it's not advertised */
@@ -4230,7 +4244,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
break;
case MSR_IA32_PRED_CMD:
if (!msr->host_initiated &&
- !guest_cpuid_has(vcpu, X86_FEATURE_IBPB))
+ !guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBPB))
return 1;
if (data & ~PRED_CMD_IBPB)
@@ -4244,6 +4258,16 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
break;
set_msr_interception(svm->msrpm, MSR_IA32_PRED_CMD, 0, 1);
break;
+ case MSR_AMD64_VIRT_SPEC_CTRL:
+ if (!msr->host_initiated &&
+ !guest_cpuid_has(vcpu, X86_FEATURE_VIRT_SSBD))
+ return 1;
+
+ if (data & ~SPEC_CTRL_SSBD)
+ return 1;
+
+ svm->virt_spec_ctrl = data;
+ break;
case MSR_STAR:
svm->vmcb->save.star = data;
break;
@@ -5557,8 +5581,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
* is no need to worry about the conditional branch over the wrmsr
* being speculatively taken.
*/
- if (svm->spec_ctrl)
- native_wrmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl);
+ x86_spec_ctrl_set_guest(svm->spec_ctrl, svm->virt_spec_ctrl);
asm volatile (
"push %%" _ASM_BP "; \n\t"
@@ -5652,6 +5675,18 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
#endif
);
+ /* Eliminate branch target predictions from guest mode */
+ vmexit_fill_RSB();
+
+#ifdef CONFIG_X86_64
+ wrmsrl(MSR_GS_BASE, svm->host.gs_base);
+#else
+ loadsegment(fs, svm->host.fs);
+#ifndef CONFIG_X86_32_LAZY_GS
+ loadsegment(gs, svm->host.gs);
+#endif
+#endif
+
/*
* We do not use IBRS in the kernel. If this vCPU has used the
* SPEC_CTRL MSR it may have left it on; save the value and
@@ -5670,20 +5705,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)))
svm->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL);
- if (svm->spec_ctrl)
- native_wrmsrl(MSR_IA32_SPEC_CTRL, 0);
-
- /* Eliminate branch target predictions from guest mode */
- vmexit_fill_RSB();
-
-#ifdef CONFIG_X86_64
- wrmsrl(MSR_GS_BASE, svm->host.gs_base);
-#else
- loadsegment(fs, svm->host.fs);
-#ifndef CONFIG_X86_32_LAZY_GS
- loadsegment(gs, svm->host.gs);
-#endif
-#endif
+ x86_spec_ctrl_restore_host(svm->spec_ctrl, svm->virt_spec_ctrl);
reload_tss(vcpu);
@@ -5786,7 +5808,7 @@ static bool svm_cpu_has_accelerated_tpr(void)
return false;
}
-static bool svm_has_high_real_mode_segbase(void)
+static bool svm_has_emulated_msr(int index)
{
return true;
}
@@ -7012,7 +7034,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
.hardware_enable = svm_hardware_enable,
.hardware_disable = svm_hardware_disable,
.cpu_has_accelerated_tpr = svm_cpu_has_accelerated_tpr,
- .cpu_has_high_real_mode_segbase = svm_has_high_real_mode_segbase,
+ .has_emulated_msr = svm_has_emulated_msr,
.vcpu_create = svm_create_vcpu,
.vcpu_free = svm_free_vcpu,
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index c7668806163f..40aa29204baf 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -51,7 +51,7 @@
#include <asm/apic.h>
#include <asm/irq_remapping.h>
#include <asm/mmu_context.h>
-#include <asm/nospec-branch.h>
+#include <asm/spec-ctrl.h>
#include <asm/mshyperv.h>
#include "trace.h"
@@ -1494,6 +1494,12 @@ static inline bool cpu_has_vmx_vmfunc(void)
SECONDARY_EXEC_ENABLE_VMFUNC;
}
+static bool vmx_umip_emulated(void)
+{
+ return vmcs_config.cpu_based_2nd_exec_ctrl &
+ SECONDARY_EXEC_DESC;
+}
+
static inline bool report_flexpriority(void)
{
return flexpriority_enabled;
@@ -3523,7 +3529,6 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
return kvm_get_msr_common(vcpu, msr_info);
case MSR_IA32_SPEC_CTRL:
if (!msr_info->host_initiated &&
- !guest_cpuid_has(vcpu, X86_FEATURE_IBRS) &&
!guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL))
return 1;
@@ -3642,12 +3647,11 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
break;
case MSR_IA32_SPEC_CTRL:
if (!msr_info->host_initiated &&
- !guest_cpuid_has(vcpu, X86_FEATURE_IBRS) &&
!guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL))
return 1;
/* The STIBP bit doesn't fault even if it's not advertised */
- if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP))
+ if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP | SPEC_CTRL_SSBD))
return 1;
vmx->spec_ctrl = data;
@@ -3673,7 +3677,6 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
break;
case MSR_IA32_PRED_CMD:
if (!msr_info->host_initiated &&
- !guest_cpuid_has(vcpu, X86_FEATURE_IBPB) &&
!guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL))
return 1;
@@ -4761,14 +4764,16 @@ static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
else
hw_cr4 |= KVM_PMODE_VM_CR4_ALWAYS_ON;
- if ((cr4 & X86_CR4_UMIP) && !boot_cpu_has(X86_FEATURE_UMIP)) {
- vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL,
- SECONDARY_EXEC_DESC);
- hw_cr4 &= ~X86_CR4_UMIP;
- } else if (!is_guest_mode(vcpu) ||
- !nested_cpu_has2(get_vmcs12(vcpu), SECONDARY_EXEC_DESC))
- vmcs_clear_bits(SECONDARY_VM_EXEC_CONTROL,
+ if (!boot_cpu_has(X86_FEATURE_UMIP) && vmx_umip_emulated()) {
+ if (cr4 & X86_CR4_UMIP) {
+ vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL,
SECONDARY_EXEC_DESC);
+ hw_cr4 &= ~X86_CR4_UMIP;
+ } else if (!is_guest_mode(vcpu) ||
+ !nested_cpu_has2(get_vmcs12(vcpu), SECONDARY_EXEC_DESC))
+ vmcs_clear_bits(SECONDARY_VM_EXEC_CONTROL,
+ SECONDARY_EXEC_DESC);
+ }
if (cr4 & X86_CR4_VMXE) {
/*
@@ -9480,9 +9485,21 @@ static void vmx_handle_external_intr(struct kvm_vcpu *vcpu)
}
STACK_FRAME_NON_STANDARD(vmx_handle_external_intr);
-static bool vmx_has_high_real_mode_segbase(void)
+static bool vmx_has_emulated_msr(int index)
{
- return enable_unrestricted_guest || emulate_invalid_guest_state;
+ switch (index) {
+ case MSR_IA32_SMBASE:
+ /*
+ * We cannot do SMM unless we can run the guest in big
+ * real mode.
+ */
+ return enable_unrestricted_guest || emulate_invalid_guest_state;
+ case MSR_AMD64_VIRT_SPEC_CTRL:
+ /* This is AMD only. */
+ return false;
+ default:
+ return true;
+ }
}
static bool vmx_mpx_supported(void)
@@ -9497,12 +9514,6 @@ static bool vmx_xsaves_supported(void)
SECONDARY_EXEC_XSAVES;
}
-static bool vmx_umip_emulated(void)
-{
- return vmcs_config.cpu_based_2nd_exec_ctrl &
- SECONDARY_EXEC_DESC;
-}
-
static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx)
{
u32 exit_intr_info;
@@ -9720,8 +9731,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
* is no need to worry about the conditional branch over the wrmsr
* being speculatively taken.
*/
- if (vmx->spec_ctrl)
- native_wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
+ x86_spec_ctrl_set_guest(vmx->spec_ctrl, 0);
vmx->__launched = vmx->loaded_vmcs->launched;
@@ -9869,8 +9879,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)))
vmx->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL);
- if (vmx->spec_ctrl)
- native_wrmsrl(MSR_IA32_SPEC_CTRL, 0);
+ x86_spec_ctrl_restore_host(vmx->spec_ctrl, 0);
/* Eliminate branch target predictions from guest mode */
vmexit_fill_RSB();
@@ -12630,7 +12639,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
.hardware_enable = hardware_enable,
.hardware_disable = hardware_disable,
.cpu_has_accelerated_tpr = report_flexpriority,
- .cpu_has_high_real_mode_segbase = vmx_has_high_real_mode_segbase,
+ .has_emulated_msr = vmx_has_emulated_msr,
.vm_init = vmx_vm_init,
.vm_alloc = vmx_vm_alloc,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 51ecd381793b..71e7cda6d014 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -114,7 +114,7 @@ module_param(ignore_msrs, bool, S_IRUGO | S_IWUSR);
static bool __read_mostly report_ignored_msrs = true;
module_param(report_ignored_msrs, bool, S_IRUGO | S_IWUSR);
-unsigned int min_timer_period_us = 500;
+unsigned int min_timer_period_us = 200;
module_param(min_timer_period_us, uint, S_IRUGO | S_IWUSR);
static bool __read_mostly kvmclock_periodic_sync = true;
@@ -843,7 +843,10 @@ EXPORT_SYMBOL_GPL(kvm_set_cr4);
int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
{
#ifdef CONFIG_X86_64
- cr3 &= ~CR3_PCID_INVD;
+ bool pcid_enabled = kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE);
+
+ if (pcid_enabled)
+ cr3 &= ~CR3_PCID_INVD;
#endif
if (cr3 == kvm_read_cr3(vcpu) && !pdptrs_changed(vcpu)) {
@@ -1058,6 +1061,7 @@ static u32 emulated_msrs[] = {
MSR_SMI_COUNT,
MSR_PLATFORM_INFO,
MSR_MISC_FEATURES_ENABLES,
+ MSR_AMD64_VIRT_SPEC_CTRL,
};
static unsigned num_emulated_msrs;
@@ -2903,7 +2907,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
* fringe case that is not enabled except via specific settings
* of the module parameters.
*/
- r = kvm_x86_ops->cpu_has_high_real_mode_segbase();
+ r = kvm_x86_ops->has_emulated_msr(MSR_IA32_SMBASE);
break;
case KVM_CAP_VAPIC:
r = !kvm_x86_ops->cpu_has_accelerated_tpr();
@@ -4603,14 +4607,8 @@ static void kvm_init_msr_list(void)
num_msrs_to_save = j;
for (i = j = 0; i < ARRAY_SIZE(emulated_msrs); i++) {
- switch (emulated_msrs[i]) {
- case MSR_IA32_SMBASE:
- if (!kvm_x86_ops->cpu_has_high_real_mode_segbase())
- continue;
- break;
- default:
- break;
- }
+ if (!kvm_x86_ops->has_emulated_msr(emulated_msrs[i]))
+ continue;
if (j < i)
emulated_msrs[j] = emulated_msrs[i];
@@ -6671,9 +6669,7 @@ void kvm_vcpu_deactivate_apicv(struct kvm_vcpu *vcpu)
int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
{
unsigned long nr, a0, a1, a2, a3, ret;
- int op_64_bit, r;
-
- r = kvm_skip_emulated_instruction(vcpu);
+ int op_64_bit;
if (kvm_hv_hypercall_enabled(vcpu->kvm))
return kvm_hv_hypercall(vcpu);
@@ -6721,8 +6717,9 @@ out:
if (!op_64_bit)
ret = (u32)ret;
kvm_register_write(vcpu, VCPU_REGS_RAX, ret);
+
++vcpu->stat.hypercalls;
- return r;
+ return kvm_skip_emulated_instruction(vcpu);
}
EXPORT_SYMBOL_GPL(kvm_emulate_hypercall);
@@ -7979,6 +7976,7 @@ static int __set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
{
struct msr_data apic_base_msr;
int mmu_reset_needed = 0;
+ int cpuid_update_needed = 0;
int pending_vec, max_bits, idx;
struct desc_ptr dt;
int ret = -EINVAL;
@@ -8017,8 +8015,10 @@ static int __set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
vcpu->arch.cr0 = sregs->cr0;
mmu_reset_needed |= kvm_read_cr4(vcpu) != sregs->cr4;
+ cpuid_update_needed |= ((kvm_read_cr4(vcpu) ^ sregs->cr4) &
+ (X86_CR4_OSXSAVE | X86_CR4_PKE));
kvm_x86_ops->set_cr4(vcpu, sregs->cr4);
- if (sregs->cr4 & (X86_CR4_OSXSAVE | X86_CR4_PKE))
+ if (cpuid_update_needed)
kvm_update_cpuid(vcpu);
idx = srcu_read_lock(&vcpu->kvm->srcu);
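
The cpuid_update_needed test uses the same idiom as the _TIF_NOCPUID and _TIF_SSBD checks in process.c earlier: XOR the old and new values, then mask, which is non-zero exactly when one of the interesting bits changed. As a sketch:

static bool bits_changed(unsigned long old, unsigned long new,
			 unsigned long mask)
{
	/* Non-zero iff old and new differ in at least one bit of mask. */
	return (old ^ new) & mask;
}
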
diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S
index 9a53a06e5a3e..c3b527a9f95d 100644
--- a/arch/x86/lib/memcpy_64.S
+++ b/arch/x86/lib/memcpy_64.S
@@ -184,11 +184,11 @@ ENDPROC(memcpy_orig)
#ifndef CONFIG_UML
/*
- * memcpy_mcsafe_unrolled - memory copy with machine check exception handling
+ * __memcpy_mcsafe - memory copy with machine check exception handling
* Note that we only catch machine checks when reading the source addresses.
* Writes to target are posted and don't generate machine checks.
*/
-ENTRY(memcpy_mcsafe_unrolled)
+ENTRY(__memcpy_mcsafe)
cmpl $8, %edx
/* Less than 8 bytes? Go to byte copy loop */
jb .L_no_whole_words
@@ -204,58 +204,29 @@ ENTRY(memcpy_mcsafe_unrolled)
subl $8, %ecx
negl %ecx
subl %ecx, %edx
-.L_copy_leading_bytes:
+.L_read_leading_bytes:
movb (%rsi), %al
+.L_write_leading_bytes:
movb %al, (%rdi)
incq %rsi
incq %rdi
decl %ecx
- jnz .L_copy_leading_bytes
+ jnz .L_read_leading_bytes
.L_8byte_aligned:
- /* Figure out how many whole cache lines (64-bytes) to copy */
- movl %edx, %ecx
- andl $63, %edx
- shrl $6, %ecx
- jz .L_no_whole_cache_lines
-
- /* Loop copying whole cache lines */
-.L_cache_w0: movq (%rsi), %r8
-.L_cache_w1: movq 1*8(%rsi), %r9
-.L_cache_w2: movq 2*8(%rsi), %r10
-.L_cache_w3: movq 3*8(%rsi), %r11
- movq %r8, (%rdi)
- movq %r9, 1*8(%rdi)
- movq %r10, 2*8(%rdi)
- movq %r11, 3*8(%rdi)
-.L_cache_w4: movq 4*8(%rsi), %r8
-.L_cache_w5: movq 5*8(%rsi), %r9
-.L_cache_w6: movq 6*8(%rsi), %r10
-.L_cache_w7: movq 7*8(%rsi), %r11
- movq %r8, 4*8(%rdi)
- movq %r9, 5*8(%rdi)
- movq %r10, 6*8(%rdi)
- movq %r11, 7*8(%rdi)
- leaq 64(%rsi), %rsi
- leaq 64(%rdi), %rdi
- decl %ecx
- jnz .L_cache_w0
-
- /* Are there any trailing 8-byte words? */
-.L_no_whole_cache_lines:
movl %edx, %ecx
andl $7, %edx
shrl $3, %ecx
jz .L_no_whole_words
- /* Copy trailing words */
-.L_copy_trailing_words:
+.L_read_words:
movq (%rsi), %r8
- mov %r8, (%rdi)
- leaq 8(%rsi), %rsi
- leaq 8(%rdi), %rdi
+.L_write_words:
+ movq %r8, (%rdi)
+ addq $8, %rsi
+ addq $8, %rdi
decl %ecx
- jnz .L_copy_trailing_words
+ jnz .L_read_words
/* Any trailing bytes? */
.L_no_whole_words:
@@ -264,38 +235,53 @@ ENTRY(memcpy_mcsafe_unrolled)
/* Copy trailing bytes */
movl %edx, %ecx
-.L_copy_trailing_bytes:
+.L_read_trailing_bytes:
movb (%rsi), %al
+.L_write_trailing_bytes:
movb %al, (%rdi)
incq %rsi
incq %rdi
decl %ecx
- jnz .L_copy_trailing_bytes
+ jnz .L_read_trailing_bytes
/* Copy successful. Return zero */
.L_done_memcpy_trap:
xorq %rax, %rax
ret
-ENDPROC(memcpy_mcsafe_unrolled)
-EXPORT_SYMBOL_GPL(memcpy_mcsafe_unrolled)
+ENDPROC(__memcpy_mcsafe)
+EXPORT_SYMBOL_GPL(__memcpy_mcsafe)
.section .fixup, "ax"
- /* Return -EFAULT for any failure */
-.L_memcpy_mcsafe_fail:
- mov $-EFAULT, %rax
+ /*
+ * Return number of bytes not copied for any failure. Note that
+ * there is no "tail" handling since the source buffer is 8-byte
+ * aligned and poison is cacheline aligned.
+ */
+.E_read_words:
+ shll $3, %ecx
+.E_leading_bytes:
+ addl %edx, %ecx
+.E_trailing_bytes:
+ mov %ecx, %eax
ret
+ /*
+ * For write fault handling, given the destination is unaligned,
+ * we handle faults on multi-byte writes with a byte-by-byte
+ * copy up to the write-protected page.
+ */
+.E_write_words:
+ shll $3, %ecx
+ addl %edx, %ecx
+ movl %ecx, %edx
+ jmp mcsafe_handle_tail
+
.previous
- _ASM_EXTABLE_FAULT(.L_copy_leading_bytes, .L_memcpy_mcsafe_fail)
- _ASM_EXTABLE_FAULT(.L_cache_w0, .L_memcpy_mcsafe_fail)
- _ASM_EXTABLE_FAULT(.L_cache_w1, .L_memcpy_mcsafe_fail)
- _ASM_EXTABLE_FAULT(.L_cache_w2, .L_memcpy_mcsafe_fail)
- _ASM_EXTABLE_FAULT(.L_cache_w3, .L_memcpy_mcsafe_fail)
- _ASM_EXTABLE_FAULT(.L_cache_w4, .L_memcpy_mcsafe_fail)
- _ASM_EXTABLE_FAULT(.L_cache_w5, .L_memcpy_mcsafe_fail)
- _ASM_EXTABLE_FAULT(.L_cache_w6, .L_memcpy_mcsafe_fail)
- _ASM_EXTABLE_FAULT(.L_cache_w7, .L_memcpy_mcsafe_fail)
- _ASM_EXTABLE_FAULT(.L_copy_trailing_words, .L_memcpy_mcsafe_fail)
- _ASM_EXTABLE_FAULT(.L_copy_trailing_bytes, .L_memcpy_mcsafe_fail)
+ _ASM_EXTABLE_FAULT(.L_read_leading_bytes, .E_leading_bytes)
+ _ASM_EXTABLE_FAULT(.L_read_words, .E_read_words)
+ _ASM_EXTABLE_FAULT(.L_read_trailing_bytes, .E_trailing_bytes)
+ _ASM_EXTABLE(.L_write_leading_bytes, .E_leading_bytes)
+ _ASM_EXTABLE(.L_write_words, .E_write_words)
+ _ASM_EXTABLE(.L_write_trailing_bytes, .E_trailing_bytes)
#endif
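
The fixup arithmetic above is easier to read in C. On a fault in the word loop, %ecx still holds the number of whole words left and %edx the trailing-byte count, so the new "bytes not copied" return value is ecx * 8 + edx (the shll $3 / addl pair); in the byte loops %ecx is already a byte count, hence the bare mov %ecx, %eax. As a sketch (bytes_not_copied() is an illustrative name):

static unsigned long bytes_not_copied(unsigned int words_left,
				      unsigned int tail_bytes)
{
	/* .E_read_words: shll $3, %ecx; .E_leading_bytes: addl %edx, %ecx */
	return (unsigned long)words_left * 8 + tail_bytes;
}
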
diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c
index 75d3776123cc..9c5606d88f61 100644
--- a/arch/x86/lib/usercopy_64.c
+++ b/arch/x86/lib/usercopy_64.c
@@ -23,13 +23,13 @@ unsigned long __clear_user(void __user *addr, unsigned long size)
asm volatile(
" testq %[size8],%[size8]\n"
" jz 4f\n"
- "0: movq %[zero],(%[dst])\n"
- " addq %[eight],%[dst]\n"
+ "0: movq $0,(%[dst])\n"
+ " addq $8,%[dst]\n"
" decl %%ecx ; jnz 0b\n"
"4: movq %[size1],%%rcx\n"
" testl %%ecx,%%ecx\n"
" jz 2f\n"
- "1: movb %b[zero],(%[dst])\n"
+ "1: movb $0,(%[dst])\n"
" incq %[dst]\n"
" decl %%ecx ; jnz 1b\n"
"2:\n"
@@ -40,8 +40,7 @@ unsigned long __clear_user(void __user *addr, unsigned long size)
_ASM_EXTABLE(0b,3b)
_ASM_EXTABLE(1b,2b)
: [size8] "=&c"(size), [dst] "=&D" (__d0)
- : [size1] "r"(size & 7), "[size8]" (size / 8), "[dst]"(addr),
- [zero] "r" (0UL), [eight] "r" (8UL));
+ : [size1] "r"(size & 7), "[size8]" (size / 8), "[dst]"(addr));
clac();
return size;
}
@@ -75,6 +74,27 @@ copy_user_handle_tail(char *to, char *from, unsigned len)
return len;
}
+/*
+ * Similar to copy_user_handle_tail, probe for the write fault point,
+ * but reuse __memcpy_mcsafe in case a new read error is encountered.
+ * clac() is handled in _copy_to_iter_mcsafe().
+ */
+__visible unsigned long
+mcsafe_handle_tail(char *to, char *from, unsigned len)
+{
+ for (; len; --len, to++, from++) {
+ /*
+ * Call the assembly routine back directly since
+ * memcpy_mcsafe() may silently fall back to memcpy().
+ */
+ unsigned long rem = __memcpy_mcsafe(to, from, 1);
+
+ if (rem)
+ break;
+ }
+ return len;
+}
+
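
With mcsafe_handle_tail() in place, a non-zero return from __memcpy_mcsafe() now means "this many bytes at the tail were not copied", which lets callers account for partial progress instead of collapsing every failure to -EFAULT. A hypothetical caller (read_mcsafe_sketch() is illustrative, not a real driver function):

static ssize_t read_mcsafe_sketch(void *dst, const void *src, size_t len)
{
	unsigned long rem = __memcpy_mcsafe(dst, src, len);

	if (!rem)
		return len;
	if (rem == len)
		return -EIO;		/* nothing was copied at all */
	return len - rem;		/* bytes actually copied */
}
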
#ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
/**
* clean_cache_range - write back a cache range with CLWB
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index cc7ff5957194..2f3c9196b834 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -360,7 +360,7 @@ static inline bool kasan_page_table(struct seq_file *m, struct pg_state *st,
void *pt)
{
if (__pa(pt) == __pa(kasan_zero_pmd) ||
- (pgtable_l5_enabled && __pa(pt) == __pa(kasan_zero_p4d)) ||
+ (pgtable_l5_enabled() && __pa(pt) == __pa(kasan_zero_p4d)) ||
__pa(pt) == __pa(kasan_zero_pud)) {
pgprotval_t prot = pte_flags(kasan_zero_pte[0]);
note_page(m, st, __pgprot(prot), 0, 5);
@@ -476,8 +476,8 @@ static void walk_p4d_level(struct seq_file *m, struct pg_state *st, pgd_t addr,
}
}
-#define pgd_large(a) (pgtable_l5_enabled ? pgd_large(a) : p4d_large(__p4d(pgd_val(a))))
-#define pgd_none(a) (pgtable_l5_enabled ? pgd_none(a) : p4d_none(__p4d(pgd_val(a))))
+#define pgd_large(a) (pgtable_l5_enabled() ? pgd_large(a) : p4d_large(__p4d(pgd_val(a))))
+#define pgd_none(a) (pgtable_l5_enabled() ? pgd_none(a) : p4d_none(__p4d(pgd_val(a))))
static inline bool is_hypervisor_range(int idx)
{
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 73bd8c95ac71..9a84a0d08727 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -209,6 +209,7 @@ force_sig_info_fault(int si_signo, int si_code, unsigned long address,
unsigned lsb = 0;
siginfo_t info;
+ clear_siginfo(&info);
info.si_signo = si_signo;
info.si_errno = 0;
info.si_code = si_code;
@@ -439,7 +440,7 @@ static noinline int vmalloc_fault(unsigned long address)
if (pgd_none(*pgd_k))
return -1;
- if (pgtable_l5_enabled) {
+ if (pgtable_l5_enabled()) {
if (pgd_none(*pgd)) {
set_pgd(pgd, *pgd_k);
arch_flush_lazy_mmu_mode();
@@ -454,7 +455,7 @@ static noinline int vmalloc_fault(unsigned long address)
if (p4d_none(*p4d_k))
return -1;
- if (p4d_none(*p4d) && !pgtable_l5_enabled) {
+ if (p4d_none(*p4d) && !pgtable_l5_enabled()) {
set_p4d(p4d, *p4d_k);
arch_flush_lazy_mmu_mode();
} else {
@@ -828,6 +829,8 @@ static inline void
show_signal_msg(struct pt_regs *regs, unsigned long error_code,
unsigned long address, struct task_struct *tsk)
{
+ const char *loglvl = task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG;
+
if (!unhandled_signal(tsk, SIGSEGV))
return;
@@ -835,13 +838,14 @@ show_signal_msg(struct pt_regs *regs, unsigned long error_code,
return;
printk("%s%s[%d]: segfault at %lx ip %px sp %px error %lx",
- task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
- tsk->comm, task_pid_nr(tsk), address,
+ loglvl, tsk->comm, task_pid_nr(tsk), address,
(void *)regs->ip, (void *)regs->sp, error_code);
print_vma_addr(KERN_CONT " in ", regs->ip);
printk(KERN_CONT "\n");
+
+ show_opcodes((u8 *)regs->ip, loglvl);
}
static void
diff --git a/arch/x86/mm/ident_map.c b/arch/x86/mm/ident_map.c
index a2f0c7e20fb0..fe7a12599d8e 100644
--- a/arch/x86/mm/ident_map.c
+++ b/arch/x86/mm/ident_map.c
@@ -123,7 +123,7 @@ int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page,
result = ident_p4d_init(info, p4d, addr, next);
if (result)
return result;
- if (pgtable_l5_enabled) {
+ if (pgtable_l5_enabled()) {
set_pgd(pgd, __pgd(__pa(p4d) | info->kernpg_flag));
} else {
/*
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 0a400606dea0..17383f9677fa 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -180,7 +180,7 @@ static void sync_global_pgds_l4(unsigned long start, unsigned long end)
*/
void sync_global_pgds(unsigned long start, unsigned long end)
{
- if (pgtable_l5_enabled)
+ if (pgtable_l5_enabled())
sync_global_pgds_l5(start, end);
else
sync_global_pgds_l4(start, end);
@@ -643,7 +643,7 @@ phys_p4d_init(p4d_t *p4d_page, unsigned long paddr, unsigned long paddr_end,
unsigned long vaddr = (unsigned long)__va(paddr);
int i = p4d_index(vaddr);
- if (!pgtable_l5_enabled)
+ if (!pgtable_l5_enabled())
return phys_pud_init((pud_t *) p4d_page, paddr, paddr_end, page_size_mask);
for (; i < PTRS_PER_P4D; i++, paddr = paddr_next) {
@@ -723,7 +723,7 @@ kernel_physical_mapping_init(unsigned long paddr_start,
page_size_mask);
spin_lock(&init_mm.page_table_lock);
- if (pgtable_l5_enabled)
+ if (pgtable_l5_enabled())
pgd_populate(&init_mm, pgd, p4d);
else
p4d_populate(&init_mm, p4d_offset(pgd, vaddr), (pud_t *) p4d);
@@ -1100,7 +1100,7 @@ remove_p4d_table(p4d_t *p4d_start, unsigned long addr, unsigned long end,
* 5-level case we should free them. This code will have to change
* to adapt for boot-time switching between 4 and 5 level page tables.
*/
- if (pgtable_l5_enabled)
+ if (pgtable_l5_enabled())
free_pud_table(pud_base, p4d);
}
diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c
index 980dbebd0ca7..e3e77527f8df 100644
--- a/arch/x86/mm/kasan_init_64.c
+++ b/arch/x86/mm/kasan_init_64.c
@@ -2,10 +2,8 @@
#define DISABLE_BRANCH_PROFILING
#define pr_fmt(fmt) "kasan: " fmt
-#ifdef CONFIG_X86_5LEVEL
-/* Too early to use cpu_feature_enabled() */
-#define pgtable_l5_enabled __pgtable_l5_enabled
-#endif
+/* cpu_feature_enabled() cannot be used this early */
+#define USE_EARLY_PGTABLE_L5
#include <linux/bootmem.h>
#include <linux/kasan.h>
@@ -182,7 +180,7 @@ static void __init clear_pgds(unsigned long start,
* With folded p4d, pgd_clear() is a nop, use p4d_clear()
* instead.
*/
- if (pgtable_l5_enabled)
+ if (pgtable_l5_enabled())
pgd_clear(pgd);
else
p4d_clear(p4d_offset(pgd, start));
@@ -197,7 +195,7 @@ static inline p4d_t *early_p4d_offset(pgd_t *pgd, unsigned long addr)
{
unsigned long p4d;
- if (!pgtable_l5_enabled)
+ if (!pgtable_l5_enabled())
return (p4d_t *)pgd;
p4d = __pa_nodebug(pgd_val(*pgd)) & PTE_PFN_MASK;
@@ -284,7 +282,7 @@ void __init kasan_early_init(void)
for (i = 0; i < PTRS_PER_PUD; i++)
kasan_zero_pud[i] = __pud(pud_val);
- for (i = 0; pgtable_l5_enabled && i < PTRS_PER_P4D; i++)
+ for (i = 0; pgtable_l5_enabled() && i < PTRS_PER_P4D; i++)
kasan_zero_p4d[i] = __p4d(p4d_val);
kasan_map_early_shadow(early_top_pgt);
@@ -315,7 +313,7 @@ void __init kasan_init(void)
* bunch of things like kernel code, modules, EFI mapping, etc.
* We need to take extra steps to not overwrite them.
*/
- if (pgtable_l5_enabled) {
+ if (pgtable_l5_enabled()) {
void *ptr;
ptr = (void *)pgd_page_vaddr(*pgd_offset_k(KASAN_SHADOW_END));
diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c
index 615cc03ced84..61db77b0eda9 100644
--- a/arch/x86/mm/kaslr.c
+++ b/arch/x86/mm/kaslr.c
@@ -78,7 +78,7 @@ void __init kernel_randomize_memory(void)
struct rnd_state rand_state;
unsigned long remain_entropy;
- vaddr_start = pgtable_l5_enabled ? __PAGE_OFFSET_BASE_L5 : __PAGE_OFFSET_BASE_L4;
+ vaddr_start = pgtable_l5_enabled() ? __PAGE_OFFSET_BASE_L5 : __PAGE_OFFSET_BASE_L4;
vaddr = vaddr_start;
/*
@@ -124,7 +124,7 @@ void __init kernel_randomize_memory(void)
*/
entropy = remain_entropy / (ARRAY_SIZE(kaslr_regions) - i);
prandom_bytes_state(&rand_state, &rand, sizeof(rand));
- if (pgtable_l5_enabled)
+ if (pgtable_l5_enabled())
entropy = (rand % (entropy + 1)) & P4D_MASK;
else
entropy = (rand % (entropy + 1)) & PUD_MASK;
@@ -136,7 +136,7 @@ void __init kernel_randomize_memory(void)
* randomization alignment.
*/
vaddr += get_padding(&kaslr_regions[i]);
- if (pgtable_l5_enabled)
+ if (pgtable_l5_enabled())
vaddr = round_up(vaddr + 1, P4D_SIZE);
else
vaddr = round_up(vaddr + 1, PUD_SIZE);
@@ -212,7 +212,7 @@ void __meminit init_trampoline(void)
return;
}
- if (pgtable_l5_enabled)
+ if (pgtable_l5_enabled())
init_trampoline_p4d();
else
init_trampoline_pud();
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index 25504d5aa816..fa150855647c 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -136,13 +136,13 @@ static int __init numa_add_memblk_to(int nid, u64 start, u64 end,
/* whine about and ignore invalid blks */
if (start > end || nid < 0 || nid >= MAX_NUMNODES) {
- pr_warning("NUMA: Warning: invalid memblk node %d [mem %#010Lx-%#010Lx]\n",
- nid, start, end - 1);
+ pr_warn("Warning: invalid memblk node %d [mem %#010Lx-%#010Lx]\n",
+ nid, start, end - 1);
return 0;
}
if (mi->nr_blks >= NR_NODE_MEMBLKS) {
- pr_err("NUMA: too many memblk ranges\n");
+ pr_err("too many memblk ranges\n");
return -EINVAL;
}
@@ -267,14 +267,14 @@ int __init numa_cleanup_meminfo(struct numa_meminfo *mi)
*/
if (bi->end > bj->start && bi->start < bj->end) {
if (bi->nid != bj->nid) {
- pr_err("NUMA: node %d [mem %#010Lx-%#010Lx] overlaps with node %d [mem %#010Lx-%#010Lx]\n",
+ pr_err("node %d [mem %#010Lx-%#010Lx] overlaps with node %d [mem %#010Lx-%#010Lx]\n",
bi->nid, bi->start, bi->end - 1,
bj->nid, bj->start, bj->end - 1);
return -EINVAL;
}
- pr_warning("NUMA: Warning: node %d [mem %#010Lx-%#010Lx] overlaps with itself [mem %#010Lx-%#010Lx]\n",
- bi->nid, bi->start, bi->end - 1,
- bj->start, bj->end - 1);
+ pr_warn("Warning: node %d [mem %#010Lx-%#010Lx] overlaps with itself [mem %#010Lx-%#010Lx]\n",
+ bi->nid, bi->start, bi->end - 1,
+ bj->start, bj->end - 1);
}
/*
@@ -364,7 +364,7 @@ static int __init numa_alloc_distance(void)
phys = memblock_find_in_range(0, PFN_PHYS(max_pfn_mapped),
size, PAGE_SIZE);
if (!phys) {
- pr_warning("NUMA: Warning: can't allocate distance table!\n");
+ pr_warn("Warning: can't allocate distance table!\n");
/* don't retry until explicitly reset */
numa_distance = (void *)1LU;
return -ENOMEM;
@@ -410,14 +410,14 @@ void __init numa_set_distance(int from, int to, int distance)
if (from >= numa_distance_cnt || to >= numa_distance_cnt ||
from < 0 || to < 0) {
- pr_warn_once("NUMA: Warning: node ids are out of bound, from=%d to=%d distance=%d\n",
- from, to, distance);
+ pr_warn_once("Warning: node ids are out of bound, from=%d to=%d distance=%d\n",
+ from, to, distance);
return;
}
if ((u8)distance != distance ||
(from == to && distance != LOCAL_DISTANCE)) {
- pr_warn_once("NUMA: Warning: invalid distance parameter, from=%d to=%d distance=%d\n",
+ pr_warn_once("Warning: invalid distance parameter, from=%d to=%d distance=%d\n",
from, to, distance);
return;
}
diff --git a/arch/x86/mm/pkeys.c b/arch/x86/mm/pkeys.c
index d7bc0eea20a5..6e98e0a7c923 100644
--- a/arch/x86/mm/pkeys.c
+++ b/arch/x86/mm/pkeys.c
@@ -94,26 +94,27 @@ int __arch_override_mprotect_pkey(struct vm_area_struct *vma, int prot, int pkey
*/
if (pkey != -1)
return pkey;
- /*
- * Look for a protection-key-drive execute-only mapping
- * which is now being given permissions that are not
- * execute-only. Move it back to the default pkey.
- */
- if (vma_is_pkey_exec_only(vma) &&
- (prot & (PROT_READ|PROT_WRITE))) {
- return 0;
- }
+
/*
* The mapping is execute-only. Go try to get the
* execute-only protection key. If we fail to do that,
* fall through as if we do not have execute-only
- * support.
+ * support in this mm.
*/
if (prot == PROT_EXEC) {
pkey = execute_only_pkey(vma->vm_mm);
if (pkey > 0)
return pkey;
+ } else if (vma_is_pkey_exec_only(vma)) {
+ /*
+ * Protections are *not* PROT_EXEC, but the mapping
+ * is using the exec-only pkey. This mapping was
+ * PROT_EXEC and will no longer be. Move back to
+ * the default pkey.
+ */
+ return ARCH_DEFAULT_PKEY;
}
+
/*
* This is a vanilla, non-pkey mprotect (or we failed to
* setup execute-only), inherit the pkey from the VMA we
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index e055d1a06699..6eb1f34c3c85 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -157,7 +157,7 @@ static void sync_current_stack_to_mm(struct mm_struct *mm)
unsigned long sp = current_stack_pointer;
pgd_t *pgd = pgd_offset(mm, sp);
- if (pgtable_l5_enabled) {
+ if (pgtable_l5_enabled()) {
if (unlikely(pgd_none(*pgd))) {
pgd_t *pgd_ref = pgd_offset_k(sp);
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index b725154182cc..d765acedc05c 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -1,4 +1,5 @@
-/* bpf_jit_comp.c : BPF JIT compiler
+/*
+ * bpf_jit_comp.c: BPF JIT compiler
*
* Copyright (C) 2011-2013 Eric Dumazet (eric.dumazet@gmail.com)
* Internal BPF Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
@@ -17,7 +18,7 @@
#include <asm/nospec-branch.h>
/*
- * assembly code in arch/x86/net/bpf_jit.S
+ * Assembly code in arch/x86/net/bpf_jit.S
*/
extern u8 sk_load_word[], sk_load_half[], sk_load_byte[];
extern u8 sk_load_word_positive_offset[], sk_load_half_positive_offset[];
@@ -45,14 +46,15 @@ static u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len)
#define EMIT2(b1, b2) EMIT((b1) + ((b2) << 8), 2)
#define EMIT3(b1, b2, b3) EMIT((b1) + ((b2) << 8) + ((b3) << 16), 3)
#define EMIT4(b1, b2, b3, b4) EMIT((b1) + ((b2) << 8) + ((b3) << 16) + ((b4) << 24), 4)
+
#define EMIT1_off32(b1, off) \
- do {EMIT1(b1); EMIT(off, 4); } while (0)
+ do { EMIT1(b1); EMIT(off, 4); } while (0)
#define EMIT2_off32(b1, b2, off) \
- do {EMIT2(b1, b2); EMIT(off, 4); } while (0)
+ do { EMIT2(b1, b2); EMIT(off, 4); } while (0)
#define EMIT3_off32(b1, b2, b3, off) \
- do {EMIT3(b1, b2, b3); EMIT(off, 4); } while (0)
+ do { EMIT3(b1, b2, b3); EMIT(off, 4); } while (0)
#define EMIT4_off32(b1, b2, b3, b4, off) \
- do {EMIT4(b1, b2, b3, b4); EMIT(off, 4); } while (0)
+ do { EMIT4(b1, b2, b3, b4); EMIT(off, 4); } while (0)
static bool is_imm8(int value)
{
@@ -70,9 +72,10 @@ static bool is_uimm32(u64 value)
}
/* mov dst, src */
-#define EMIT_mov(DST, SRC) \
- do {if (DST != SRC) \
- EMIT3(add_2mod(0x48, DST, SRC), 0x89, add_2reg(0xC0, DST, SRC)); \
+#define EMIT_mov(DST, SRC) \
+ do { \
+ if (DST != SRC) \
+ EMIT3(add_2mod(0x48, DST, SRC), 0x89, add_2reg(0xC0, DST, SRC)); \
} while (0)
static int bpf_size_to_x86_bytes(int bpf_size)
@@ -89,7 +92,8 @@ static int bpf_size_to_x86_bytes(int bpf_size)
return 0;
}
-/* list of x86 cond jumps opcodes (. + s8)
+/*
+ * List of x86 cond jumps opcodes (. + s8)
* Add 0x10 (and an extra 0x0f) to generate far jumps (. + s32)
*/
#define X86_JB 0x72
@@ -106,35 +110,37 @@ static int bpf_size_to_x86_bytes(int bpf_size)
#define CHOOSE_LOAD_FUNC(K, func) \
((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative_offset : func) : func##_positive_offset)
-/* pick a register outside of BPF range for JIT internal work */
+/* Pick a register outside of BPF range for JIT internal work */
#define AUX_REG (MAX_BPF_JIT_REG + 1)
-/* The following table maps BPF registers to x64 registers.
+/*
+ * The following table maps BPF registers to x86-64 registers.
*
- * x64 register r12 is unused, since if used as base address
+ * x86-64 register R12 is unused, since if used as a base address
* register in load/store instructions, it always needs an
* extra byte of encoding and is callee saved.
*
- * r9 caches skb->len - skb->data_len
- * r10 caches skb->data, and used for blinding (if enabled)
+ * R9 caches skb->len - skb->data_len
+ * R10 caches skb->data, and is used for blinding (if enabled)
*/
static const int reg2hex[] = {
- [BPF_REG_0] = 0, /* rax */
- [BPF_REG_1] = 7, /* rdi */
- [BPF_REG_2] = 6, /* rsi */
- [BPF_REG_3] = 2, /* rdx */
- [BPF_REG_4] = 1, /* rcx */
- [BPF_REG_5] = 0, /* r8 */
- [BPF_REG_6] = 3, /* rbx callee saved */
- [BPF_REG_7] = 5, /* r13 callee saved */
- [BPF_REG_8] = 6, /* r14 callee saved */
- [BPF_REG_9] = 7, /* r15 callee saved */
- [BPF_REG_FP] = 5, /* rbp readonly */
- [BPF_REG_AX] = 2, /* r10 temp register */
- [AUX_REG] = 3, /* r11 temp register */
+ [BPF_REG_0] = 0, /* RAX */
+ [BPF_REG_1] = 7, /* RDI */
+ [BPF_REG_2] = 6, /* RSI */
+ [BPF_REG_3] = 2, /* RDX */
+ [BPF_REG_4] = 1, /* RCX */
+ [BPF_REG_5] = 0, /* R8 */
+ [BPF_REG_6] = 3, /* RBX callee saved */
+ [BPF_REG_7] = 5, /* R13 callee saved */
+ [BPF_REG_8] = 6, /* R14 callee saved */
+ [BPF_REG_9] = 7, /* R15 callee saved */
+ [BPF_REG_FP] = 5, /* RBP readonly */
+ [BPF_REG_AX] = 2, /* R10 temp register */
+ [AUX_REG] = 3, /* R11 temp register */
};
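A worked encoding example for this table (standalone sketch; the two reg2hex values are copied from the array above, the rest is standard x86-64 ModRM arithmetic as implemented by add_2mod()/add_2reg() below):

#include <stdio.h>

#define HEX_RAX 0	/* reg2hex[BPF_REG_0] */
#define HEX_RBX 3	/* reg2hex[BPF_REG_6] */

int main(void)
{
	/* EMIT_mov(BPF_REG_0, BPF_REG_6), i.e. mov rax, rbx:
	 * neither register is R8..R15, so the REX prefix stays 0x48;
	 * ModRM = 0xC0 + dst + (src << 3).
	 */
	unsigned char modrm = 0xC0 + HEX_RAX + (HEX_RBX << 3);

	printf("48 89 %02x\n", modrm);	/* 48 89 d8 */
	return 0;
}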
-/* is_ereg() == true if BPF register 'reg' maps to x64 r8..r15
+/*
+ * is_ereg() == true if BPF register 'reg' maps to x86-64 r8..r15,
* which need an extra byte of encoding.
* rax, rcx, ..., rbp have simpler encodings.
*/
@@ -153,7 +159,7 @@ static bool is_axreg(u32 reg)
return reg == BPF_REG_0;
}
-/* add modifiers if 'reg' maps to x64 registers r8..r15 */
+/* Add modifiers if 'reg' maps to x86-64 registers R8..R15 */
static u8 add_1mod(u8 byte, u32 reg)
{
if (is_ereg(reg))
@@ -170,13 +176,13 @@ static u8 add_2mod(u8 byte, u32 r1, u32 r2)
return byte;
}
-/* encode 'dst_reg' register into x64 opcode 'byte' */
+/* Encode 'dst_reg' register into x86-64 opcode 'byte' */
static u8 add_1reg(u8 byte, u32 dst_reg)
{
return byte + reg2hex[dst_reg];
}
-/* encode 'dst_reg' and 'src_reg' registers into x64 opcode 'byte' */
+/* Encode 'dst_reg' and 'src_reg' registers into x86-64 opcode 'byte' */
static u8 add_2reg(u8 byte, u32 dst_reg, u32 src_reg)
{
return byte + reg2hex[dst_reg] + (reg2hex[src_reg] << 3);
@@ -184,27 +190,28 @@ static u8 add_2reg(u8 byte, u32 dst_reg, u32 src_reg)
static void jit_fill_hole(void *area, unsigned int size)
{
- /* fill whole space with int3 instructions */
+ /* Fill whole space with INT3 instructions */
memset(area, 0xcc, size);
}
struct jit_context {
- int cleanup_addr; /* epilogue code offset */
+ int cleanup_addr; /* Epilogue code offset */
bool seen_ld_abs;
bool seen_ax_reg;
};
-/* maximum number of bytes emitted while JITing one eBPF insn */
+/* Maximum number of bytes emitted while JITing one eBPF insn */
#define BPF_MAX_INSN_SIZE 128
#define BPF_INSN_SAFETY 64
#define AUX_STACK_SPACE \
- (32 /* space for rbx, r13, r14, r15 */ + \
- 8 /* space for skb_copy_bits() buffer */)
+ (32 /* Space for RBX, R13, R14, R15 */ + \
+ 8 /* Space for skb_copy_bits() buffer */)
#define PROLOGUE_SIZE 37
-/* emit x64 prologue code for BPF program and check it's size.
+/*
+ * Emit x86-64 prologue code for BPF program and check its size.
* bpf_tail_call helper will skip it while jumping into another program
*/
static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf)
@@ -212,8 +219,11 @@ static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf)
u8 *prog = *pprog;
int cnt = 0;
- EMIT1(0x55); /* push rbp */
- EMIT3(0x48, 0x89, 0xE5); /* mov rbp,rsp */
+ /* push rbp */
+ EMIT1(0x55);
+
+ /* mov rbp,rsp */
+ EMIT3(0x48, 0x89, 0xE5);
/* sub rsp, rounded_stack_depth + AUX_STACK_SPACE */
EMIT3_off32(0x48, 0x81, 0xEC,
@@ -222,14 +232,15 @@ static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf)
/* sub rbp, AUX_STACK_SPACE */
EMIT4(0x48, 0x83, 0xED, AUX_STACK_SPACE);
- /* all classic BPF filters use R6(rbx) save it */
+ /* All classic BPF filters use R6(rbx); save it */
/* mov qword ptr [rbp+0],rbx */
EMIT4(0x48, 0x89, 0x5D, 0);
- /* bpf_convert_filter() maps classic BPF register X to R7 and uses R8
- * as temporary, so all tcpdump filters need to spill/fill R7(r13) and
- * R8(r14). R9(r15) spill could be made conditional, but there is only
+ /*
+ * bpf_convert_filter() maps classic BPF register X to R7 and uses R8
+ * as temporary, so all tcpdump filters need to spill/fill R7(R13) and
+ * R8(R14). R9(R15) spill could be made conditional, but there is only
* one 'bpf_error' return path out of helper functions inside bpf_jit.S
* The overhead of extra spill is negligible for any filter other
* than synthetic ones. Therefore not worth adding complexity.
@@ -243,9 +254,10 @@ static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf)
EMIT4(0x4C, 0x89, 0x7D, 24);
if (!ebpf_from_cbpf) {
- /* Clear the tail call counter (tail_call_cnt): for eBPF tail
+ /*
+ * Clear the tail call counter (tail_call_cnt): for eBPF tail
* calls we need to reset the counter to 0. It's done in two
- * instructions, resetting rax register to 0, and moving it
+ * instructions, resetting RAX register to 0, and moving it
* to the counter location.
*/
@@ -260,7 +272,9 @@ static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf)
*pprog = prog;
}
-/* generate the following code:
+/*
+ * Generate the following code:
+ *
* ... bpf_tail_call(void *ctx, struct bpf_array *array, u64 index) ...
* if (index >= array->map.max_entries)
* goto out;
@@ -278,23 +292,26 @@ static void emit_bpf_tail_call(u8 **pprog)
int label1, label2, label3;
int cnt = 0;
- /* rdi - pointer to ctx
+ /*
+ * rdi - pointer to ctx
* rsi - pointer to bpf_array
* rdx - index in bpf_array
*/
- /* if (index >= array->map.max_entries)
- * goto out;
+ /*
+ * if (index >= array->map.max_entries)
+ * goto out;
*/
EMIT2(0x89, 0xD2); /* mov edx, edx */
EMIT3(0x39, 0x56, /* cmp dword ptr [rsi + 16], edx */
offsetof(struct bpf_array, map.max_entries));
-#define OFFSET1 (41 + RETPOLINE_RAX_BPF_JIT_SIZE) /* number of bytes to jump */
+#define OFFSET1 (41 + RETPOLINE_RAX_BPF_JIT_SIZE) /* Number of bytes to jump */
EMIT2(X86_JBE, OFFSET1); /* jbe out */
label1 = cnt;
- /* if (tail_call_cnt > MAX_TAIL_CALL_CNT)
- * goto out;
+ /*
+ * if (tail_call_cnt > MAX_TAIL_CALL_CNT)
+ * goto out;
*/
EMIT2_off32(0x8B, 0x85, 36); /* mov eax, dword ptr [rbp + 36] */
EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT); /* cmp eax, MAX_TAIL_CALL_CNT */
@@ -308,8 +325,9 @@ static void emit_bpf_tail_call(u8 **pprog)
EMIT4_off32(0x48, 0x8B, 0x84, 0xD6, /* mov rax, [rsi + rdx * 8 + offsetof(...)] */
offsetof(struct bpf_array, ptrs));
- /* if (prog == NULL)
- * goto out;
+ /*
+ * if (prog == NULL)
+ * goto out;
*/
EMIT3(0x48, 0x85, 0xC0); /* test rax,rax */
#define OFFSET3 (8 + RETPOLINE_RAX_BPF_JIT_SIZE)
@@ -321,7 +339,8 @@ static void emit_bpf_tail_call(u8 **pprog)
offsetof(struct bpf_prog, bpf_func));
EMIT4(0x48, 0x83, 0xC0, PROLOGUE_SIZE); /* add rax, prologue_size */
- /* now we're ready to jump into next BPF program
+ /*
+ * Now we're ready to jump into the next BPF program
* rdi == ctx (1st arg)
* rax == prog->bpf_func + prologue_size
*/
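As a C-level sketch of the sequence being emitted (standalone and illustrative; the fake_* types are reduced stand-ins for struct bpf_array and struct bpf_prog, MAX_TAIL_CALL_CNT is 32 in this kernel, and the counter increment/store follows in code not shown in this hunk):

#include <stddef.h>

#define MAX_TAIL_CALL_CNT	32	/* as in include/linux/bpf.h */
#define PROLOGUE_SIZE		37	/* as defined earlier in this file */

struct fake_prog  { char *bpf_func; };
struct fake_array { unsigned int max_entries; struct fake_prog *ptrs[8]; };

/* Returns the address to jump to, or NULL for the 'out' path. */
static char *pick_target(struct fake_array *array, unsigned int index,
			 unsigned int *tail_call_cnt)
{
	if (index >= array->max_entries)	/* cmp [rsi + 16], edx; jbe out */
		return NULL;
	if (*tail_call_cnt > MAX_TAIL_CALL_CNT)	/* cmp eax, 32; ja out */
		return NULL;
	(*tail_call_cnt)++;
	if (!array->ptrs[index])		/* test rax, rax; je out */
		return NULL;
	/* add rax, PROLOGUE_SIZE, then the retpolined indirect jump */
	return array->ptrs[index]->bpf_func + PROLOGUE_SIZE;
}

int main(void)
{
	unsigned int cnt = 0;
	struct fake_array a = { .max_entries = 1 };

	return pick_target(&a, 5, &cnt) == NULL ? 0 : 1;	/* index too big: out */
}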
@@ -340,7 +359,8 @@ static void emit_load_skb_data_hlen(u8 **pprog)
u8 *prog = *pprog;
int cnt = 0;
- /* r9d = skb->len - skb->data_len (headlen)
+ /*
+ * r9d = skb->len - skb->data_len (headlen)
* r10 = skb->data
*/
/* mov %r9d, off32(%rdi) */
@@ -361,7 +381,8 @@ static void emit_mov_imm32(u8 **pprog, bool sign_propagate,
u8 b1, b2, b3;
int cnt = 0;
- /* optimization: if imm32 is positive, use 'mov %eax, imm32'
+ /*
+ * Optimization: if imm32 is positive, use 'mov %eax, imm32'
* (which zero-extends imm32) to save 2 bytes.
*/
if (sign_propagate && (s32)imm32 < 0) {
@@ -373,7 +394,8 @@ static void emit_mov_imm32(u8 **pprog, bool sign_propagate,
goto done;
}
- /* optimization: if imm32 is zero, use 'xor %eax, %eax'
+ /*
+ * Optimization: if imm32 is zero, use 'xor %eax, %eax'
* to save 3 bytes.
*/
if (imm32 == 0) {
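For concreteness, the byte counts behind both size optimizations in this function (standalone sketch; encodings taken from the x86-64 instruction set reference):

#include <stdio.h>

int main(void)
{
	unsigned char mov_rax_simm32[] = { 0x48, 0xC7, 0xC0, 0, 0, 0, 0 };	/* mov rax, simm32 */
	unsigned char mov_eax_imm32[]  = { 0xB8, 0, 0, 0, 0 };			/* mov eax, imm32  */
	unsigned char xor_eax_eax[]    = { 0x31, 0xC0 };			/* xor eax, eax    */

	/* 7 vs 5 bytes: 2 saved when imm32 is positive;
	 * 5 vs 2 bytes: 3 saved when imm32 is zero.
	 */
	printf("%zu %zu %zu\n", sizeof(mov_rax_simm32),
	       sizeof(mov_eax_imm32), sizeof(xor_eax_eax));
	return 0;
}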
@@ -400,7 +422,8 @@ static void emit_mov_imm64(u8 **pprog, u32 dst_reg,
int cnt = 0;
if (is_uimm32(((u64)imm32_hi << 32) | (u32)imm32_lo)) {
- /* For emitting plain u32, where sign bit must not be
+ /*
+ * For emitting plain u32, where sign bit must not be
* propagated LLVM tends to load imm64 over mov32
* directly, so save couple of bytes by just doing
* 'mov %eax, imm32' instead.
@@ -525,7 +548,8 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
else if (is_ereg(dst_reg))
EMIT1(add_1mod(0x40, dst_reg));
- /* b3 holds 'normal' opcode, b2 short form only valid
+ /*
+ * b3 holds the 'normal' opcode; the b2 short form is valid
* only in case dst is eax/rax.
*/
switch (BPF_OP(insn->code)) {
@@ -593,7 +617,8 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
/* mov rax, dst_reg */
EMIT_mov(BPF_REG_0, dst_reg);
- /* xor edx, edx
+ /*
+ * xor edx, edx
* equivalent to 'xor rdx, rdx', but one byte shorter
*/
EMIT2(0x31, 0xd2);
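The byte saving relies on the x86-64 rule that writing a 32-bit register clears bits 63:32: 'xor edx, edx' encodes as 31 D2 (two bytes) while 'xor rdx, rdx' needs a REX.W prefix, 48 31 D2 (three bytes). A standalone check of the semantics:

#include <stdio.h>

int main(void)
{
	unsigned long long rdx = 0xdeadbeefcafef00dULL;

	/* A 32-bit operation on the low half zero-extends to 64 bits. */
	rdx = (unsigned int)rdx ^ (unsigned int)rdx;
	printf("%llu\n", rdx);	/* 0 */
	return 0;
}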
@@ -655,7 +680,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
}
break;
}
- /* shifts */
+ /* Shifts */
case BPF_ALU | BPF_LSH | BPF_K:
case BPF_ALU | BPF_RSH | BPF_K:
case BPF_ALU | BPF_ARSH | BPF_K:
@@ -686,7 +711,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
case BPF_ALU64 | BPF_RSH | BPF_X:
case BPF_ALU64 | BPF_ARSH | BPF_X:
- /* check for bad case when dst_reg == rcx */
+ /* Check for bad case when dst_reg == rcx */
if (dst_reg == BPF_REG_4) {
/* mov r11, dst_reg */
EMIT_mov(AUX_REG, dst_reg);
@@ -724,13 +749,13 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
case BPF_ALU | BPF_END | BPF_FROM_BE:
switch (imm32) {
case 16:
- /* emit 'ror %ax, 8' to swap lower 2 bytes */
+ /* Emit 'ror %ax, 8' to swap lower 2 bytes */
EMIT1(0x66);
if (is_ereg(dst_reg))
EMIT1(0x41);
EMIT3(0xC1, add_1reg(0xC8, dst_reg), 8);
- /* emit 'movzwl eax, ax' */
+ /* Emit 'movzwl eax, ax' */
if (is_ereg(dst_reg))
EMIT3(0x45, 0x0F, 0xB7);
else
@@ -738,7 +763,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
EMIT1(add_2reg(0xC0, dst_reg, dst_reg));
break;
case 32:
- /* emit 'bswap eax' to swap lower 4 bytes */
+ /* Emit 'bswap eax' to swap lower 4 bytes */
if (is_ereg(dst_reg))
EMIT2(0x41, 0x0F);
else
@@ -746,7 +771,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
EMIT1(add_1reg(0xC8, dst_reg));
break;
case 64:
- /* emit 'bswap rax' to swap 8 bytes */
+ /* Emit 'bswap rax' to swap 8 bytes */
EMIT3(add_1mod(0x48, dst_reg), 0x0F,
add_1reg(0xC8, dst_reg));
break;
@@ -756,7 +781,8 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
case BPF_ALU | BPF_END | BPF_FROM_LE:
switch (imm32) {
case 16:
- /* emit 'movzwl eax, ax' to zero extend 16-bit
+ /*
+ * Emit 'movzwl eax, ax' to zero-extend 16-bit
* into 64-bit
*/
if (is_ereg(dst_reg))
@@ -766,7 +792,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
EMIT1(add_2reg(0xC0, dst_reg, dst_reg));
break;
case 32:
- /* emit 'mov eax, eax' to clear upper 32-bits */
+ /* Emit 'mov eax, eax' to clear upper 32-bits */
if (is_ereg(dst_reg))
EMIT1(0x45);
EMIT2(0x89, add_2reg(0xC0, dst_reg, dst_reg));
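The 16-bit byte swap above is the least obvious of these cases; a standalone sketch of the two-instruction semantics ('ror ax, 8' swaps the two low bytes, 'movzwl' then clears bits 63:16):

#include <stdio.h>

int main(void)
{
	unsigned long long dst = 0x1122334455667788ULL;
	unsigned short ax = (unsigned short)dst;	/* 0x7788 */

	ax  = (unsigned short)((ax >> 8) | (ax << 8));	/* ror ax, 8 -> 0x8877 */
	dst = ax;					/* movzwl zero-extends  */
	printf("%#llx\n", dst);				/* 0x8877 */
	return 0;
}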
@@ -809,9 +835,9 @@ st: if (is_imm8(insn->off))
/* STX: *(u8*)(dst_reg + off) = src_reg */
case BPF_STX | BPF_MEM | BPF_B:
- /* emit 'mov byte ptr [rax + off], al' */
+ /* Emit 'mov byte ptr [rax + off], al' */
if (is_ereg(dst_reg) || is_ereg(src_reg) ||
- /* have to add extra byte for x86 SIL, DIL regs */
+ /* We have to add an extra byte for x86 SIL, DIL regs */
src_reg == BPF_REG_1 || src_reg == BPF_REG_2)
EMIT2(add_2mod(0x40, dst_reg, src_reg), 0x88);
else
@@ -840,25 +866,26 @@ stx: if (is_imm8(insn->off))
/* LDX: dst_reg = *(u8*)(src_reg + off) */
case BPF_LDX | BPF_MEM | BPF_B:
- /* emit 'movzx rax, byte ptr [rax + off]' */
+ /* Emit 'movzx rax, byte ptr [rax + off]' */
EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xB6);
goto ldx;
case BPF_LDX | BPF_MEM | BPF_H:
- /* emit 'movzx rax, word ptr [rax + off]' */
+ /* Emit 'movzx rax, word ptr [rax + off]' */
EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xB7);
goto ldx;
case BPF_LDX | BPF_MEM | BPF_W:
- /* emit 'mov eax, dword ptr [rax+0x14]' */
+ /* Emit 'mov eax, dword ptr [rax+0x14]' */
if (is_ereg(dst_reg) || is_ereg(src_reg))
EMIT2(add_2mod(0x40, src_reg, dst_reg), 0x8B);
else
EMIT1(0x8B);
goto ldx;
case BPF_LDX | BPF_MEM | BPF_DW:
- /* emit 'mov rax, qword ptr [rax+0x14]' */
+ /* Emit 'mov rax, qword ptr [rax+0x14]' */
EMIT2(add_2mod(0x48, src_reg, dst_reg), 0x8B);
-ldx: /* if insn->off == 0 we can save one extra byte, but
- * special case of x86 r13 which always needs an offset
+ldx: /*
+ * If insn->off == 0 we can save one extra byte, but the
+ * special case of x86 R13, which always needs an offset,
* is not worth the hassle.
*/
if (is_imm8(insn->off))
@@ -870,7 +897,7 @@ ldx: /* if insn->off == 0 we can save one extra byte, but
/* STX XADD: lock *(u32*)(dst_reg + off) += src_reg */
case BPF_STX | BPF_XADD | BPF_W:
- /* emit 'lock add dword ptr [rax + off], eax' */
+ /* Emit 'lock add dword ptr [rax + off], eax' */
if (is_ereg(dst_reg) || is_ereg(src_reg))
EMIT3(0xF0, add_2mod(0x40, dst_reg, src_reg), 0x01);
else
@@ -897,14 +924,15 @@ xadd: if (is_imm8(insn->off))
} else {
EMIT2(0x41, 0x52); /* push %r10 */
EMIT2(0x41, 0x51); /* push %r9 */
- /* need to adjust jmp offset, since
+ /*
+ * We need to adjust jmp offset, since
* pop %r9, pop %r10 take 4 bytes after call insn
*/
jmp_offset += 4;
}
}
if (!imm32 || !is_simm32(jmp_offset)) {
- pr_err("unsupported bpf func %d addr %p image %p\n",
+ pr_err("unsupported BPF func %d addr %p image %p\n",
imm32, func, image);
return -EINVAL;
}
@@ -970,7 +998,7 @@ xadd: if (is_imm8(insn->off))
else
EMIT2_off32(0x81, add_1reg(0xF8, dst_reg), imm32);
-emit_cond_jmp: /* convert BPF opcode to x86 */
+emit_cond_jmp: /* Convert BPF opcode to x86 */
switch (BPF_OP(insn->code)) {
case BPF_JEQ:
jmp_cond = X86_JE;
@@ -996,22 +1024,22 @@ emit_cond_jmp: /* convert BPF opcode to x86 */
jmp_cond = X86_JBE;
break;
case BPF_JSGT:
- /* signed '>', GT in x86 */
+ /* Signed '>', GT in x86 */
jmp_cond = X86_JG;
break;
case BPF_JSLT:
- /* signed '<', LT in x86 */
+ /* Signed '<', LT in x86 */
jmp_cond = X86_JL;
break;
case BPF_JSGE:
- /* signed '>=', GE in x86 */
+ /* Signed '>=', GE in x86 */
jmp_cond = X86_JGE;
break;
case BPF_JSLE:
- /* signed '<=', LE in x86 */
+ /* Signed '<=', LE in x86 */
jmp_cond = X86_JLE;
break;
- default: /* to silence gcc warning */
+ default: /* To silence GCC warning */
return -EFAULT;
}
jmp_offset = addrs[i + insn->off] - addrs[i];
@@ -1027,9 +1055,19 @@ emit_cond_jmp: /* convert BPF opcode to x86 */
break;
case BPF_JMP | BPF_JA:
- jmp_offset = addrs[i + insn->off] - addrs[i];
+ if (insn->off == -1)
+ /*
+ * -1 jmp instructions will always jump
+ * backwards two bytes. Explicitly handling
+ * this case avoids wasting too many passes
+ * when there are long sequences of replaced
+ * dead code.
+ */
+ jmp_offset = -2;
+ else
+ jmp_offset = addrs[i + insn->off] - addrs[i];
+
if (!jmp_offset)
- /* optimize out nop jumps */
+ /* Optimize out nop jumps */
break;
emit_jmp:
if (is_imm8(jmp_offset)) {
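Why the offset is the constant -2: an x86 short jump is two bytes, and rel8 is measured from the end of the instruction, so a jump that targets its own first byte is always EB FE regardless of how much the surrounding image shrinks between passes. A standalone illustration:

#include <stdio.h>

int main(void)
{
	signed char rel8 = -2;				/* back over the 2-byte insn */
	unsigned char insn[2] = { 0xEB, (unsigned char)rel8 };

	printf("%02x %02x\n", insn[0], insn[1]);	/* eb fe */
	return 0;
}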
@@ -1051,7 +1089,7 @@ common_load:
ctx->seen_ld_abs = seen_ld_abs = true;
jmp_offset = func - (image + addrs[i]);
if (!func || !is_simm32(jmp_offset)) {
- pr_err("unsupported bpf func %d addr %p image %p\n",
+ pr_err("unsupported BPF func %d addr %p image %p\n",
imm32, func, image);
return -EINVAL;
}
@@ -1070,7 +1108,8 @@ common_load:
EMIT2_off32(0x81, 0xC6, imm32);
}
}
- /* skb pointer is in R6 (%rbx), it will be copied into
+ /*
+ * skb pointer is in R6 (%rbx), it will be copied into
* %rdi if skb_copy_bits() call is necessary.
* sk_load_* helpers also use %r10 and %r9d.
* See bpf_jit.S
@@ -1101,7 +1140,7 @@ common_load:
goto emit_jmp;
}
seen_exit = true;
- /* update cleanup_addr */
+ /* Update cleanup_addr */
ctx->cleanup_addr = proglen;
/* mov rbx, qword ptr [rbp+0] */
EMIT4(0x48, 0x8B, 0x5D, 0);
@@ -1119,10 +1158,11 @@ common_load:
break;
default:
- /* By design x64 JIT should support all BPF instructions
+ /*
+ * By design x86-64 JIT should support all BPF instructions.
* This error will be seen if new instruction was added
- * to interpreter, but not to JIT
- * or if there is junk in bpf_prog
+ * to the interpreter, but not to the JIT, or if there is
+ * junk in bpf_prog.
*/
pr_err("bpf_jit: unknown opcode %02x\n", insn->code);
return -EINVAL;
@@ -1174,7 +1214,8 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
return orig_prog;
tmp = bpf_jit_blind_constants(prog);
- /* If blinding was requested and we failed during blinding,
+ /*
+ * If blinding was requested and we failed during blinding,
* we must fall back to the interpreter.
*/
if (IS_ERR(tmp))
@@ -1208,8 +1249,9 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
goto out_addrs;
}
- /* Before first pass, make a rough estimation of addrs[]
- * each bpf instruction is translated to less than 64 bytes
+ /*
+ * Before first pass, make a rough estimation of addrs[]
+ * each BPF instruction is translated to less than 64 bytes
*/
for (proglen = 0, i = 0; i < prog->len; i++) {
proglen += 64;
@@ -1218,14 +1260,16 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
ctx.cleanup_addr = proglen;
skip_init_addrs:
- /* JITed image shrinks with every pass and the loop iterates
- * until the image stops shrinking. Very large bpf programs
+ /*
+ * JITed image shrinks with every pass and the loop iterates
+ * until the image stops shrinking. Very large BPF programs
* may converge on the last pass. In such case do one more
- * pass to emit the final image
+ * pass to emit the final image.
*/
for (pass = 0; pass < 20 || image; pass++) {
proglen = do_jit(prog, addrs, image, oldproglen, &ctx);
if (proglen <= 0) {
+out_image:
image = NULL;
if (header)
bpf_jit_binary_free(header);
@@ -1236,8 +1280,7 @@ skip_init_addrs:
if (proglen != oldproglen) {
pr_err("bpf_jit: proglen=%d != oldproglen=%d\n",
proglen, oldproglen);
- prog = orig_prog;
- goto out_addrs;
+ goto out_image;
}
break;
}
@@ -1273,7 +1316,7 @@ skip_init_addrs:
prog = orig_prog;
}
- if (!prog->is_func || extra_pass) {
+ if (!image || !prog->is_func || extra_pass) {
out_addrs:
kfree(addrs);
kfree(jit_data);
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index bed7e7f4e44c..e01f7ceb9e7a 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -225,7 +225,7 @@ int __init efi_alloc_page_tables(void)
pud = pud_alloc(&init_mm, p4d, EFI_VA_END);
if (!pud) {
- if (pgtable_l5_enabled)
+ if (pgtable_l5_enabled())
free_page((unsigned long) pgd_page_vaddr(*pgd));
free_pages((unsigned long)efi_pgd, PGD_ALLOCATION_ORDER);
return -ENOMEM;
diff --git a/arch/x86/power/hibernate_64.c b/arch/x86/power/hibernate_64.c
index ccf4a49bb065..67ccf64c8bd8 100644
--- a/arch/x86/power/hibernate_64.c
+++ b/arch/x86/power/hibernate_64.c
@@ -72,7 +72,7 @@ static int set_up_temporary_text_mapping(pgd_t *pgd)
* tables used by the image kernel.
*/
- if (pgtable_l5_enabled) {
+ if (pgtable_l5_enabled()) {
p4d = (p4d_t *)get_safe_page(GFP_ATOMIC);
if (!p4d)
return -ENOMEM;
diff --git a/arch/x86/um/vdso/Makefile b/arch/x86/um/vdso/Makefile
index 10003359e633..b2d6967262b2 100644
--- a/arch/x86/um/vdso/Makefile
+++ b/arch/x86/um/vdso/Makefile
@@ -23,14 +23,14 @@ $(obj)/vdso.o: $(obj)/vdso.so
targets += vdso.so vdso.so.dbg vdso.lds $(vobjs-y)
-export CPPFLAGS_vdso.lds += -P -C
+CPPFLAGS_vdso.lds += -P -C
VDSO_LDFLAGS_vdso.lds = -m64 -Wl,-soname=linux-vdso.so.1 \
-Wl,-z,max-page-size=4096 -Wl,-z,common-page-size=4096
$(obj)/vdso.o: $(src)/vdso.S $(obj)/vdso.so
-$(obj)/vdso.so.dbg: $(src)/vdso.lds $(vobjs) FORCE
+$(obj)/vdso.so.dbg: $(obj)/vdso.lds $(vobjs) FORCE
$(call if_changed,vdso)
$(obj)/%.so: OBJCOPYFLAGS := -S
diff --git a/arch/x86/xen/efi.c b/arch/x86/xen/efi.c
index a18703be9ead..1804b27f9632 100644
--- a/arch/x86/xen/efi.c
+++ b/arch/x86/xen/efi.c
@@ -115,6 +115,61 @@ static efi_system_table_t __init *xen_efi_probe(void)
return &efi_systab_xen;
}
+/*
+ * Determine whether we're in secure boot mode.
+ *
+ * Please keep the logic in sync with
+ * drivers/firmware/efi/libstub/secureboot.c:efi_get_secureboot().
+ */
+static enum efi_secureboot_mode xen_efi_get_secureboot(void)
+{
+ static efi_guid_t efi_variable_guid = EFI_GLOBAL_VARIABLE_GUID;
+ static efi_guid_t shim_guid = EFI_SHIM_LOCK_GUID;
+ efi_status_t status;
+ u8 moksbstate, secboot, setupmode;
+ unsigned long size;
+
+ size = sizeof(secboot);
+ status = efi.get_variable(L"SecureBoot", &efi_variable_guid,
+ NULL, &size, &secboot);
+
+ if (status == EFI_NOT_FOUND)
+ return efi_secureboot_mode_disabled;
+
+ if (status != EFI_SUCCESS)
+ goto out_efi_err;
+
+ size = sizeof(setupmode);
+ status = efi.get_variable(L"SetupMode", &efi_variable_guid,
+ NULL, &size, &setupmode);
+
+ if (status != EFI_SUCCESS)
+ goto out_efi_err;
+
+ if (secboot == 0 || setupmode == 1)
+ return efi_secureboot_mode_disabled;
+
+ /* See if a user has put the shim into insecure mode. */
+ size = sizeof(moksbstate);
+ status = efi.get_variable(L"MokSBStateRT", &shim_guid,
+ NULL, &size, &moksbstate);
+
+ /* If it fails, we don't care why. Default to secure. */
+ if (status != EFI_SUCCESS)
+ goto secure_boot_enabled;
+
+ if (moksbstate == 1)
+ return efi_secureboot_mode_disabled;
+
+ secure_boot_enabled:
+ pr_info("UEFI Secure Boot is enabled.\n");
+ return efi_secureboot_mode_enabled;
+
+ out_efi_err:
+ pr_err("Could not determine UEFI Secure Boot status.\n");
+ return efi_secureboot_mode_unknown;
+}
+
void __init xen_efi_init(void)
{
efi_system_table_t *efi_systab_xen;
@@ -129,6 +184,8 @@ void __init xen_efi_init(void)
boot_params.efi_info.efi_systab = (__u32)__pa(efi_systab_xen);
boot_params.efi_info.efi_systab_hi = (__u32)(__pa(efi_systab_xen) >> 32);
+ boot_params.secure_boot = xen_efi_get_secureboot();
+
set_bit(EFI_BOOT, &efi.flags);
set_bit(EFI_PARAVIRT, &efi.flags);
set_bit(EFI_64BIT, &efi.flags);
diff --git a/arch/x86/xen/enlighten_hvm.c b/arch/x86/xen/enlighten_hvm.c
index 826898701045..19c1ff542387 100644
--- a/arch/x86/xen/enlighten_hvm.c
+++ b/arch/x86/xen/enlighten_hvm.c
@@ -65,6 +65,19 @@ static void __init xen_hvm_init_mem_mapping(void)
{
early_memunmap(HYPERVISOR_shared_info, PAGE_SIZE);
HYPERVISOR_shared_info = __va(PFN_PHYS(shared_info_pfn));
+
+ /*
+ * The virtual address of the shared_info page has changed, so
+ * the vcpu_info pointer for VCPU 0 is now stale.
+ *
+ * The prepare_boot_cpu callback will re-initialize it via
+ * xen_vcpu_setup, but we can't rely on that to be called for
+ * old Xen versions (xen_have_vector_callback == 0).
+ *
+ * It is, in any case, bad to have a stale vcpu_info pointer,
+ * so reset it now.
+ */
+ xen_vcpu_info_reset(0);
}
static void __init init_hvm_pv_info(void)
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index c36d23aa6c35..357969a3697c 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -421,45 +421,33 @@ static void xen_load_gdt(const struct desc_ptr *dtr)
{
unsigned long va = dtr->address;
unsigned int size = dtr->size + 1;
- unsigned pages = DIV_ROUND_UP(size, PAGE_SIZE);
- unsigned long frames[pages];
- int f;
-
- /*
- * A GDT can be up to 64k in size, which corresponds to 8192
- * 8-byte entries, or 16 4k pages..
- */
+ unsigned long pfn, mfn;
+ int level;
+ pte_t *ptep;
+ void *virt;
- BUG_ON(size > 65536);
+ /* @size should be at most GDT_SIZE, which is smaller than PAGE_SIZE. */
+ BUG_ON(size > PAGE_SIZE);
BUG_ON(va & ~PAGE_MASK);
- for (f = 0; va < dtr->address + size; va += PAGE_SIZE, f++) {
- int level;
- pte_t *ptep;
- unsigned long pfn, mfn;
- void *virt;
-
- /*
- * The GDT is per-cpu and is in the percpu data area.
- * That can be virtually mapped, so we need to do a
- * page-walk to get the underlying MFN for the
- * hypercall. The page can also be in the kernel's
- * linear range, so we need to RO that mapping too.
- */
- ptep = lookup_address(va, &level);
- BUG_ON(ptep == NULL);
-
- pfn = pte_pfn(*ptep);
- mfn = pfn_to_mfn(pfn);
- virt = __va(PFN_PHYS(pfn));
+ /*
+ * The GDT is per-cpu and is in the percpu data area.
+ * That can be virtually mapped, so we need to do a
+ * page-walk to get the underlying MFN for the
+ * hypercall. The page can also be in the kernel's
+ * linear range, so we need to RO that mapping too.
+ */
+ ptep = lookup_address(va, &level);
+ BUG_ON(ptep == NULL);
- frames[f] = mfn;
+ pfn = pte_pfn(*ptep);
+ mfn = pfn_to_mfn(pfn);
+ virt = __va(PFN_PHYS(pfn));
- make_lowmem_page_readonly((void *)va);
- make_lowmem_page_readonly(virt);
- }
+ make_lowmem_page_readonly((void *)va);
+ make_lowmem_page_readonly(virt);
- if (HYPERVISOR_set_gdt(frames, size / sizeof(struct desc_struct)))
+ if (HYPERVISOR_set_gdt(&mfn, size / sizeof(struct desc_struct)))
BUG();
}
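A quick standalone sanity check of the new single-page assumption (the values below mirror the x86-64 definitions, where GDT_ENTRIES is 16 and descriptors are 8 bytes, so the GDT is 128 bytes):

#include <stdio.h>

#define GDT_ENTRIES	16			/* x86-64 value */
#define GDT_SIZE	(GDT_ENTRIES * 8)	/* 128 bytes */
#define PAGE_SIZE	4096

_Static_assert(GDT_SIZE <= PAGE_SIZE, "GDT must fit in a single page");

int main(void)
{
	printf("%d <= %d\n", GDT_SIZE, PAGE_SIZE);
	return 0;
}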
@@ -470,34 +458,22 @@ static void __init xen_load_gdt_boot(const struct desc_ptr *dtr)
{
unsigned long va = dtr->address;
unsigned int size = dtr->size + 1;
- unsigned pages = DIV_ROUND_UP(size, PAGE_SIZE);
- unsigned long frames[pages];
- int f;
-
- /*
- * A GDT can be up to 64k in size, which corresponds to 8192
- * 8-byte entries, or 16 4k pages..
- */
+ unsigned long pfn, mfn;
+ pte_t pte;
- BUG_ON(size > 65536);
+ /* @size should be at most GDT_SIZE, which is smaller than PAGE_SIZE. */
+ BUG_ON(size > PAGE_SIZE);
BUG_ON(va & ~PAGE_MASK);
- for (f = 0; va < dtr->address + size; va += PAGE_SIZE, f++) {
- pte_t pte;
- unsigned long pfn, mfn;
+ pfn = virt_to_pfn(va);
+ mfn = pfn_to_mfn(pfn);
- pfn = virt_to_pfn(va);
- mfn = pfn_to_mfn(pfn);
+ pte = pfn_pte(pfn, PAGE_KERNEL_RO);
- pte = pfn_pte(pfn, PAGE_KERNEL_RO);
-
- if (HYPERVISOR_update_va_mapping((unsigned long)va, pte, 0))
- BUG();
-
- frames[f] = mfn;
- }
+ if (HYPERVISOR_update_va_mapping((unsigned long)va, pte, 0))
+ BUG();
- if (HYPERVISOR_set_gdt(frames, size / sizeof(struct desc_struct)))
+ if (HYPERVISOR_set_gdt(&mfn, size / sizeof(struct desc_struct)))
BUG();
}
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index d33e7dbe3129..2d76106788a3 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -42,13 +42,11 @@ xmaddr_t arbitrary_virt_to_machine(void *vaddr)
}
EXPORT_SYMBOL_GPL(arbitrary_virt_to_machine);
-static void xen_flush_tlb_all(void)
+static noinline void xen_flush_tlb_all(void)
{
struct mmuext_op *op;
struct multicall_space mcs;
- trace_xen_mmu_flush_tlb_all(0);
-
preempt_disable();
mcs = xen_mc_entry(sizeof(*op));
diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
index 486c0a34d00b..2c30cabfda90 100644
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -1310,13 +1310,11 @@ unsigned long xen_read_cr2_direct(void)
return this_cpu_read(xen_vcpu_info.arch.cr2);
}
-static void xen_flush_tlb(void)
+static noinline void xen_flush_tlb(void)
{
struct mmuext_op *op;
struct multicall_space mcs;
- trace_xen_mmu_flush_tlb(0);
-
preempt_disable();
mcs = xen_mc_entry(sizeof(*op));
diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig
index c921e8bccdc8..17df332269b2 100644
--- a/arch/xtensa/Kconfig
+++ b/arch/xtensa/Kconfig
@@ -19,7 +19,6 @@ config XTENSA
select HAVE_ARCH_KASAN if MMU
select HAVE_CC_STACKPROTECTOR
select HAVE_DEBUG_KMEMLEAK
- select HAVE_DMA_API_DEBUG
select HAVE_DMA_CONTIGUOUS
select HAVE_EXIT_THREAD
select HAVE_FUNCTION_TRACER
diff --git a/arch/xtensa/include/asm/pci.h b/arch/xtensa/include/asm/pci.h
index d5a82153a7c5..6ddf0a30c60d 100644
--- a/arch/xtensa/include/asm/pci.h
+++ b/arch/xtensa/include/asm/pci.h
@@ -42,8 +42,6 @@ extern struct pci_controller* pcibios_alloc_controller(void);
* decisions.
*/
-#define PCI_DMA_BUS_IS_PHYS (1)
-
/* Tell PCI code what kind of PCI resource mappings we support */
#define HAVE_PCI_MMAP 1
#define ARCH_GENERIC_PCI_MMAP_RESOURCE 1
diff --git a/arch/xtensa/kernel/pci-dma.c b/arch/xtensa/kernel/pci-dma.c
index 732631ce250f..392b4a80ebc2 100644
--- a/arch/xtensa/kernel/pci-dma.c
+++ b/arch/xtensa/kernel/pci-dma.c
@@ -261,12 +261,3 @@ const struct dma_map_ops xtensa_dma_map_ops = {
.mapping_error = xtensa_dma_mapping_error,
};
EXPORT_SYMBOL(xtensa_dma_map_ops);
-
-#define PREALLOC_DMA_DEBUG_ENTRIES (1 << 16)
-
-static int __init xtensa_dma_init(void)
-{
- dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES);
- return 0;
-}
-fs_initcall(xtensa_dma_init);
diff --git a/arch/xtensa/kernel/traps.c b/arch/xtensa/kernel/traps.c
index 32c5207f1226..86507fa7c2d7 100644
--- a/arch/xtensa/kernel/traps.c
+++ b/arch/xtensa/kernel/traps.c
@@ -323,8 +323,6 @@ do_illegal_instruction(struct pt_regs *regs)
void
do_unaligned_user (struct pt_regs *regs)
{
- siginfo_t info;
-
__die_if_kernel("Unhandled unaligned exception in kernel",
regs, SIGKILL);
@@ -334,12 +332,7 @@ do_unaligned_user (struct pt_regs *regs)
"(pid = %d, pc = %#010lx)\n",
regs->excvaddr, current->comm,
task_pid_nr(current), regs->pc);
- info.si_signo = SIGBUS;
- info.si_errno = 0;
- info.si_code = BUS_ADRALN;
- info.si_addr = (void *) regs->excvaddr;
- force_sig_info(SIGSEGV, &info, current);
-
+ force_sig_fault(SIGBUS, BUS_ADRALN, (void *) regs->excvaddr, current);
}
#endif
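The conversion pattern used here and in the hunks below, sketched standalone (stub struct only; the real helper lives in kernel/signal.c and takes (sig, code, addr, task) in this series). It also removes a latent inconsistency: the deleted code filled the siginfo with SIGBUS/BUS_ADRALN but then raised SIGSEGV.

#include <signal.h>
#include <stddef.h>

/* Stub of the fields force_sig_fault() fills on the caller's behalf. */
struct siginfo_sketch {
	int si_signo, si_errno, si_code;
	void *si_addr;
};

static void force_sig_fault_sketch(int sig, int code, void *addr,
				   struct siginfo_sketch *info)
{
	info->si_signo = sig;	/* same signal in siginfo and delivery */
	info->si_errno = 0;
	info->si_code  = code;
	info->si_addr  = addr;
}

int main(void)
{
	struct siginfo_sketch info;

	/* Mirrors: force_sig_fault(SIGBUS, BUS_ADRALN, excvaddr, current) */
	force_sig_fault_sketch(SIGBUS, BUS_ADRALN, NULL, &info);
	return info.si_signo == SIGBUS ? 0 : 1;
}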
diff --git a/arch/xtensa/mm/fault.c b/arch/xtensa/mm/fault.c
index 8b9b6f44bb06..c111a833205a 100644
--- a/arch/xtensa/mm/fault.c
+++ b/arch/xtensa/mm/fault.c
@@ -39,13 +39,13 @@ void do_page_fault(struct pt_regs *regs)
struct mm_struct *mm = current->mm;
unsigned int exccause = regs->exccause;
unsigned int address = regs->excvaddr;
- siginfo_t info;
+ int code;
int is_write, is_exec;
int fault;
unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
- info.si_code = SEGV_MAPERR;
+ code = SEGV_MAPERR;
/* We fault-in kernel-space virtual memory on-demand. The
* 'reference' page table is init_mm.pgd.
@@ -91,7 +91,7 @@ retry:
*/
good_area:
- info.si_code = SEGV_ACCERR;
+ code = SEGV_ACCERR;
if (is_write) {
if (!(vma->vm_flags & VM_WRITE))
@@ -157,11 +157,7 @@ bad_area:
if (user_mode(regs)) {
current->thread.bad_vaddr = address;
current->thread.error_code = is_write;
- info.si_signo = SIGSEGV;
- info.si_errno = 0;
- /* info.si_code has been set above */
- info.si_addr = (void *) address;
- force_sig_info(SIGSEGV, &info, current);
+ force_sig_fault(SIGSEGV, code, (void *) address, current);
return;
}
bad_page_fault(regs, address, SIGSEGV);
@@ -186,11 +182,7 @@ do_sigbus:
* or user mode.
*/
current->thread.bad_vaddr = address;
- info.si_code = SIGBUS;
- info.si_errno = 0;
- info.si_code = BUS_ADRERR;
- info.si_addr = (void *) address;
- force_sig_info(SIGBUS, &info, current);
+ force_sig_fault(SIGBUS, BUS_ADRERR, (void *) address, current);
/* Kernel mode? Handle exceptions or die */
if (!user_mode(regs))
diff --git a/arch/xtensa/platforms/iss/console.c b/arch/xtensa/platforms/iss/console.c
index 92f567f9a21e..af81a62faba6 100644
--- a/arch/xtensa/platforms/iss/console.c
+++ b/arch/xtensa/platforms/iss/console.c
@@ -153,19 +153,6 @@ static int rs_proc_show(struct seq_file *m, void *v)
return 0;
}
-static int rs_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, rs_proc_show, NULL);
-}
-
-static const struct file_operations rs_proc_fops = {
- .owner = THIS_MODULE,
- .open = rs_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
static const struct tty_operations serial_ops = {
.open = rs_open,
.close = rs_close,
@@ -176,7 +163,7 @@ static const struct tty_operations serial_ops = {
.chars_in_buffer = rs_chars_in_buffer,
.hangup = rs_hangup,
.wait_until_sent = rs_wait_until_sent,
- .proc_fops = &rs_proc_fops,
+ .proc_show = rs_proc_show,
};
int __init rs_init(void)
diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c
index d819dc77fe65..a9e8633388f4 100644
--- a/block/bfq-cgroup.c
+++ b/block/bfq-cgroup.c
@@ -55,13 +55,13 @@ BFQG_FLAG_FNS(empty)
/* This should be called with the scheduler lock held. */
static void bfqg_stats_update_group_wait_time(struct bfqg_stats *stats)
{
- unsigned long long now;
+ u64 now;
if (!bfqg_stats_waiting(stats))
return;
- now = sched_clock();
- if (time_after64(now, stats->start_group_wait_time))
+ now = ktime_get_ns();
+ if (now > stats->start_group_wait_time)
blkg_stat_add(&stats->group_wait_time,
now - stats->start_group_wait_time);
bfqg_stats_clear_waiting(stats);
@@ -77,20 +77,20 @@ static void bfqg_stats_set_start_group_wait_time(struct bfq_group *bfqg,
return;
if (bfqg == curr_bfqg)
return;
- stats->start_group_wait_time = sched_clock();
+ stats->start_group_wait_time = ktime_get_ns();
bfqg_stats_mark_waiting(stats);
}
/* This should be called with the scheduler lock held. */
static void bfqg_stats_end_empty_time(struct bfqg_stats *stats)
{
- unsigned long long now;
+ u64 now;
if (!bfqg_stats_empty(stats))
return;
- now = sched_clock();
- if (time_after64(now, stats->start_empty_time))
+ now = ktime_get_ns();
+ if (now > stats->start_empty_time)
blkg_stat_add(&stats->empty_time,
now - stats->start_empty_time);
bfqg_stats_clear_empty(stats);
@@ -116,7 +116,7 @@ void bfqg_stats_set_start_empty_time(struct bfq_group *bfqg)
if (bfqg_stats_empty(stats))
return;
- stats->start_empty_time = sched_clock();
+ stats->start_empty_time = ktime_get_ns();
bfqg_stats_mark_empty(stats);
}
@@ -125,9 +125,9 @@ void bfqg_stats_update_idle_time(struct bfq_group *bfqg)
struct bfqg_stats *stats = &bfqg->stats;
if (bfqg_stats_idling(stats)) {
- unsigned long long now = sched_clock();
+ u64 now = ktime_get_ns();
- if (time_after64(now, stats->start_idle_time))
+ if (now > stats->start_idle_time)
blkg_stat_add(&stats->idle_time,
now - stats->start_idle_time);
bfqg_stats_clear_idling(stats);
@@ -138,7 +138,7 @@ void bfqg_stats_set_start_idle_time(struct bfq_group *bfqg)
{
struct bfqg_stats *stats = &bfqg->stats;
- stats->start_idle_time = sched_clock();
+ stats->start_idle_time = ktime_get_ns();
bfqg_stats_mark_idling(stats);
}
@@ -171,18 +171,18 @@ void bfqg_stats_update_io_merged(struct bfq_group *bfqg, unsigned int op)
blkg_rwstat_add(&bfqg->stats.merged, op, 1);
}
-void bfqg_stats_update_completion(struct bfq_group *bfqg, uint64_t start_time,
- uint64_t io_start_time, unsigned int op)
+void bfqg_stats_update_completion(struct bfq_group *bfqg, u64 start_time_ns,
+ u64 io_start_time_ns, unsigned int op)
{
struct bfqg_stats *stats = &bfqg->stats;
- unsigned long long now = sched_clock();
+ u64 now = ktime_get_ns();
- if (time_after64(now, io_start_time))
+ if (now > io_start_time_ns)
blkg_rwstat_add(&stats->service_time, op,
- now - io_start_time);
- if (time_after64(io_start_time, start_time))
+ now - io_start_time_ns);
+ if (io_start_time_ns > start_time_ns)
blkg_rwstat_add(&stats->wait_time, op,
- io_start_time - start_time);
+ io_start_time_ns - start_time_ns);
}
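The switch from sched_clock() plus time_after64() to ktime_get_ns() plus plain comparisons works because ktime_get_ns() returns monotonic nanoseconds since boot in a u64, which cannot wrap on any realistic uptime. A standalone back-of-the-envelope check:

#include <stdio.h>

int main(void)
{
	unsigned long long ns_per_year = 365ULL * 24 * 3600 * 1000000000ULL;

	/* ~584 years of uptime before a u64 nanosecond counter wraps */
	printf("%llu years\n", ~0ULL / ns_per_year);
	return 0;
}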
#else /* CONFIG_BFQ_GROUP_IOSCHED && CONFIG_DEBUG_BLK_CGROUP */
@@ -191,8 +191,8 @@ void bfqg_stats_update_io_add(struct bfq_group *bfqg, struct bfq_queue *bfqq,
unsigned int op) { }
void bfqg_stats_update_io_remove(struct bfq_group *bfqg, unsigned int op) { }
void bfqg_stats_update_io_merged(struct bfq_group *bfqg, unsigned int op) { }
-void bfqg_stats_update_completion(struct bfq_group *bfqg, uint64_t start_time,
- uint64_t io_start_time, unsigned int op) { }
+void bfqg_stats_update_completion(struct bfq_group *bfqg, u64 start_time_ns,
+ u64 io_start_time_ns, unsigned int op) { }
void bfqg_stats_update_dequeue(struct bfq_group *bfqg) { }
void bfqg_stats_set_start_empty_time(struct bfq_group *bfqg) { }
void bfqg_stats_update_idle_time(struct bfq_group *bfqg) { }
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 771ae9730ac6..495b9ddb3355 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -49,9 +49,39 @@
*
* In particular, to provide these low-latency guarantees, BFQ
* explicitly privileges the I/O of two classes of time-sensitive
- * applications: interactive and soft real-time. This feature enables
- * BFQ to provide applications in these classes with a very low
- * latency. Finally, BFQ also features additional heuristics for
+ * applications: interactive and soft real-time. In more detail, BFQ
+ * behaves this way if the low_latency parameter is set (default
+ * configuration). This feature enables BFQ to provide applications in
+ * these classes with a very low latency.
+ *
+ * To implement this feature, BFQ constantly tries to detect whether
+ * the I/O requests in a bfq_queue come from an interactive or a soft
+ * real-time application. For brevity, in these cases, the queue is
+ * said to be interactive or soft real-time. In both cases, BFQ
+ * privileges the service of the queue, over that of non-interactive
+ * and non-soft-real-time queues. This privileging is performed,
+ * mainly, by raising the weight of the queue. So, for brevity, we
+ * simply call weight-raising periods the time periods during which
+ * a queue is privileged because it is deemed interactive or soft
+ * real-time.
+ *
+ * The detection of soft real-time queues/applications is described in
+ * detail in the comments on the function
+ * bfq_bfqq_softrt_next_start. On the other hand, the detection of an
+ * interactive queue works as follows: a queue is deemed interactive
+ * if it is constantly non-empty only for a limited time interval,
+ * after which it does become empty. The queue may be deemed
+ * interactive again (for a limited time), if it restarts being
+ * constantly non-empty, provided that this happens only after the
+ * queue has remained empty for a given minimum idle time.
+ *
+ * By default, BFQ computes automatically the above maximum time
+ * interval, i.e., the time interval after which a constantly
+ * non-empty queue stops being deemed interactive. Since a queue is
+ * weight-raised while it is deemed interactive, this maximum time
+ * interval happens to coincide with the (maximum) duration of the
+ * weight-raising for interactive queues.
+ *
+ * Finally, BFQ also features additional heuristics for
* preserving both a low latency and a high throughput on NCQ-capable,
* rotational or flash-based devices, and to get the job done quickly
* for applications consisting in many I/O-bound processes.
@@ -61,14 +91,14 @@
* all low-latency heuristics for that device, by setting low_latency
* to 0.
*
- * BFQ is described in [1], where also a reference to the initial, more
- * theoretical paper on BFQ can be found. The interested reader can find
- * in the latter paper full details on the main algorithm, as well as
- * formulas of the guarantees and formal proofs of all the properties.
- * With respect to the version of BFQ presented in these papers, this
- * implementation adds a few more heuristics, such as the one that
- * guarantees a low latency to soft real-time applications, and a
- * hierarchical extension based on H-WF2Q+.
+ * BFQ is described in [1], where also a reference to the initial,
+ * more theoretical paper on BFQ can be found. The interested reader
+ * can find in the latter paper full details on the main algorithm, as
+ * well as formulas of the guarantees and formal proofs of all the
+ * properties. With respect to the version of BFQ presented in these
+ * papers, this implementation adds a few more heuristics, such as the
+ * ones that guarantee a low latency to interactive and soft real-time
+ * applications, and a hierarchical extension based on H-WF2Q+.
*
* B-WF2Q+ is based on WF2Q+, which is described in [2], together with
* H-WF2Q+, while the augmented tree used here to implement B-WF2Q+
@@ -218,56 +248,46 @@ static struct kmem_cache *bfq_pool;
#define BFQ_RATE_SHIFT 16
/*
- * By default, BFQ computes the duration of the weight raising for
- * interactive applications automatically, using the following formula:
- * duration = (R / r) * T, where r is the peak rate of the device, and
- * R and T are two reference parameters.
- * In particular, R is the peak rate of the reference device (see
- * below), and T is a reference time: given the systems that are
- * likely to be installed on the reference device according to its
- * speed class, T is about the maximum time needed, under BFQ and
- * while reading two files in parallel, to load typical large
- * applications on these systems (see the comments on
- * max_service_from_wr below, for more details on how T is obtained).
- * In practice, the slower/faster the device at hand is, the more/less
- * it takes to load applications with respect to the reference device.
- * Accordingly, the longer/shorter BFQ grants weight raising to
- * interactive applications.
- *
- * BFQ uses four different reference pairs (R, T), depending on:
- * . whether the device is rotational or non-rotational;
- * . whether the device is slow, such as old or portable HDDs, as well as
- * SD cards, or fast, such as newer HDDs and SSDs.
+ * When configured for computing the duration of the weight-raising
+ * for interactive queues automatically (see the comments at the
+ * beginning of this file), BFQ does it using the following formula:
+ * duration = (ref_rate / r) * ref_wr_duration,
+ * where r is the peak rate of the device, and ref_rate and
+ * ref_wr_duration are two reference parameters. In particular,
+ * ref_rate is the peak rate of the reference storage device (see
+ * below), and ref_wr_duration is about the maximum time needed, with
+ * BFQ and while reading two files in parallel, to load typical large
+ * applications on the reference device (see the comments on
+ * max_service_from_wr below, for more details on how ref_wr_duration
+ * is obtained). In practice, the slower/faster the device at hand
+ * is, the more/less it takes to load applications with respect to the
+ * reference device. Accordingly, the longer/shorter BFQ grants
+ * weight raising to interactive applications.
*
- * The device's speed class is dynamically (re)detected in
- * bfq_update_peak_rate() every time the estimated peak rate is updated.
+ * BFQ uses two different reference pairs (ref_rate, ref_wr_duration),
+ * depending on whether the device is rotational or non-rotational.
*
- * In the following definitions, R_slow[0]/R_fast[0] and
- * T_slow[0]/T_fast[0] are the reference values for a slow/fast
- * rotational device, whereas R_slow[1]/R_fast[1] and
- * T_slow[1]/T_fast[1] are the reference values for a slow/fast
- * non-rotational device. Finally, device_speed_thresh are the
- * thresholds used to switch between speed classes. The reference
- * rates are not the actual peak rates of the devices used as a
- * reference, but slightly lower values. The reason for using these
- * slightly lower values is that the peak-rate estimator tends to
- * yield slightly lower values than the actual peak rate (it can yield
- * the actual peak rate only if there is only one process doing I/O,
- * and the process does sequential I/O).
+ * In the following definitions, ref_rate[0] and ref_wr_duration[0]
+ * are the reference values for a rotational device, whereas
+ * ref_rate[1] and ref_wr_duration[1] are the reference values for a
+ * non-rotational device. The reference rates are not the actual peak
+ * rates of the devices used as a reference, but slightly lower
+ * values. The reason for using slightly lower values is that the
+ * peak-rate estimator tends to yield slightly lower values than the
+ * actual peak rate (it can yield the actual peak rate only if there
+ * is only one process doing I/O, and the process does sequential
+ * I/O).
*
- * Both the reference peak rates and the thresholds are measured in
- * sectors/usec, left-shifted by BFQ_RATE_SHIFT.
+ * The reference peak rates are measured in sectors/usec, left-shifted
+ * by BFQ_RATE_SHIFT.
*/
-static int R_slow[2] = {1000, 10700};
-static int R_fast[2] = {14000, 33000};
+static int ref_rate[2] = {14000, 33000};
/*
- * To improve readability, a conversion function is used to initialize the
- * following arrays, which entails that they can be initialized only in a
- * function.
+ * To improve readability, a conversion function is used to initialize
+ * the following array, which entails that the array can be
+ * initialized only in a function.
*/
-static int T_slow[2];
-static int T_fast[2];
-static int device_speed_thresh[2];
+static int ref_wr_duration[2];
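A standalone numeric illustration of the duration formula described above (numbers are illustrative only; 33000 is ref_rate[1] as defined here, and the 3000 stands in for a ref_wr_duration value computed elsewhere):

#include <stdio.h>

int main(void)
{
	/* duration = (ref_rate / r) * ref_wr_duration, kept as the
	 * precomputed product rate_dur_prod divided by the peak rate r.
	 */
	unsigned long long ref_rate = 33000, ref_wr_duration = 3000;
	unsigned long long rate_dur_prod = ref_rate * ref_wr_duration;

	printf("%llu\n", rate_dur_prod / 33000);	/* device at reference speed: 3000 */
	printf("%llu\n", rate_dur_prod / 16500);	/* half as fast: 6000, twice the wr */
	return 0;
}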
/*
* BFQ uses the above-detailed, time-based weight-raising mechanism to
@@ -487,46 +507,6 @@ static struct request *bfq_choose_req(struct bfq_data *bfqd,
}
/*
- * See the comments on bfq_limit_depth for the purpose of
- * the depths set in the function.
- */
-static void bfq_update_depths(struct bfq_data *bfqd, struct sbitmap_queue *bt)
-{
- bfqd->sb_shift = bt->sb.shift;
-
- /*
- * In-word depths if no bfq_queue is being weight-raised:
- * leaving 25% of tags only for sync reads.
- *
- * In next formulas, right-shift the value
- * (1U<<bfqd->sb_shift), instead of computing directly
- * (1U<<(bfqd->sb_shift - something)), to be robust against
- * any possible value of bfqd->sb_shift, without having to
- * limit 'something'.
- */
- /* no more than 50% of tags for async I/O */
- bfqd->word_depths[0][0] = max((1U<<bfqd->sb_shift)>>1, 1U);
- /*
- * no more than 75% of tags for sync writes (25% extra tags
- * w.r.t. async I/O, to prevent async I/O from starving sync
- * writes)
- */
- bfqd->word_depths[0][1] = max(((1U<<bfqd->sb_shift) * 3)>>2, 1U);
-
- /*
- * In-word depths in case some bfq_queue is being weight-
- * raised: leaving ~63% of tags for sync reads. This is the
- * highest percentage for which, in our tests, application
- * start-up times didn't suffer from any regression due to tag
- * shortage.
- */
- /* no more than ~18% of tags for async I/O */
- bfqd->word_depths[1][0] = max(((1U<<bfqd->sb_shift) * 3)>>4, 1U);
- /* no more than ~37% of tags for sync writes (~20% extra tags) */
- bfqd->word_depths[1][1] = max(((1U<<bfqd->sb_shift) * 6)>>4, 1U);
-}
-
-/*
* Async I/O can easily starve sync I/O (both sync reads and sync
* writes), by consuming all tags. Similarly, storms of sync writes,
* such as those that sync(2) may trigger, can starve sync reads.
@@ -535,25 +515,11 @@ static void bfq_update_depths(struct bfq_data *bfqd, struct sbitmap_queue *bt)
*/
static void bfq_limit_depth(unsigned int op, struct blk_mq_alloc_data *data)
{
- struct blk_mq_tags *tags = blk_mq_tags_from_data(data);
struct bfq_data *bfqd = data->q->elevator->elevator_data;
- struct sbitmap_queue *bt;
if (op_is_sync(op) && !op_is_write(op))
return;
- if (data->flags & BLK_MQ_REQ_RESERVED) {
- if (unlikely(!tags->nr_reserved_tags)) {
- WARN_ON_ONCE(1);
- return;
- }
- bt = &tags->breserved_tags;
- } else
- bt = &tags->bitmap_tags;
-
- if (unlikely(bfqd->sb_shift != bt->sb.shift))
- bfq_update_depths(bfqd, bt);
-
data->shallow_depth =
bfqd->word_depths[!!bfqd->wr_busy_queues][op_is_sync(op)];
@@ -906,26 +872,30 @@ static unsigned int bfq_wr_duration(struct bfq_data *bfqd)
if (bfqd->bfq_wr_max_time > 0)
return bfqd->bfq_wr_max_time;
- dur = bfqd->RT_prod;
+ dur = bfqd->rate_dur_prod;
do_div(dur, bfqd->peak_rate);
/*
- * Limit duration between 3 and 13 seconds. Tests show that
- * higher values than 13 seconds often yield the opposite of
- * the desired result, i.e., worsen responsiveness by letting
- * non-interactive and non-soft-real-time applications
- * preserve weight raising for a too long time interval.
+ * Limit duration between 3 and 25 seconds. The upper limit
+ * has been conservatively set after the following worst case:
+ * on a QEMU/KVM virtual machine
+ * - running in a slow PC
+ * - with a virtual disk stacked on a slow low-end 5400rpm HDD
+ * - serving a heavy I/O workload, such as the sequential reading
+ * of several files
+ * mplayer took 23 seconds to start, if constantly weight-raised.
+ *
+ * As for values higher than the one accommodating the above bad
+ * scenario, tests show that such values would often yield
+ * the opposite of the desired result, i.e., would worsen
+ * responsiveness by allowing non-interactive applications to
+ * preserve weight raising for too long.
*
* On the other end, lower values than 3 seconds make it
* difficult for most interactive tasks to complete their jobs
* before weight-raising finishes.
*/
- if (dur > msecs_to_jiffies(13000))
- dur = msecs_to_jiffies(13000);
- else if (dur < msecs_to_jiffies(3000))
- dur = msecs_to_jiffies(3000);
-
- return dur;
+ return clamp_val(dur, msecs_to_jiffies(3000), msecs_to_jiffies(25000));
}
/* switch back from soft real-time to interactive weight raising */
@@ -1393,15 +1363,6 @@ static bool bfq_bfqq_update_budg_for_activation(struct bfq_data *bfqd,
}
/*
- * Return the farthest future time instant according to jiffies
- * macros.
- */
-static unsigned long bfq_greatest_from_now(void)
-{
- return jiffies + MAX_JIFFY_OFFSET;
-}
-
-/*
* Return the farthest past time instant according to jiffies
* macros.
*/
@@ -1545,7 +1506,8 @@ static void bfq_bfqq_handle_idle_busy_switch(struct bfq_data *bfqd,
in_burst = bfq_bfqq_in_large_burst(bfqq);
soft_rt = bfqd->bfq_wr_max_softrt_rate > 0 &&
!in_burst &&
- time_is_before_jiffies(bfqq->soft_rt_next_start);
+ time_is_before_jiffies(bfqq->soft_rt_next_start) &&
+ bfqq->dispatched == 0;
*interactive = !in_burst && idle_for_long_time;
wr_or_deserves_wr = bfqd->low_latency &&
(bfqq->wr_coeff > 1 ||
@@ -1858,6 +1820,8 @@ static int bfq_request_merge(struct request_queue *q, struct request **req,
return ELEVATOR_NO_MERGE;
}
+static struct bfq_queue *bfq_init_rq(struct request *rq);
+
static void bfq_request_merged(struct request_queue *q, struct request *req,
enum elv_merge type)
{
@@ -1866,7 +1830,7 @@ static void bfq_request_merged(struct request_queue *q, struct request *req,
blk_rq_pos(req) <
blk_rq_pos(container_of(rb_prev(&req->rb_node),
struct request, rb_node))) {
- struct bfq_queue *bfqq = RQ_BFQQ(req);
+ struct bfq_queue *bfqq = bfq_init_rq(req);
struct bfq_data *bfqd = bfqq->bfqd;
struct request *prev, *next_rq;
@@ -1891,14 +1855,25 @@ static void bfq_request_merged(struct request_queue *q, struct request *req,
}
}
+/*
+ * This function is called to notify the scheduler that the requests
+ * rq and 'next' have been merged, with 'next' going away. BFQ
+ * exploits this hook to address the following issue: if 'next' has a
+ * fifo_time lower that rq, then the fifo_time of rq must be set to
+ * the value of 'next', to not forget the greater age of 'next'.
+ *
+ * NOTE: in this function we assume that rq is in a bfq_queue, based
+ * on the fact that rq is picked from the hash table q->elevator->hash,
+ * which, in turn, is filled only with I/O requests present in
+ * bfq_queues, while BFQ is in use for the request queue q. In fact,
+ * the function that fills this hash table (elv_rqhash_add) is called
+ * only by bfq_insert_request.
+ */
static void bfq_requests_merged(struct request_queue *q, struct request *rq,
struct request *next)
{
- struct bfq_queue *bfqq = RQ_BFQQ(rq), *next_bfqq = RQ_BFQQ(next);
-
- if (!RB_EMPTY_NODE(&rq->rb_node))
- goto end;
- spin_lock_irq(&bfqq->bfqd->lock);
+ struct bfq_queue *bfqq = bfq_init_rq(rq),
+ *next_bfqq = bfq_init_rq(next);
/*
* If next and rq belong to the same bfq_queue and next is older
@@ -1920,11 +1895,6 @@ static void bfq_requests_merged(struct request_queue *q, struct request *rq,
if (bfqq->next_rq == next)
bfqq->next_rq = rq;
- bfq_remove_request(q, next);
- bfqg_stats_update_io_remove(bfqq_group(bfqq), next->cmd_flags);
-
- spin_unlock_irq(&bfqq->bfqd->lock);
-end:
bfqg_stats_update_io_merged(bfqq_group(bfqq), next->cmd_flags);
}
@@ -2506,37 +2476,15 @@ static unsigned long bfq_calc_max_budget(struct bfq_data *bfqd)
/*
* Update parameters related to throughput and responsiveness, as a
* function of the estimated peak rate. See comments on
- * bfq_calc_max_budget(), and on T_slow and T_fast arrays.
+ * bfq_calc_max_budget(), and on the ref_wr_duration array.
*/
static void update_thr_responsiveness_params(struct bfq_data *bfqd)
{
- int dev_type = blk_queue_nonrot(bfqd->queue);
-
- if (bfqd->bfq_user_max_budget == 0)
+ if (bfqd->bfq_user_max_budget == 0) {
bfqd->bfq_max_budget =
bfq_calc_max_budget(bfqd);
-
- if (bfqd->device_speed == BFQ_BFQD_FAST &&
- bfqd->peak_rate < device_speed_thresh[dev_type]) {
- bfqd->device_speed = BFQ_BFQD_SLOW;
- bfqd->RT_prod = R_slow[dev_type] *
- T_slow[dev_type];
- } else if (bfqd->device_speed == BFQ_BFQD_SLOW &&
- bfqd->peak_rate > device_speed_thresh[dev_type]) {
- bfqd->device_speed = BFQ_BFQD_FAST;
- bfqd->RT_prod = R_fast[dev_type] *
- T_fast[dev_type];
+ bfq_log(bfqd, "new max_budget = %d", bfqd->bfq_max_budget);
}
-
- bfq_log(bfqd,
-"dev_type %s dev_speed_class = %s (%llu sects/sec), thresh %llu setcs/sec",
- dev_type == 0 ? "ROT" : "NONROT",
- bfqd->device_speed == BFQ_BFQD_FAST ? "FAST" : "SLOW",
- bfqd->device_speed == BFQ_BFQD_FAST ?
- (USEC_PER_SEC*(u64)R_fast[dev_type])>>BFQ_RATE_SHIFT :
- (USEC_PER_SEC*(u64)R_slow[dev_type])>>BFQ_RATE_SHIFT,
- (USEC_PER_SEC*(u64)device_speed_thresh[dev_type])>>
- BFQ_RATE_SHIFT);
}
static void bfq_reset_rate_computation(struct bfq_data *bfqd,
@@ -3266,23 +3214,6 @@ void bfq_bfqq_expire(struct bfq_data *bfqd,
bfq_bfqq_softrt_next_start(bfqd, bfqq);
else {
/*
- * The application is still waiting for the
- * completion of one or more requests:
- * prevent it from possibly being incorrectly
- * deemed as soft real-time by setting its
- * soft_rt_next_start to infinity. In fact,
- * without this assignment, the application
- * would be incorrectly deemed as soft
- * real-time if:
- * 1) it issued a new request before the
- * completion of all its in-flight
- * requests, and
- * 2) at that time, its soft_rt_next_start
- * happened to be in the past.
- */
- bfqq->soft_rt_next_start =
- bfq_greatest_from_now();
- /*
* Schedule an update of soft_rt_next_start to when
* the task may be discovered to be isochronous.
*/
@@ -4540,14 +4471,12 @@ static inline void bfq_update_insert_stats(struct request_queue *q,
unsigned int cmd_flags) {}
#endif
-static void bfq_prepare_request(struct request *rq, struct bio *bio);
-
static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
bool at_head)
{
struct request_queue *q = hctx->queue;
struct bfq_data *bfqd = q->elevator->elevator_data;
- struct bfq_queue *bfqq = RQ_BFQQ(rq);
+ struct bfq_queue *bfqq;
bool idle_timer_disabled = false;
unsigned int cmd_flags;
@@ -4562,24 +4491,13 @@ static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
blk_mq_sched_request_inserted(rq);
spin_lock_irq(&bfqd->lock);
+ bfqq = bfq_init_rq(rq);
if (at_head || blk_rq_is_passthrough(rq)) {
if (at_head)
list_add(&rq->queuelist, &bfqd->dispatch);
else
list_add_tail(&rq->queuelist, &bfqd->dispatch);
- } else {
- if (WARN_ON_ONCE(!bfqq)) {
- /*
- * This should never happen. Most likely rq is
- * a requeued regular request, being
- * re-inserted without being first
- * re-prepared. Do a prepare, to avoid
- * failure.
- */
- bfq_prepare_request(rq, rq->bio);
- bfqq = RQ_BFQQ(rq);
- }
-
+ } else { /* bfqq is assumed to be non null here */
idle_timer_disabled = __bfq_insert_request(bfqd, rq);
/*
* Update bfqq, because, if a queue merge has occurred
@@ -4778,8 +4696,8 @@ static void bfq_finish_requeue_request(struct request *rq)
if (rq->rq_flags & RQF_STARTED)
bfqg_stats_update_completion(bfqq_group(bfqq),
- rq_start_time_ns(rq),
- rq_io_start_time_ns(rq),
+ rq->start_time_ns,
+ rq->io_start_time_ns,
rq->cmd_flags);
if (likely(rq->rq_flags & RQF_STARTED)) {
@@ -4922,11 +4840,48 @@ static struct bfq_queue *bfq_get_bfqq_handle_split(struct bfq_data *bfqd,
}
/*
- * Allocate bfq data structures associated with this request.
+ * Only reset private fields. The actual request preparation will be
+ * performed by bfq_init_rq, when rq is either inserted or merged. See
+ * comments on bfq_init_rq for the reason behind this delayed
+ * preparation.
*/
static void bfq_prepare_request(struct request *rq, struct bio *bio)
{
+ /*
+ * Regardless of whether we have an icq attached, we have to
+ * clear the scheduler pointers, as they might point to
+ * previously allocated bic/bfqq structs.
+ */
+ rq->elv.priv[0] = rq->elv.priv[1] = NULL;
+}
+
+/*
+ * If needed, init rq, allocate bfq data structures associated with
+ * rq, and increment reference counters in the destination bfq_queue
+ * for rq. Return the destination bfq_queue for rq, or NULL if rq is
+ * not associated with any bfq_queue.
+ *
+ * This function is invoked by the functions that perform rq insertion
+ * or merging. One may have expected the above preparation operations
+ * to be performed in bfq_prepare_request, and not delayed to when rq
+ * is inserted or merged. The rationale behind this delayed
+ * preparation is that, after the prepare_request hook is invoked for
+ * rq, rq may still be transformed into a request with no icq, i.e., a
+ * request not associated with any queue. No bfq hook is invoked to
+ * signal this transformation. As a consequence, should these
+ * preparation operations be performed when the prepare_request hook
+ * is invoked, and should rq be transformed one moment later, bfq
+ * would end up in an inconsistent state, because it would have
+ * incremented some queue counters for an rq destined to
+ * transformation, without any chance to correctly lower these
+ * counters back. In contrast, no transformation can happen to rq
+ * any longer once rq has been inserted or merged. So, it is safe to
+ * execute these preparation operations when rq is finally inserted
+ * or merged.
+ */
+static struct bfq_queue *bfq_init_rq(struct request *rq)
+{
struct request_queue *q = rq->q;
+ struct bio *bio = rq->bio;
struct bfq_data *bfqd = q->elevator->elevator_data;
struct bfq_io_cq *bic;
const int is_sync = rq_is_sync(rq);
@@ -4934,20 +4889,21 @@ static void bfq_prepare_request(struct request *rq, struct bio *bio)
bool new_queue = false;
bool bfqq_already_existing = false, split = false;
+ if (unlikely(!rq->elv.icq))
+ return NULL;
+
/*
- * Even if we don't have an icq attached, we should still clear
- * the scheduler pointers, as they might point to previously
- * allocated bic/bfqq structs.
+ * We assume that elv.priv[1] is set only if everything is set
+ * for this rq. This holds true, because this function is
+ * invoked only for insertion or merging, and, after such
+ * events, a request cannot be manipulated any longer before
+ * being removed from bfq.
*/
- if (!rq->elv.icq) {
- rq->elv.priv[0] = rq->elv.priv[1] = NULL;
- return;
- }
+ if (rq->elv.priv[1])
+ return rq->elv.priv[1];
bic = icq_to_bic(rq->elv.icq);
- spin_lock_irq(&bfqd->lock);
-
bfq_check_ioprio_change(bic, bio);
bfq_bic_update_cgroup(bic, bio);
@@ -5006,7 +4962,7 @@ static void bfq_prepare_request(struct request *rq, struct bio *bio)
if (unlikely(bfq_bfqq_just_created(bfqq)))
bfq_handle_burst(bfqd, bfqq);
- spin_unlock_irq(&bfqd->lock);
+ return bfqq;
}
static void bfq_idle_slice_timer_body(struct bfq_queue *bfqq)
@@ -5105,6 +5061,64 @@ void bfq_put_async_queues(struct bfq_data *bfqd, struct bfq_group *bfqg)
__bfq_put_async_bfqq(bfqd, &bfqg->async_idle_bfqq);
}
+/*
+ * See the comments on bfq_limit_depth for the purpose of
+ * the depths set in the function. Return minimum shallow depth we'll use.
+ */
+static unsigned int bfq_update_depths(struct bfq_data *bfqd,
+ struct sbitmap_queue *bt)
+{
+ unsigned int i, j, min_shallow = UINT_MAX;
+
+ /*
+ * In-word depths if no bfq_queue is being weight-raised:
+ * leaving 25% of tags only for sync reads.
+ *
+ * In next formulas, right-shift the value
+ * (1U<<bt->sb.shift), instead of computing directly
+ * (1U<<(bt->sb.shift - something)), to be robust against
+ * any possible value of bt->sb.shift, without having to
+ * limit 'something'.
+ */
+ /* no more than 50% of tags for async I/O */
+ bfqd->word_depths[0][0] = max((1U << bt->sb.shift) >> 1, 1U);
+ /*
+ * no more than 75% of tags for sync writes (25% extra tags
+ * w.r.t. async I/O, to prevent async I/O from starving sync
+ * writes)
+ */
+ bfqd->word_depths[0][1] = max(((1U << bt->sb.shift) * 3) >> 2, 1U);
+
+ /*
+ * In-word depths in case some bfq_queue is being weight-
+ * raised: leaving ~63% of tags for sync reads. This is the
+ * highest percentage for which, in our tests, application
+ * start-up times didn't suffer from any regression due to tag
+ * shortage.
+ */
+ /* no more than ~18% of tags for async I/O */
+ bfqd->word_depths[1][0] = max(((1U << bt->sb.shift) * 3) >> 4, 1U);
+ /* no more than ~37% of tags for sync writes (~20% extra tags) */
+ bfqd->word_depths[1][1] = max(((1U << bt->sb.shift) * 6) >> 4, 1U);
+
+ for (i = 0; i < 2; i++)
+ for (j = 0; j < 2; j++)
+ min_shallow = min(min_shallow, bfqd->word_depths[i][j]);
+
+ return min_shallow;
+}
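
For concreteness, a worked example of the formulas above, assuming
bt->sb.shift == 8 (an in-word depth of 256 tags):

	/*
	 * word_depths[0][0] = 256 >> 1        = 128   50%  (async)
	 * word_depths[0][1] = (256 * 3) >> 2  = 192   75%  (sync writes)
	 * word_depths[1][0] = (256 * 3) >> 4  =  48  ~18%  (async, wr)
	 * word_depths[1][1] = (256 * 6) >> 4  =  96  ~37%  (sync writes, wr)
	 *
	 * so bfq_update_depths() returns min_shallow == 48.
	 */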
+
+static int bfq_init_hctx(struct blk_mq_hw_ctx *hctx, unsigned int index)
+{
+ struct bfq_data *bfqd = hctx->queue->elevator->elevator_data;
+ struct blk_mq_tags *tags = hctx->sched_tags;
+ unsigned int min_shallow;
+
+ min_shallow = bfq_update_depths(bfqd, &tags->bitmap_tags);
+ sbitmap_queue_min_shallow_depth(&tags->bitmap_tags, min_shallow);
+ return 0;
+}
+
static void bfq_exit_queue(struct elevator_queue *e)
{
struct bfq_data *bfqd = e->elevator_data;
@@ -5242,14 +5256,12 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e)
bfqd->wr_busy_queues = 0;
/*
- * Begin by assuming, optimistically, that the device is a
- * high-speed one, and that its peak rate is equal to 2/3 of
- * the highest reference rate.
+ * Begin by assuming, optimistically, that the device peak
+ * rate is equal to 2/3 of the highest reference rate.
*/
- bfqd->RT_prod = R_fast[blk_queue_nonrot(bfqd->queue)] *
- T_fast[blk_queue_nonrot(bfqd->queue)];
- bfqd->peak_rate = R_fast[blk_queue_nonrot(bfqd->queue)] * 2 / 3;
- bfqd->device_speed = BFQ_BFQD_FAST;
+ bfqd->rate_dur_prod = ref_rate[blk_queue_nonrot(bfqd->queue)] *
+ ref_wr_duration[blk_queue_nonrot(bfqd->queue)];
+ bfqd->peak_rate = ref_rate[blk_queue_nonrot(bfqd->queue)] * 2 / 3;
spin_lock_init(&bfqd->lock);
@@ -5526,6 +5538,7 @@ static struct elevator_type iosched_bfq_mq = {
.requests_merged = bfq_requests_merged,
.request_merged = bfq_request_merged,
.has_work = bfq_has_work,
+ .init_hctx = bfq_init_hctx,
.init_sched = bfq_init_queue,
.exit_sched = bfq_exit_queue,
},
@@ -5556,8 +5569,8 @@ static int __init bfq_init(void)
/*
* Times to load large popular applications for the typical
* systems installed on the reference devices (see the
- * comments before the definitions of the next two
- * arrays). Actually, we use slightly slower values, as the
+ * comments before the definition of the next
+ * array). Actually, we use slightly lower values, as the
* estimated peak rate tends to be smaller than the actual
* peak rate. The reason for this last fact is that estimates
* are computed over much shorter time intervals than the long
@@ -5566,25 +5579,8 @@ static int __init bfq_init(void)
* scheduler cannot rely on a peak-rate-evaluation workload to
* be run for a long time.
*/
- T_slow[0] = msecs_to_jiffies(3500); /* actually 4 sec */
- T_slow[1] = msecs_to_jiffies(6000); /* actually 6.5 sec */
- T_fast[0] = msecs_to_jiffies(7000); /* actually 8 sec */
- T_fast[1] = msecs_to_jiffies(2500); /* actually 3 sec */
-
- /*
- * Thresholds that determine the switch between speed classes
- * (see the comments before the definition of the array
- * device_speed_thresh). These thresholds are biased towards
- * transitions to the fast class. This is safer than the
- * opposite bias. In fact, a wrong transition to the slow
- * class results in short weight-raising periods, because the
- * speed of the device then tends to be higher that the
- * reference peak rate. On the opposite end, a wrong
- * transition to the fast class tends to increase
- * weight-raising periods, because of the opposite reason.
- */
- device_speed_thresh[0] = (4 * R_slow[0]) / 3;
- device_speed_thresh[1] = (4 * R_slow[1]) / 3;
+ ref_wr_duration[0] = msecs_to_jiffies(7000); /* actually 8 sec */
+ ref_wr_duration[1] = msecs_to_jiffies(2500); /* actually 3 sec */
ret = elv_register(&iosched_bfq_mq);
if (ret)
diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h
index ae2f3dadec44..0f712e03b035 100644
--- a/block/bfq-iosched.h
+++ b/block/bfq-iosched.h
@@ -399,11 +399,6 @@ struct bfq_io_cq {
struct bfq_ttime saved_ttime;
};
-enum bfq_device_speed {
- BFQ_BFQD_FAST,
- BFQ_BFQD_SLOW,
-};
-
/**
* struct bfq_data - per-device data structure.
*
@@ -611,12 +606,11 @@ struct bfq_data {
/* Max service-rate for a soft real-time queue, in sectors/sec */
unsigned int bfq_wr_max_softrt_rate;
/*
- * Cached value of the product R*T, used for computing the
- * maximum duration of weight raising automatically.
+ * Cached value of the product ref_rate*ref_wr_duration, used
+ * for computing the maximum duration of weight raising
+ * automatically.
*/
- u64 RT_prod;
- /* device-speed class for the low-latency heuristic */
- enum bfq_device_speed device_speed;
+ u64 rate_dur_prod;
/* fallback dummy bfqq for extreme OOM conditions */
struct bfq_queue oom_bfqq;
@@ -636,12 +630,6 @@ struct bfq_data {
struct bfq_queue *bio_bfqq;
/*
- * Cached sbitmap shift, used to compute depth limits in
- * bfq_update_depths.
- */
- unsigned int sb_shift;
-
- /*
* Depth limits used in bfq_limit_depth (see comments on the
* function)
*/
@@ -732,9 +720,9 @@ struct bfqg_stats {
/* total time with empty current active q with other requests queued */
struct blkg_stat empty_time;
/* fields after this shouldn't be cleared on stat reset */
- uint64_t start_group_wait_time;
- uint64_t start_idle_time;
- uint64_t start_empty_time;
+ u64 start_group_wait_time;
+ u64 start_idle_time;
+ u64 start_empty_time;
uint16_t flags;
#endif /* CONFIG_BFQ_GROUP_IOSCHED && CONFIG_DEBUG_BLK_CGROUP */
};
@@ -856,8 +844,8 @@ void bfqg_stats_update_io_add(struct bfq_group *bfqg, struct bfq_queue *bfqq,
unsigned int op);
void bfqg_stats_update_io_remove(struct bfq_group *bfqg, unsigned int op);
void bfqg_stats_update_io_merged(struct bfq_group *bfqg, unsigned int op);
-void bfqg_stats_update_completion(struct bfq_group *bfqg, uint64_t start_time,
- uint64_t io_start_time, unsigned int op);
+void bfqg_stats_update_completion(struct bfq_group *bfqg, u64 start_time_ns,
+ u64 io_start_time_ns, unsigned int op);
void bfqg_stats_update_dequeue(struct bfq_group *bfqg);
void bfqg_stats_set_start_empty_time(struct bfq_group *bfqg);
void bfqg_stats_update_idle_time(struct bfq_group *bfqg);
diff --git a/block/bio-integrity.c b/block/bio-integrity.c
index 9cfdd6c83b5b..add7c7c85335 100644
--- a/block/bio-integrity.c
+++ b/block/bio-integrity.c
@@ -56,12 +56,12 @@ struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio,
struct bio_set *bs = bio->bi_pool;
unsigned inline_vecs;
- if (!bs || !bs->bio_integrity_pool) {
+ if (!bs || !mempool_initialized(&bs->bio_integrity_pool)) {
bip = kmalloc(sizeof(struct bio_integrity_payload) +
sizeof(struct bio_vec) * nr_vecs, gfp_mask);
inline_vecs = nr_vecs;
} else {
- bip = mempool_alloc(bs->bio_integrity_pool, gfp_mask);
+ bip = mempool_alloc(&bs->bio_integrity_pool, gfp_mask);
inline_vecs = BIP_INLINE_VECS;
}
@@ -74,7 +74,7 @@ struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio,
unsigned long idx = 0;
bip->bip_vec = bvec_alloc(gfp_mask, nr_vecs, &idx,
- bs->bvec_integrity_pool);
+ &bs->bvec_integrity_pool);
if (!bip->bip_vec)
goto err;
bip->bip_max_vcnt = bvec_nr_vecs(idx);
@@ -90,7 +90,7 @@ struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio,
return bip;
err:
- mempool_free(bip, bs->bio_integrity_pool);
+ mempool_free(bip, &bs->bio_integrity_pool);
return ERR_PTR(-ENOMEM);
}
EXPORT_SYMBOL(bio_integrity_alloc);
@@ -111,10 +111,10 @@ static void bio_integrity_free(struct bio *bio)
kfree(page_address(bip->bip_vec->bv_page) +
bip->bip_vec->bv_offset);
- if (bs && bs->bio_integrity_pool) {
- bvec_free(bs->bvec_integrity_pool, bip->bip_vec, bip->bip_slab);
+ if (bs && mempool_initialized(&bs->bio_integrity_pool)) {
+ bvec_free(&bs->bvec_integrity_pool, bip->bip_vec, bip->bip_slab);
- mempool_free(bip, bs->bio_integrity_pool);
+ mempool_free(bip, &bs->bio_integrity_pool);
} else {
kfree(bip);
}
@@ -465,16 +465,15 @@ EXPORT_SYMBOL(bio_integrity_clone);
int bioset_integrity_create(struct bio_set *bs, int pool_size)
{
- if (bs->bio_integrity_pool)
+ if (mempool_initialized(&bs->bio_integrity_pool))
return 0;
- bs->bio_integrity_pool = mempool_create_slab_pool(pool_size, bip_slab);
- if (!bs->bio_integrity_pool)
+ if (mempool_init_slab_pool(&bs->bio_integrity_pool,
+ pool_size, bip_slab))
return -1;
- bs->bvec_integrity_pool = biovec_create_pool(pool_size);
- if (!bs->bvec_integrity_pool) {
- mempool_destroy(bs->bio_integrity_pool);
+ if (biovec_init_pool(&bs->bvec_integrity_pool, pool_size)) {
+ mempool_exit(&bs->bio_integrity_pool);
return -1;
}
@@ -484,8 +483,8 @@ EXPORT_SYMBOL(bioset_integrity_create);
void bioset_integrity_free(struct bio_set *bs)
{
- mempool_destroy(bs->bio_integrity_pool);
- mempool_destroy(bs->bvec_integrity_pool);
+ mempool_exit(&bs->bio_integrity_pool);
+ mempool_exit(&bs->bvec_integrity_pool);
}
EXPORT_SYMBOL(bioset_integrity_free);
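
This follows the embedded-mempool conversion used throughout the series: the
mempool_t is a member rather than a pointer, and init/exit replace
create/destroy. A minimal sketch with a hypothetical owner structure:

	#include <linux/mempool.h>

	struct foo_pools {			/* hypothetical owner */
		mempool_t page_pool;		/* embedded, not a pointer */
	};

	static int foo_pools_init(struct foo_pools *fp, int min_nr,
				  struct kmem_cache *cache)
	{
		return mempool_init_slab_pool(&fp->page_pool, min_nr, cache);
	}

	static void foo_pools_exit(struct foo_pools *fp)
	{
		/* safe on a zeroed, never-initialized pool as well */
		mempool_exit(&fp->page_pool);
	}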
diff --git a/block/bio.c b/block/bio.c
index 53e0f0a1ed94..595663e0281a 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -53,7 +53,7 @@ static struct biovec_slab bvec_slabs[BVEC_POOL_NR] __read_mostly = {
* fs_bio_set is the bio_set containing bio and iovec memory pools used by
* IO code that does not need private memory pools.
*/
-struct bio_set *fs_bio_set;
+struct bio_set fs_bio_set;
EXPORT_SYMBOL(fs_bio_set);
/*
@@ -254,7 +254,7 @@ static void bio_free(struct bio *bio)
bio_uninit(bio);
if (bs) {
- bvec_free(bs->bvec_pool, bio->bi_io_vec, BVEC_POOL_IDX(bio));
+ bvec_free(&bs->bvec_pool, bio->bi_io_vec, BVEC_POOL_IDX(bio));
/*
* If we have front padding, adjust the bio pointer before freeing
@@ -262,7 +262,7 @@ static void bio_free(struct bio *bio)
p = bio;
p -= bs->front_pad;
- mempool_free(p, bs->bio_pool);
+ mempool_free(p, &bs->bio_pool);
} else {
/* Bio was allocated by bio_kmalloc() */
kfree(bio);
@@ -454,7 +454,8 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, unsigned int nr_iovecs,
inline_vecs = nr_iovecs;
} else {
/* should not use nobvec bioset for nr_iovecs > 0 */
- if (WARN_ON_ONCE(!bs->bvec_pool && nr_iovecs > 0))
+ if (WARN_ON_ONCE(!mempool_initialized(&bs->bvec_pool) &&
+ nr_iovecs > 0))
return NULL;
/*
* generic_make_request() converts recursion to iteration; this
@@ -483,11 +484,11 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, unsigned int nr_iovecs,
bs->rescue_workqueue)
gfp_mask &= ~__GFP_DIRECT_RECLAIM;
- p = mempool_alloc(bs->bio_pool, gfp_mask);
+ p = mempool_alloc(&bs->bio_pool, gfp_mask);
if (!p && gfp_mask != saved_gfp) {
punt_bios_to_rescuer(bs);
gfp_mask = saved_gfp;
- p = mempool_alloc(bs->bio_pool, gfp_mask);
+ p = mempool_alloc(&bs->bio_pool, gfp_mask);
}
front_pad = bs->front_pad;
@@ -503,11 +504,11 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, unsigned int nr_iovecs,
if (nr_iovecs > inline_vecs) {
unsigned long idx = 0;
- bvl = bvec_alloc(gfp_mask, nr_iovecs, &idx, bs->bvec_pool);
+ bvl = bvec_alloc(gfp_mask, nr_iovecs, &idx, &bs->bvec_pool);
if (!bvl && gfp_mask != saved_gfp) {
punt_bios_to_rescuer(bs);
gfp_mask = saved_gfp;
- bvl = bvec_alloc(gfp_mask, nr_iovecs, &idx, bs->bvec_pool);
+ bvl = bvec_alloc(gfp_mask, nr_iovecs, &idx, &bs->bvec_pool);
}
if (unlikely(!bvl))
@@ -524,25 +525,25 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, unsigned int nr_iovecs,
return bio;
err_free:
- mempool_free(p, bs->bio_pool);
+ mempool_free(p, &bs->bio_pool);
return NULL;
}
EXPORT_SYMBOL(bio_alloc_bioset);
-void zero_fill_bio(struct bio *bio)
+void zero_fill_bio_iter(struct bio *bio, struct bvec_iter start)
{
unsigned long flags;
struct bio_vec bv;
struct bvec_iter iter;
- bio_for_each_segment(bv, bio, iter) {
+ __bio_for_each_segment(bv, bio, iter, start) {
char *data = bvec_kmap_irq(&bv, &flags);
memset(data, 0, bv.bv_len);
flush_dcache_page(bv.bv_page);
bvec_kunmap_irq(data, &flags);
}
}
-EXPORT_SYMBOL(zero_fill_bio);
+EXPORT_SYMBOL(zero_fill_bio_iter);
/**
* bio_put - release a reference to a bio
@@ -970,27 +971,68 @@ void bio_advance(struct bio *bio, unsigned bytes)
}
EXPORT_SYMBOL(bio_advance);
+void bio_copy_data_iter(struct bio *dst, struct bvec_iter *dst_iter,
+ struct bio *src, struct bvec_iter *src_iter)
+{
+ struct bio_vec src_bv, dst_bv;
+ void *src_p, *dst_p;
+ unsigned bytes;
+
+ while (src_iter->bi_size && dst_iter->bi_size) {
+ src_bv = bio_iter_iovec(src, *src_iter);
+ dst_bv = bio_iter_iovec(dst, *dst_iter);
+
+ bytes = min(src_bv.bv_len, dst_bv.bv_len);
+
+ src_p = kmap_atomic(src_bv.bv_page);
+ dst_p = kmap_atomic(dst_bv.bv_page);
+
+ memcpy(dst_p + dst_bv.bv_offset,
+ src_p + src_bv.bv_offset,
+ bytes);
+
+ kunmap_atomic(dst_p);
+ kunmap_atomic(src_p);
+
+ flush_dcache_page(dst_bv.bv_page);
+
+ bio_advance_iter(src, src_iter, bytes);
+ bio_advance_iter(dst, dst_iter, bytes);
+ }
+}
+EXPORT_SYMBOL(bio_copy_data_iter);
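
Since the iterators are passed explicitly and advanced by the callee, a caller
can, for example, start the copy at an offset into the destination without
modifying either bio. A sketch (copy_skipping_dst_prefix is a hypothetical
helper):

	static void copy_skipping_dst_prefix(struct bio *dst, struct bio *src,
					     unsigned int skip)
	{
		struct bvec_iter src_iter = src->bi_iter;
		struct bvec_iter dst_iter = dst->bi_iter;

		bio_advance_iter(dst, &dst_iter, skip);
		bio_copy_data_iter(dst, &dst_iter, src, &src_iter);
	}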
+
/**
- * bio_copy_data - copy contents of data buffers from one chain of bios to
- * another
- * @src: source bio list
- * @dst: destination bio list
- *
- * If @src and @dst are single bios, bi_next must be NULL - otherwise, treats
- * @src and @dst as linked lists of bios.
+ * bio_copy_data - copy contents of data buffers from one bio to another
+ * @src: source bio
+ * @dst: destination bio
*
* Stops when it reaches the end of either @src or @dst - that is, copies
 * min(src->bi_size, dst->bi_size) bytes.
*/
void bio_copy_data(struct bio *dst, struct bio *src)
{
- struct bvec_iter src_iter, dst_iter;
- struct bio_vec src_bv, dst_bv;
- void *src_p, *dst_p;
- unsigned bytes;
+ struct bvec_iter src_iter = src->bi_iter;
+ struct bvec_iter dst_iter = dst->bi_iter;
- src_iter = src->bi_iter;
- dst_iter = dst->bi_iter;
+ bio_copy_data_iter(dst, &dst_iter, src, &src_iter);
+}
+EXPORT_SYMBOL(bio_copy_data);
+
+/**
+ * bio_list_copy_data - copy contents of data buffers from one chain of bios to
+ * another
+ * @src: source bio list
+ * @dst: destination bio list
+ *
+ * Stops when it reaches the end of either the @src list or @dst list - that is,
+ * copies min(src->bi_size, dst->bi_size) bytes (or the equivalent for lists of
+ * bios).
+ */
+void bio_list_copy_data(struct bio *dst, struct bio *src)
+{
+ struct bvec_iter src_iter = src->bi_iter;
+ struct bvec_iter dst_iter = dst->bi_iter;
while (1) {
if (!src_iter.bi_size) {
@@ -1009,26 +1051,10 @@ void bio_copy_data(struct bio *dst, struct bio *src)
dst_iter = dst->bi_iter;
}
- src_bv = bio_iter_iovec(src, src_iter);
- dst_bv = bio_iter_iovec(dst, dst_iter);
-
- bytes = min(src_bv.bv_len, dst_bv.bv_len);
-
- src_p = kmap_atomic(src_bv.bv_page);
- dst_p = kmap_atomic(dst_bv.bv_page);
-
- memcpy(dst_p + dst_bv.bv_offset,
- src_p + src_bv.bv_offset,
- bytes);
-
- kunmap_atomic(dst_p);
- kunmap_atomic(src_p);
-
- bio_advance_iter(src, &src_iter, bytes);
- bio_advance_iter(dst, &dst_iter, bytes);
+ bio_copy_data_iter(dst, &dst_iter, src, &src_iter);
}
}
-EXPORT_SYMBOL(bio_copy_data);
+EXPORT_SYMBOL(bio_list_copy_data);
struct bio_map_data {
int is_our_pages;
@@ -1584,6 +1610,7 @@ void bio_set_pages_dirty(struct bio *bio)
set_page_dirty_lock(page);
}
}
+EXPORT_SYMBOL_GPL(bio_set_pages_dirty);
static void bio_release_pages(struct bio *bio)
{
@@ -1667,6 +1694,7 @@ void bio_check_pages_dirty(struct bio *bio)
bio_put(bio);
}
}
+EXPORT_SYMBOL_GPL(bio_check_pages_dirty);
void generic_start_io_acct(struct request_queue *q, int rw,
unsigned long sectors, struct hd_struct *part)
@@ -1749,6 +1777,9 @@ again:
if (!bio_integrity_endio(bio))
return;
+ if (WARN_ONCE(bio->bi_next, "driver left bi_next not NULL"))
+ bio->bi_next = NULL;
+
/*
* Need to have a real endio function for chained bios, otherwise
* various corner cases will break (like stacking block devices that
@@ -1848,30 +1879,38 @@ EXPORT_SYMBOL_GPL(bio_trim);
* create memory pools for biovec's in a bio_set.
* use the global biovec slabs created for general use.
*/
-mempool_t *biovec_create_pool(int pool_entries)
+int biovec_init_pool(mempool_t *pool, int pool_entries)
{
struct biovec_slab *bp = bvec_slabs + BVEC_POOL_MAX;
- return mempool_create_slab_pool(pool_entries, bp->slab);
+ return mempool_init_slab_pool(pool, pool_entries, bp->slab);
}
-void bioset_free(struct bio_set *bs)
+/*
+ * bioset_exit - exit a bioset initialized with bioset_init()
+ *
+ * May be called on a zeroed but uninitialized bioset (i.e. allocated with
+ * kzalloc()).
+ */
+void bioset_exit(struct bio_set *bs)
{
if (bs->rescue_workqueue)
destroy_workqueue(bs->rescue_workqueue);
+ bs->rescue_workqueue = NULL;
- mempool_destroy(bs->bio_pool);
- mempool_destroy(bs->bvec_pool);
+ mempool_exit(&bs->bio_pool);
+ mempool_exit(&bs->bvec_pool);
bioset_integrity_free(bs);
- bio_put_slab(bs);
-
- kfree(bs);
+ if (bs->bio_slab)
+ bio_put_slab(bs);
+ bs->bio_slab = NULL;
}
-EXPORT_SYMBOL(bioset_free);
+EXPORT_SYMBOL(bioset_exit);
/**
- * bioset_create - Create a bio_set
+ * bioset_init - Initialize a bio_set
+ * @bs: pool to initialize
* @pool_size: Number of bio and bio_vecs to cache in the mempool
* @front_pad: Number of bytes to allocate in front of the returned bio
* @flags: Flags to modify behavior, currently %BIOSET_NEED_BVECS
@@ -1890,16 +1929,12 @@ EXPORT_SYMBOL(bioset_free);
* dispatch queued requests when the mempool runs out of space.
*
*/
-struct bio_set *bioset_create(unsigned int pool_size,
- unsigned int front_pad,
- int flags)
+int bioset_init(struct bio_set *bs,
+ unsigned int pool_size,
+ unsigned int front_pad,
+ int flags)
{
unsigned int back_pad = BIO_INLINE_VECS * sizeof(struct bio_vec);
- struct bio_set *bs;
-
- bs = kzalloc(sizeof(*bs), GFP_KERNEL);
- if (!bs)
- return NULL;
bs->front_pad = front_pad;
@@ -1908,34 +1943,29 @@ struct bio_set *bioset_create(unsigned int pool_size,
INIT_WORK(&bs->rescue_work, bio_alloc_rescue);
bs->bio_slab = bio_find_or_create_slab(front_pad + back_pad);
- if (!bs->bio_slab) {
- kfree(bs);
- return NULL;
- }
+ if (!bs->bio_slab)
+ return -ENOMEM;
- bs->bio_pool = mempool_create_slab_pool(pool_size, bs->bio_slab);
- if (!bs->bio_pool)
+ if (mempool_init_slab_pool(&bs->bio_pool, pool_size, bs->bio_slab))
goto bad;
- if (flags & BIOSET_NEED_BVECS) {
- bs->bvec_pool = biovec_create_pool(pool_size);
- if (!bs->bvec_pool)
- goto bad;
- }
+ if ((flags & BIOSET_NEED_BVECS) &&
+ biovec_init_pool(&bs->bvec_pool, pool_size))
+ goto bad;
if (!(flags & BIOSET_NEED_RESCUER))
- return bs;
+ return 0;
bs->rescue_workqueue = alloc_workqueue("bioset", WQ_MEM_RECLAIM, 0);
if (!bs->rescue_workqueue)
goto bad;
- return bs;
+ return 0;
bad:
- bioset_free(bs);
- return NULL;
+ bioset_exit(bs);
+ return -ENOMEM;
}
-EXPORT_SYMBOL(bioset_create);
+EXPORT_SYMBOL(bioset_init);
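
For callers the conversion looks roughly as follows (a sketch; md is a
hypothetical driver structure that embeds the bio_set as struct bio_set bs):

	/* before: a heap-allocated bio_set */
	md->bs = bioset_create(BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
	if (!md->bs)
		return -ENOMEM;
	/* ... use md->bs ... */
	bioset_free(md->bs);

	/* after: an embedded bio_set */
	if (bioset_init(&md->bs, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS))
		return -ENOMEM;
	/* ... use &md->bs ... */
	bioset_exit(&md->bs);	/* also safe on a zeroed, uninitialized set */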
#ifdef CONFIG_BLK_CGROUP
@@ -2020,11 +2050,10 @@ static int __init init_bio(void)
bio_integrity_init();
biovec_init_slabs();
- fs_bio_set = bioset_create(BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
- if (!fs_bio_set)
+ if (bioset_init(&fs_bio_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS))
panic("bio: can't allocate bios\n");
- if (bioset_integrity_create(fs_bio_set, BIO_POOL_SIZE))
+ if (bioset_integrity_create(&fs_bio_set, BIO_POOL_SIZE))
panic("bio: can't create integrity pool\n");
return 0;
diff --git a/block/blk-core.c b/block/blk-core.c
index 85909b431eb0..3f56be15f17e 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -196,15 +196,8 @@ void blk_rq_init(struct request_queue *q, struct request *rq)
RB_CLEAR_NODE(&rq->rb_node);
rq->tag = -1;
rq->internal_tag = -1;
- rq->start_time = jiffies;
- set_start_time_ns(rq);
+ rq->start_time_ns = ktime_get_ns();
rq->part = NULL;
- seqcount_init(&rq->gstate_seq);
- u64_stats_init(&rq->aborted_gstate_sync);
- /*
- * See comment of blk_mq_init_request
- */
- WRITE_ONCE(rq->gstate, MQ_RQ_GEN_INC);
}
EXPORT_SYMBOL(blk_rq_init);
@@ -280,6 +273,10 @@ static void req_bio_endio(struct request *rq, struct bio *bio,
bio_advance(bio, nbytes);
/* don't actually finish bio if it's part of flush sequence */
+ /*
+ * XXX this code looks suspicious - it's not consistent with advancing
+ * req->bio in caller
+ */
if (bio->bi_iter.bi_size == 0 && !(rq->rq_flags & RQF_FLUSH_SEQ))
bio_endio(bio);
}
@@ -360,7 +357,6 @@ EXPORT_SYMBOL(blk_start_queue_async);
void blk_start_queue(struct request_queue *q)
{
lockdep_assert_held(q->queue_lock);
- WARN_ON(!in_interrupt() && !irqs_disabled());
WARN_ON_ONCE(q->mq_ops);
queue_flag_clear(QUEUE_FLAG_STOPPED, q);
@@ -996,18 +992,24 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id,
spinlock_t *lock)
{
struct request_queue *q;
+ int ret;
q = kmem_cache_alloc_node(blk_requestq_cachep,
gfp_mask | __GFP_ZERO, node_id);
if (!q)
return NULL;
+ INIT_LIST_HEAD(&q->queue_head);
+ q->last_merge = NULL;
+ q->end_sector = 0;
+ q->boundary_rq = NULL;
+
q->id = ida_simple_get(&blk_queue_ida, 0, 0, gfp_mask);
if (q->id < 0)
goto fail_q;
- q->bio_split = bioset_create(BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
- if (!q->bio_split)
+ ret = bioset_init(&q->bio_split, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
+ if (ret)
goto fail_id;
q->backing_dev_info = bdi_alloc_node(gfp_mask, node_id);
@@ -1079,7 +1081,7 @@ fail_bdi:
fail_stats:
bdi_put(q->backing_dev_info);
fail_split:
- bioset_free(q->bio_split);
+ bioset_exit(&q->bio_split);
fail_id:
ida_simple_remove(&blk_queue_ida, q->id);
fail_q:
@@ -1173,16 +1175,8 @@ int blk_init_allocated_queue(struct request_queue *q)
q->sg_reserved_size = INT_MAX;
- /* Protect q->elevator from elevator_change */
- mutex_lock(&q->sysfs_lock);
-
- /* init elevator */
- if (elevator_init(q, NULL)) {
- mutex_unlock(&q->sysfs_lock);
+ if (elevator_init(q))
goto out_exit_flush_rq;
- }
-
- mutex_unlock(&q->sysfs_lock);
return 0;
out_exit_flush_rq:
@@ -1334,6 +1328,7 @@ int blk_update_nr_requests(struct request_queue *q, unsigned int nr)
* @op: operation and flags
* @bio: bio to allocate request for (can be %NULL)
 * @flags: BLK_MQ_REQ_* flags
+ * @gfp_mask: allocator flags
*
* Get a free request from @q. This function may fail under memory
* pressure or if @q is dead.
@@ -1343,7 +1338,7 @@ int blk_update_nr_requests(struct request_queue *q, unsigned int nr)
* Returns request pointer on success, with @q->queue_lock *not held*.
*/
static struct request *__get_request(struct request_list *rl, unsigned int op,
- struct bio *bio, blk_mq_req_flags_t flags)
+ struct bio *bio, blk_mq_req_flags_t flags, gfp_t gfp_mask)
{
struct request_queue *q = rl->q;
struct request *rq;
@@ -1352,8 +1347,6 @@ static struct request *__get_request(struct request_list *rl, unsigned int op,
struct io_cq *icq = NULL;
const bool is_sync = op_is_sync(op);
int may_queue;
- gfp_t gfp_mask = flags & BLK_MQ_REQ_NOWAIT ? GFP_ATOMIC :
- __GFP_DIRECT_RECLAIM;
req_flags_t rq_flags = RQF_ALLOCED;
lockdep_assert_held(q->queue_lock);
@@ -1517,8 +1510,9 @@ rq_starved:
* @op: operation and flags
* @bio: bio to allocate request for (can be %NULL)
* @flags: BLK_MQ_REQ_* flags.
+ * @gfp: allocator flags
*
- * Get a free request from @q. If %__GFP_DIRECT_RECLAIM is set in @gfp_mask,
+ * Get a free request from @q. Unless %BLK_MQ_REQ_NOWAIT is set in @flags,
* this function keeps retrying under memory pressure and fails iff @q is dead.
*
* Must be called with @q->queue_lock held and,
@@ -1526,7 +1520,7 @@ rq_starved:
* Returns request pointer on success, with @q->queue_lock *not held*.
*/
static struct request *get_request(struct request_queue *q, unsigned int op,
- struct bio *bio, blk_mq_req_flags_t flags)
+ struct bio *bio, blk_mq_req_flags_t flags, gfp_t gfp)
{
const bool is_sync = op_is_sync(op);
DEFINE_WAIT(wait);
@@ -1538,7 +1532,7 @@ static struct request *get_request(struct request_queue *q, unsigned int op,
rl = blk_get_rl(q, bio); /* transferred to @rq on success */
retry:
- rq = __get_request(rl, op, bio, flags);
+ rq = __get_request(rl, op, bio, flags, gfp);
if (!IS_ERR(rq))
return rq;
@@ -1579,8 +1573,7 @@ static struct request *blk_old_get_request(struct request_queue *q,
unsigned int op, blk_mq_req_flags_t flags)
{
struct request *rq;
- gfp_t gfp_mask = flags & BLK_MQ_REQ_NOWAIT ? GFP_ATOMIC :
- __GFP_DIRECT_RECLAIM;
+ gfp_t gfp_mask = flags & BLK_MQ_REQ_NOWAIT ? GFP_ATOMIC : GFP_NOIO;
int ret = 0;
WARN_ON_ONCE(q->mq_ops);
@@ -1592,7 +1585,7 @@ static struct request *blk_old_get_request(struct request_queue *q,
if (ret)
return ERR_PTR(ret);
spin_lock_irq(q->queue_lock);
- rq = get_request(q, op, NULL, flags);
+ rq = get_request(q, op, NULL, flags, gfp_mask);
if (IS_ERR(rq)) {
spin_unlock_irq(q->queue_lock);
blk_queue_exit(q);
@@ -1607,13 +1600,13 @@ static struct request *blk_old_get_request(struct request_queue *q,
}
/**
- * blk_get_request_flags - allocate a request
+ * blk_get_request - allocate a request
* @q: request queue to allocate a request for
* @op: operation (REQ_OP_*) and REQ_* flags, e.g. REQ_SYNC.
* @flags: BLK_MQ_REQ_* flags, e.g. BLK_MQ_REQ_NOWAIT.
*/
-struct request *blk_get_request_flags(struct request_queue *q, unsigned int op,
- blk_mq_req_flags_t flags)
+struct request *blk_get_request(struct request_queue *q, unsigned int op,
+ blk_mq_req_flags_t flags)
{
struct request *req;
@@ -1632,14 +1625,6 @@ struct request *blk_get_request_flags(struct request_queue *q, unsigned int op,
return req;
}
-EXPORT_SYMBOL(blk_get_request_flags);
-
-struct request *blk_get_request(struct request_queue *q, unsigned int op,
- gfp_t gfp_mask)
-{
- return blk_get_request_flags(q, op, gfp_mask & __GFP_DIRECT_RECLAIM ?
- 0 : BLK_MQ_REQ_NOWAIT);
-}
EXPORT_SYMBOL(blk_get_request);
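
Callers now pass BLK_MQ_REQ_* flags rather than a gfp mask. A sketch of the two
common cases (REQ_OP_DRV_IN chosen only for illustration):

	struct request *rq;

	/* may sleep (previously GFP_KERNEL/GFP_NOIO): */
	rq = blk_get_request(q, REQ_OP_DRV_IN, 0);

	/* must not sleep (previously GFP_ATOMIC): */
	rq = blk_get_request(q, REQ_OP_DRV_IN, BLK_MQ_REQ_NOWAIT);
	if (IS_ERR(rq))
		return PTR_ERR(rq);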
/**
@@ -1660,7 +1645,7 @@ void blk_requeue_request(struct request_queue *q, struct request *rq)
blk_delete_timer(rq);
blk_clear_rq_complete(rq);
trace_block_rq_requeue(q, rq);
- wbt_requeue(q->rq_wb, &rq->issue_stat);
+ wbt_requeue(q->rq_wb, rq);
if (rq->rq_flags & RQF_QUEUED)
blk_queue_end_tag(q, rq);
@@ -1767,7 +1752,7 @@ void __blk_put_request(struct request_queue *q, struct request *req)
/* this is a bio leak */
WARN_ON(req->bio != NULL);
- wbt_done(q->rq_wb, &req->issue_stat);
+ wbt_done(q->rq_wb, req);
/*
* Request may not have originated from ll_rw_blk. if not,
@@ -2066,7 +2051,7 @@ get_rq:
* Returns with the queue unlocked.
*/
blk_queue_enter_live(q);
- req = get_request(q, bio->bi_opf, bio, 0);
+ req = get_request(q, bio->bi_opf, bio, 0, GFP_NOIO);
if (IS_ERR(req)) {
blk_queue_exit(q);
__wbt_done(q->rq_wb, wb_acct);
@@ -2078,7 +2063,7 @@ get_rq:
goto out_unlock;
}
- wbt_track(&req->issue_stat, wb_acct);
+ wbt_track(req, wb_acct);
/*
* After dropping the lock and possibly sleeping here, our request
@@ -2392,7 +2377,9 @@ blk_qc_t generic_make_request(struct bio *bio)
if (bio->bi_opf & REQ_NOWAIT)
flags = BLK_MQ_REQ_NOWAIT;
- if (blk_queue_enter(q, flags) < 0) {
+ if (bio_flagged(bio, BIO_QUEUE_ENTERED))
+ blk_queue_enter_live(q);
+ else if (blk_queue_enter(q, flags) < 0) {
if (!blk_queue_dying(q) && (bio->bi_opf & REQ_NOWAIT))
bio_wouldblock_error(bio);
else
@@ -2727,7 +2714,7 @@ void blk_account_io_completion(struct request *req, unsigned int bytes)
}
}
-void blk_account_io_done(struct request *req)
+void blk_account_io_done(struct request *req, u64 now)
{
/*
* Account IO completion. flush_rq isn't accounted as a
@@ -2735,11 +2722,12 @@ void blk_account_io_done(struct request *req)
* containing request is enough.
*/
if (blk_do_io_stat(req) && !(req->rq_flags & RQF_FLUSH_SEQ)) {
- unsigned long duration = jiffies - req->start_time;
+ unsigned long duration;
const int rw = rq_data_dir(req);
struct hd_struct *part;
int cpu;
+ duration = nsecs_to_jiffies(now - req->start_time_ns);
cpu = part_stat_lock();
part = req->part;
@@ -2970,10 +2958,8 @@ static void blk_dequeue_request(struct request *rq)
 * and the time it is freed is accounted as io that is in progress at
* the driver side.
*/
- if (blk_account_rq(rq)) {
+ if (blk_account_rq(rq))
q->in_flight[rq_is_sync(rq)]++;
- set_io_start_time_ns(rq);
- }
}
/**
@@ -2992,9 +2978,12 @@ void blk_start_request(struct request *req)
blk_dequeue_request(req);
if (test_bit(QUEUE_FLAG_STATS, &req->q->queue_flags)) {
- blk_stat_set_issue(&req->issue_stat, blk_rq_sectors(req));
+ req->io_start_time_ns = ktime_get_ns();
+#ifdef CONFIG_BLK_DEV_THROTTLING_LOW
+ req->throtl_size = blk_rq_sectors(req);
+#endif
req->rq_flags |= RQF_STATS;
- wbt_issue(req->q->rq_wb, &req->issue_stat);
+ wbt_issue(req->q->rq_wb, req);
}
BUG_ON(blk_rq_is_complete(req));
@@ -3092,8 +3081,10 @@ bool blk_update_request(struct request *req, blk_status_t error,
struct bio *bio = req->bio;
unsigned bio_bytes = min(bio->bi_iter.bi_size, nr_bytes);
- if (bio_bytes == bio->bi_iter.bi_size)
+ if (bio_bytes == bio->bi_iter.bi_size) {
req->bio = bio->bi_next;
+ bio->bi_next = NULL;
+ }
/* Completion has already been traced */
bio_clear_flag(bio, BIO_TRACE_COMPLETION);
@@ -3190,12 +3181,13 @@ EXPORT_SYMBOL_GPL(blk_unprep_request);
void blk_finish_request(struct request *req, blk_status_t error)
{
struct request_queue *q = req->q;
+ u64 now = ktime_get_ns();
lockdep_assert_held(req->q->queue_lock);
WARN_ON_ONCE(q->mq_ops);
if (req->rq_flags & RQF_STATS)
- blk_stat_add(req);
+ blk_stat_add(req, now);
if (req->rq_flags & RQF_QUEUED)
blk_queue_end_tag(q, req);
@@ -3210,10 +3202,10 @@ void blk_finish_request(struct request *req, blk_status_t error)
if (req->rq_flags & RQF_DONTPREP)
blk_unprep_request(req);
- blk_account_io_done(req);
+ blk_account_io_done(req, now);
if (req->end_io) {
- wbt_done(req->q->rq_wb, &req->issue_stat);
+ wbt_done(req->q->rq_wb, req);
req->end_io(req, error);
} else {
if (blk_bidi_rq(req))
@@ -3519,7 +3511,7 @@ int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
struct bio *bio, *bio_src;
if (!bs)
- bs = fs_bio_set;
+ bs = &fs_bio_set;
__rq_for_each_bio(bio_src, rq_src) {
bio = bio_clone_fast(bio_src, gfp_mask, bs);
@@ -3630,7 +3622,7 @@ static void queue_unplugged(struct request_queue *q, unsigned int depth,
blk_run_queue_async(q);
else
__blk_run_queue(q);
- spin_unlock(q->queue_lock);
+ spin_unlock_irq(q->queue_lock);
}
static void flush_plug_callbacks(struct blk_plug *plug, bool from_schedule)
@@ -3678,7 +3670,6 @@ EXPORT_SYMBOL(blk_check_plugged);
void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
{
struct request_queue *q;
- unsigned long flags;
struct request *rq;
LIST_HEAD(list);
unsigned int depth;
@@ -3698,11 +3689,6 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
q = NULL;
depth = 0;
- /*
- * Save and disable interrupts here, to avoid doing it for every
- * queue lock we have to take.
- */
- local_irq_save(flags);
while (!list_empty(&list)) {
rq = list_entry_rq(list.next);
list_del_init(&rq->queuelist);
@@ -3715,7 +3701,7 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
queue_unplugged(q, depth, from_schedule);
q = rq->q;
depth = 0;
- spin_lock(q->queue_lock);
+ spin_lock_irq(q->queue_lock);
}
/*
@@ -3742,8 +3728,6 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
*/
if (q)
queue_unplugged(q, depth, from_schedule);
-
- local_irq_restore(flags);
}
void blk_finish_plug(struct blk_plug *plug)
diff --git a/block/blk-integrity.c b/block/blk-integrity.c
index feb30570eaf5..6121611e1316 100644
--- a/block/blk-integrity.c
+++ b/block/blk-integrity.c
@@ -333,34 +333,34 @@ static ssize_t integrity_device_show(struct blk_integrity *bi, char *page)
}
static struct integrity_sysfs_entry integrity_format_entry = {
- .attr = { .name = "format", .mode = S_IRUGO },
+ .attr = { .name = "format", .mode = 0444 },
.show = integrity_format_show,
};
static struct integrity_sysfs_entry integrity_tag_size_entry = {
- .attr = { .name = "tag_size", .mode = S_IRUGO },
+ .attr = { .name = "tag_size", .mode = 0444 },
.show = integrity_tag_size_show,
};
static struct integrity_sysfs_entry integrity_interval_entry = {
- .attr = { .name = "protection_interval_bytes", .mode = S_IRUGO },
+ .attr = { .name = "protection_interval_bytes", .mode = 0444 },
.show = integrity_interval_show,
};
static struct integrity_sysfs_entry integrity_verify_entry = {
- .attr = { .name = "read_verify", .mode = S_IRUGO | S_IWUSR },
+ .attr = { .name = "read_verify", .mode = 0644 },
.show = integrity_verify_show,
.store = integrity_verify_store,
};
static struct integrity_sysfs_entry integrity_generate_entry = {
- .attr = { .name = "write_generate", .mode = S_IRUGO | S_IWUSR },
+ .attr = { .name = "write_generate", .mode = 0644 },
.show = integrity_generate_show,
.store = integrity_generate_store,
};
static struct integrity_sysfs_entry integrity_device_entry = {
- .attr = { .name = "device_is_integrity_capable", .mode = S_IRUGO },
+ .attr = { .name = "device_is_integrity_capable", .mode = 0444 },
.show = integrity_device_show,
};
diff --git a/block/blk-lib.c b/block/blk-lib.c
index a676084d4740..8faa70f26fcd 100644
--- a/block/blk-lib.c
+++ b/block/blk-lib.c
@@ -62,10 +62,16 @@ int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
unsigned int req_sects;
sector_t end_sect, tmp;
- /* Make sure bi_size doesn't overflow */
- req_sects = min_t(sector_t, nr_sects, UINT_MAX >> 9);
+ /*
+ * Issue in chunks of the user defined max discard setting,
+ * ensuring that bi_size doesn't overflow
+ */
+ req_sects = min_t(sector_t, nr_sects,
+ q->limits.max_discard_sectors);
+ if (req_sects > UINT_MAX >> 9)
+ req_sects = UINT_MAX >> 9;
- /**
+ /*
* If splitting a request, and the next starting sector would be
* misaligned, stop the discard at the previous aligned sector.
*/
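
A worked example of the new chunking, assuming a device that advertises
max_discard_sectors == 65536 (32 MiB with 512-byte sectors):

	/*
	 * A 100 GiB discard (nr_sects == 209715200) is now issued as 3200
	 * bios of 65536 sectors each, instead of bios of
	 * min(nr_sects, UINT_MAX >> 9) == 8388607 sectors (just under
	 * 4 GiB), which was capped only by the bi_size overflow limit.
	 */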
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 782940c65d8a..aaec38cc37b8 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -188,16 +188,16 @@ void blk_queue_split(struct request_queue *q, struct bio **bio)
switch (bio_op(*bio)) {
case REQ_OP_DISCARD:
case REQ_OP_SECURE_ERASE:
- split = blk_bio_discard_split(q, *bio, q->bio_split, &nsegs);
+ split = blk_bio_discard_split(q, *bio, &q->bio_split, &nsegs);
break;
case REQ_OP_WRITE_ZEROES:
- split = blk_bio_write_zeroes_split(q, *bio, q->bio_split, &nsegs);
+ split = blk_bio_write_zeroes_split(q, *bio, &q->bio_split, &nsegs);
break;
case REQ_OP_WRITE_SAME:
- split = blk_bio_write_same_split(q, *bio, q->bio_split, &nsegs);
+ split = blk_bio_write_same_split(q, *bio, &q->bio_split, &nsegs);
break;
default:
- split = blk_bio_segment_split(q, *bio, q->bio_split, &nsegs);
+ split = blk_bio_segment_split(q, *bio, &q->bio_split, &nsegs);
break;
}
@@ -210,6 +210,16 @@ void blk_queue_split(struct request_queue *q, struct bio **bio)
 /* there is no chance to merge the split bio */
split->bi_opf |= REQ_NOMERGE;
+ /*
+ * Since we're recursing into make_request here, ensure
+ * that we mark this bio as already having entered the queue.
+ * If not, and the queue is going away, we can get stuck
+ * forever on waiting for the queue reference to drop. But
+ * that will never happen, as we're already holding a
+ * reference to it.
+ */
+ bio_set_flag(*bio, BIO_QUEUE_ENTERED);
+
bio_chain(split, *bio);
trace_block_split(q, split, (*bio)->bi_iter.bi_sector);
generic_make_request(*bio);
@@ -724,13 +734,12 @@ static struct request *attempt_merge(struct request_queue *q,
}
/*
- * At this point we have either done a back merge
- * or front merge. We need the smaller start_time of
- * the merged requests to be the current request
- * for accounting purposes.
+ * At this point we have either done a back merge or front merge. We
+ * need the smaller start_time_ns of the merged requests to be the
+ * current request for accounting purposes.
*/
- if (time_after(req->start_time, next->start_time))
- req->start_time = next->start_time;
+ if (next->start_time_ns < req->start_time_ns)
+ req->start_time_ns = next->start_time_ns;
req->biotail->bi_next = next->bio;
req->biotail = next->biotail;
diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
index 3080e18cb859..ffa622366922 100644
--- a/block/blk-mq-debugfs.c
+++ b/block/blk-mq-debugfs.c
@@ -344,7 +344,6 @@ static const char *const rqf_name[] = {
RQF_NAME(STATS),
RQF_NAME(SPECIAL_PAYLOAD),
RQF_NAME(ZONE_WRITE_LOCKED),
- RQF_NAME(MQ_TIMEOUT_EXPIRED),
RQF_NAME(MQ_POLL_SLEPT),
};
#undef RQF_NAME
diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index 25c14c58385c..56c493c6cd90 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -268,19 +268,16 @@ bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio,
EXPORT_SYMBOL_GPL(blk_mq_sched_try_merge);
/*
- * Reverse check our software queue for entries that we could potentially
- * merge with. Currently includes a hand-wavy stop count of 8, to not spend
- * too much time checking for merges.
+ * Iterate the list of requests and see if we can merge this bio with
+ * any of them.
*/
-static bool blk_mq_attempt_merge(struct request_queue *q,
- struct blk_mq_ctx *ctx, struct bio *bio)
+bool blk_mq_bio_list_merge(struct request_queue *q, struct list_head *list,
+ struct bio *bio)
{
struct request *rq;
int checked = 8;
- lockdep_assert_held(&ctx->lock);
-
- list_for_each_entry_reverse(rq, &ctx->rq_list, queuelist) {
+ list_for_each_entry_reverse(rq, list, queuelist) {
bool merged = false;
if (!checked--)
@@ -305,13 +302,30 @@ static bool blk_mq_attempt_merge(struct request_queue *q,
continue;
}
- if (merged)
- ctx->rq_merged++;
return merged;
}
return false;
}
+EXPORT_SYMBOL_GPL(blk_mq_bio_list_merge);
+
+/*
+ * Reverse check our software queue for entries that we could potentially
+ * merge with. Currently includes a hand-wavy stop count of 8, to not spend
+ * too much time checking for merges.
+ */
+static bool blk_mq_attempt_merge(struct request_queue *q,
+ struct blk_mq_ctx *ctx, struct bio *bio)
+{
+ lockdep_assert_held(&ctx->lock);
+
+ if (blk_mq_bio_list_merge(q, &ctx->rq_list, bio)) {
+ ctx->rq_merged++;
+ return true;
+ }
+
+ return false;
+}
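
Exporting the walk lets a scheduler that keeps its own request lists reuse the
same bounded merge scan. A sketch with a hypothetical per-hctx structure:

	struct my_hctx_data {			/* hypothetical */
		spinlock_t lock;
		struct list_head rq_list;
	};

	static bool my_sched_bio_merge(struct blk_mq_hw_ctx *hctx,
				       struct bio *bio)
	{
		struct my_hctx_data *d = hctx->sched_data;
		bool merged;

		spin_lock(&d->lock);
		merged = blk_mq_bio_list_merge(hctx->queue, &d->rq_list, bio);
		spin_unlock(&d->lock);

		return merged;
	}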
bool __blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio)
{
@@ -571,6 +585,7 @@ int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e)
if (!e) {
q->elevator = NULL;
+ q->nr_requests = q->tag_set->queue_depth;
return 0;
}
@@ -633,14 +648,3 @@ void blk_mq_exit_sched(struct request_queue *q, struct elevator_queue *e)
blk_mq_sched_tags_teardown(q);
q->elevator = NULL;
}
-
-int blk_mq_sched_init(struct request_queue *q)
-{
- int ret;
-
- mutex_lock(&q->sysfs_lock);
- ret = elevator_init(q, NULL);
- mutex_unlock(&q->sysfs_lock);
-
- return ret;
-}
diff --git a/block/blk-mq-sched.h b/block/blk-mq-sched.h
index 1e9c9018ace1..0cb8f938dff9 100644
--- a/block/blk-mq-sched.h
+++ b/block/blk-mq-sched.h
@@ -33,8 +33,6 @@ int blk_mq_sched_init_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx,
void blk_mq_sched_exit_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx,
unsigned int hctx_idx);
-int blk_mq_sched_init(struct request_queue *q);
-
static inline bool
blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio)
{
diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c
index a54b4b070f1c..aafb44224c89 100644
--- a/block/blk-mq-sysfs.c
+++ b/block/blk-mq-sysfs.c
@@ -166,15 +166,15 @@ static struct attribute *default_ctx_attrs[] = {
};
static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_nr_tags = {
- .attr = {.name = "nr_tags", .mode = S_IRUGO },
+ .attr = {.name = "nr_tags", .mode = 0444 },
.show = blk_mq_hw_sysfs_nr_tags_show,
};
static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_nr_reserved_tags = {
- .attr = {.name = "nr_reserved_tags", .mode = S_IRUGO },
+ .attr = {.name = "nr_reserved_tags", .mode = 0444 },
.show = blk_mq_hw_sysfs_nr_reserved_tags_show,
};
static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_cpus = {
- .attr = {.name = "cpu_list", .mode = S_IRUGO },
+ .attr = {.name = "cpu_list", .mode = 0444 },
.show = blk_mq_hw_sysfs_cpus_show,
};
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index 336dde07b230..70356a2a11ab 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -134,6 +134,8 @@ unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data)
ws = bt_wait_ptr(bt, data->hctx);
drop_ctx = data->ctx == NULL;
do {
+ struct sbitmap_queue *bt_prev;
+
/*
* We're out of tags on this hardware queue, kick any
* pending IO submits before going to sleep waiting for
@@ -159,6 +161,7 @@ unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data)
if (data->ctx)
blk_mq_put_ctx(data->ctx);
+ bt_prev = bt;
io_schedule();
data->ctx = blk_mq_get_ctx(data->q);
@@ -170,6 +173,15 @@ unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data)
bt = &tags->bitmap_tags;
finish_wait(&ws->wait, &wait);
+
+ /*
+ * If the destination hw queue changed, do a fake wake-up on
+ * the previous queue to compensate for the missed wake-up, so
+ * allocations on the previous queue won't be starved.
+ */
+ if (bt != bt_prev)
+ sbitmap_queue_wake_up(bt_prev);
+
ws = bt_wait_ptr(bt, data->hctx);
} while (1);
@@ -259,7 +271,7 @@ static bool bt_tags_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data)
 * test and set the bit before assigning ->rqs[].
*/
rq = tags->rqs[bitnr];
- if (rq)
+ if (rq && blk_mq_rq_state(rq) == MQ_RQ_IN_FLIGHT)
iter_data->fn(rq, iter_data->data, reserved);
return true;
diff --git a/block/blk-mq.c b/block/blk-mq.c
index c3621453ad87..6332940ca118 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -95,18 +95,15 @@ static void blk_mq_check_inflight(struct blk_mq_hw_ctx *hctx,
{
struct mq_inflight *mi = priv;
- if (blk_mq_rq_state(rq) == MQ_RQ_IN_FLIGHT) {
- /*
- * index[0] counts the specific partition that was asked
- * for. index[1] counts the ones that are active on the
- * whole device, so increment that if mi->part is indeed
- * a partition, and not a whole device.
- */
- if (rq->part == mi->part)
- mi->inflight[0]++;
- if (mi->part->partno)
- mi->inflight[1]++;
- }
+ /*
+ * index[0] counts the specific partition that was asked for. index[1]
+ * counts the ones that are active on the whole device, so increment
+ * that if mi->part is indeed a partition, and not a whole device.
+ */
+ if (rq->part == mi->part)
+ mi->inflight[0]++;
+ if (mi->part->partno)
+ mi->inflight[1]++;
}
void blk_mq_in_flight(struct request_queue *q, struct hd_struct *part,
@@ -118,6 +115,25 @@ void blk_mq_in_flight(struct request_queue *q, struct hd_struct *part,
blk_mq_queue_tag_busy_iter(q, blk_mq_check_inflight, &mi);
}
+static void blk_mq_check_inflight_rw(struct blk_mq_hw_ctx *hctx,
+ struct request *rq, void *priv,
+ bool reserved)
+{
+ struct mq_inflight *mi = priv;
+
+ if (rq->part == mi->part)
+ mi->inflight[rq_data_dir(rq)]++;
+}
+
+void blk_mq_in_flight_rw(struct request_queue *q, struct hd_struct *part,
+ unsigned int inflight[2])
+{
+ struct mq_inflight mi = { .part = part, .inflight = inflight, };
+
+ inflight[0] = inflight[1] = 0;
+ blk_mq_queue_tag_busy_iter(q, blk_mq_check_inflight_rw, &mi);
+}
+
void blk_freeze_queue_start(struct request_queue *q)
{
int freeze_depth;
@@ -293,7 +309,8 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data,
RB_CLEAR_NODE(&rq->rb_node);
rq->rq_disk = NULL;
rq->part = NULL;
- rq->start_time = jiffies;
+ rq->start_time_ns = ktime_get_ns();
+ rq->io_start_time_ns = 0;
rq->nr_phys_segments = 0;
#if defined(CONFIG_BLK_DEV_INTEGRITY)
rq->nr_integrity_segments = 0;
@@ -312,11 +329,10 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data,
#ifdef CONFIG_BLK_CGROUP
rq->rl = NULL;
- set_start_time_ns(rq);
- rq->io_start_time_ns = 0;
#endif
data->ctx->rq_dispatched[op_is_sync(op)]++;
+ refcount_set(&rq->ref, 1);
return rq;
}
@@ -345,9 +361,11 @@ static struct request *blk_mq_get_request(struct request_queue *q,
/*
* Flush requests are special and go directly to the
- * dispatch list.
+ * dispatch list. Don't include reserved tags in the
+ * limiting, as it isn't useful.
*/
- if (!op_is_flush(op) && e->type->ops.mq.limit_depth)
+ if (!op_is_flush(op) && e->type->ops.mq.limit_depth &&
+ !(data->flags & BLK_MQ_REQ_RESERVED))
e->type->ops.mq.limit_depth(op, data);
}
@@ -448,13 +466,27 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q,
}
EXPORT_SYMBOL_GPL(blk_mq_alloc_request_hctx);
+static void __blk_mq_free_request(struct request *rq)
+{
+ struct request_queue *q = rq->q;
+ struct blk_mq_ctx *ctx = rq->mq_ctx;
+ struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);
+ const int sched_tag = rq->internal_tag;
+
+ if (rq->tag != -1)
+ blk_mq_put_tag(hctx, hctx->tags, ctx, rq->tag);
+ if (sched_tag != -1)
+ blk_mq_put_tag(hctx, hctx->sched_tags, ctx, sched_tag);
+ blk_mq_sched_restart(hctx);
+ blk_queue_exit(q);
+}
+
void blk_mq_free_request(struct request *rq)
{
struct request_queue *q = rq->q;
struct elevator_queue *e = q->elevator;
struct blk_mq_ctx *ctx = rq->mq_ctx;
struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);
- const int sched_tag = rq->internal_tag;
if (rq->rq_flags & RQF_ELVPRIV) {
if (e && e->type->ops.mq.finish_request)
@@ -472,27 +504,30 @@ void blk_mq_free_request(struct request *rq)
if (unlikely(laptop_mode && !blk_rq_is_passthrough(rq)))
laptop_io_completion(q->backing_dev_info);
- wbt_done(q->rq_wb, &rq->issue_stat);
+ wbt_done(q->rq_wb, rq);
if (blk_rq_rl(rq))
blk_put_rl(blk_rq_rl(rq));
- blk_mq_rq_update_state(rq, MQ_RQ_IDLE);
- if (rq->tag != -1)
- blk_mq_put_tag(hctx, hctx->tags, ctx, rq->tag);
- if (sched_tag != -1)
- blk_mq_put_tag(hctx, hctx->sched_tags, ctx, sched_tag);
- blk_mq_sched_restart(hctx);
- blk_queue_exit(q);
+ WRITE_ONCE(rq->state, MQ_RQ_IDLE);
+ if (refcount_dec_and_test(&rq->ref))
+ __blk_mq_free_request(rq);
}
EXPORT_SYMBOL_GPL(blk_mq_free_request);
inline void __blk_mq_end_request(struct request *rq, blk_status_t error)
{
- blk_account_io_done(rq);
+ u64 now = ktime_get_ns();
+
+ if (rq->rq_flags & RQF_STATS) {
+ blk_mq_poll_stats_start(rq->q);
+ blk_stat_add(rq, now);
+ }
+
+ blk_account_io_done(rq, now);
if (rq->end_io) {
- wbt_done(rq->q->rq_wb, &rq->issue_stat);
+ wbt_done(rq->q->rq_wb, rq);
rq->end_io(rq, error);
} else {
if (unlikely(blk_bidi_rq(rq)))
@@ -523,15 +558,12 @@ static void __blk_mq_complete_request(struct request *rq)
bool shared = false;
int cpu;
- WARN_ON_ONCE(blk_mq_rq_state(rq) != MQ_RQ_IN_FLIGHT);
- blk_mq_rq_update_state(rq, MQ_RQ_COMPLETE);
+ if (cmpxchg(&rq->state, MQ_RQ_IN_FLIGHT, MQ_RQ_COMPLETE) !=
+ MQ_RQ_IN_FLIGHT)
+ return;
if (rq->internal_tag != -1)
blk_mq_sched_completed_request(rq);
- if (rq->rq_flags & RQF_STATS) {
- blk_mq_poll_stats_start(rq->q);
- blk_stat_add(rq);
- }
if (!test_bit(QUEUE_FLAG_SAME_COMP, &rq->q->queue_flags)) {
rq->q->softirq_done_fn(rq);
@@ -573,36 +605,6 @@ static void hctx_lock(struct blk_mq_hw_ctx *hctx, int *srcu_idx)
*srcu_idx = srcu_read_lock(hctx->srcu);
}
-static void blk_mq_rq_update_aborted_gstate(struct request *rq, u64 gstate)
-{
- unsigned long flags;
-
- /*
- * blk_mq_rq_aborted_gstate() is used from the completion path and
- * can thus be called from irq context. u64_stats_fetch in the
- * middle of update on the same CPU leads to lockup. Disable irq
- * while updating.
- */
- local_irq_save(flags);
- u64_stats_update_begin(&rq->aborted_gstate_sync);
- rq->aborted_gstate = gstate;
- u64_stats_update_end(&rq->aborted_gstate_sync);
- local_irq_restore(flags);
-}
-
-static u64 blk_mq_rq_aborted_gstate(struct request *rq)
-{
- unsigned int start;
- u64 aborted_gstate;
-
- do {
- start = u64_stats_fetch_begin(&rq->aborted_gstate_sync);
- aborted_gstate = rq->aborted_gstate;
- } while (u64_stats_fetch_retry(&rq->aborted_gstate_sync, start));
-
- return aborted_gstate;
-}
-
/**
* blk_mq_complete_request - end I/O on a request
* @rq: the request being processed
@@ -613,28 +615,9 @@ static u64 blk_mq_rq_aborted_gstate(struct request *rq)
**/
void blk_mq_complete_request(struct request *rq)
{
- struct request_queue *q = rq->q;
- struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, rq->mq_ctx->cpu);
- int srcu_idx;
-
- if (unlikely(blk_should_fake_timeout(q)))
+ if (unlikely(blk_should_fake_timeout(rq->q)))
return;
-
- /*
- * If @rq->aborted_gstate equals the current instance, timeout is
- * claiming @rq and we lost. This is synchronized through
- * hctx_lock(). See blk_mq_timeout_work() for details.
- *
- * Completion path never blocks and we can directly use RCU here
- * instead of hctx_lock() which can be either RCU or SRCU.
- * However, that would complicate paths which want to synchronize
- * against us. Let stay in sync with the issue path so that
- * hctx_lock() covers both issue and completion paths.
- */
- hctx_lock(hctx, &srcu_idx);
- if (blk_mq_rq_aborted_gstate(rq) != rq->gstate)
- __blk_mq_complete_request(rq);
- hctx_unlock(hctx, srcu_idx);
+ __blk_mq_complete_request(rq);
}
EXPORT_SYMBOL(blk_mq_complete_request);
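
One property worth noting (a sketch of the claim, not code from this patch):
because __blk_mq_complete_request() now claims the request with a single
cmpxchg on rq->state, a second completion attempt is a harmless no-op:

	blk_mq_complete_request(rq);	/* winner flips IN_FLIGHT -> COMPLETE */
	blk_mq_complete_request(rq);	/* loser sees state != IN_FLIGHT, returns */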
@@ -653,32 +636,18 @@ void blk_mq_start_request(struct request *rq)
trace_block_rq_issue(q, rq);
if (test_bit(QUEUE_FLAG_STATS, &q->queue_flags)) {
- blk_stat_set_issue(&rq->issue_stat, blk_rq_sectors(rq));
+ rq->io_start_time_ns = ktime_get_ns();
+#ifdef CONFIG_BLK_DEV_THROTTLING_LOW
+ rq->throtl_size = blk_rq_sectors(rq);
+#endif
rq->rq_flags |= RQF_STATS;
- wbt_issue(q->rq_wb, &rq->issue_stat);
+ wbt_issue(q->rq_wb, rq);
}
WARN_ON_ONCE(blk_mq_rq_state(rq) != MQ_RQ_IDLE);
- /*
- * Mark @rq in-flight which also advances the generation number,
- * and register for timeout. Protect with a seqcount to allow the
- * timeout path to read both @rq->gstate and @rq->deadline
- * coherently.
- *
- * This is the only place where a request is marked in-flight. If
- * the timeout path reads an in-flight @rq->gstate, the
- * @rq->deadline it reads together under @rq->gstate_seq is
- * guaranteed to be the matching one.
- */
- preempt_disable();
- write_seqcount_begin(&rq->gstate_seq);
-
- blk_mq_rq_update_state(rq, MQ_RQ_IN_FLIGHT);
blk_add_timer(rq);
-
- write_seqcount_end(&rq->gstate_seq);
- preempt_enable();
+ WRITE_ONCE(rq->state, MQ_RQ_IN_FLIGHT);
if (q->dma_drain_size && blk_rq_bytes(rq)) {
/*
@@ -691,11 +660,6 @@ void blk_mq_start_request(struct request *rq)
}
EXPORT_SYMBOL(blk_mq_start_request);
-/*
- * When we reach here because queue is busy, it's safe to change the state
- * to IDLE without checking @rq->aborted_gstate because we should still be
- * holding the RCU read lock and thus protected against timeout.
- */
static void __blk_mq_requeue_request(struct request *rq)
{
struct request_queue *q = rq->q;
@@ -703,10 +667,10 @@ static void __blk_mq_requeue_request(struct request *rq)
blk_mq_put_driver_tag(rq);
trace_block_rq_requeue(q, rq);
- wbt_requeue(q->rq_wb, &rq->issue_stat);
+ wbt_requeue(q->rq_wb, rq);
- if (blk_mq_rq_state(rq) != MQ_RQ_IDLE) {
- blk_mq_rq_update_state(rq, MQ_RQ_IDLE);
+ if (blk_mq_request_started(rq)) {
+ WRITE_ONCE(rq->state, MQ_RQ_IDLE);
if (q->dma_drain_size && blk_rq_bytes(rq))
rq->nr_phys_segments--;
}
@@ -804,101 +768,79 @@ struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag)
}
EXPORT_SYMBOL(blk_mq_tag_to_rq);
-struct blk_mq_timeout_data {
- unsigned long next;
- unsigned int next_set;
- unsigned int nr_expired;
-};
-
static void blk_mq_rq_timed_out(struct request *req, bool reserved)
{
- const struct blk_mq_ops *ops = req->q->mq_ops;
- enum blk_eh_timer_return ret = BLK_EH_RESET_TIMER;
-
- req->rq_flags |= RQF_MQ_TIMEOUT_EXPIRED;
-
- if (ops->timeout)
- ret = ops->timeout(req, reserved);
+ if (req->q->mq_ops->timeout) {
+ enum blk_eh_timer_return ret;
- switch (ret) {
- case BLK_EH_HANDLED:
- __blk_mq_complete_request(req);
- break;
- case BLK_EH_RESET_TIMER:
- /*
- * As nothing prevents from completion happening while
- * ->aborted_gstate is set, this may lead to ignored
- * completions and further spurious timeouts.
- */
- blk_mq_rq_update_aborted_gstate(req, 0);
- blk_add_timer(req);
- break;
- case BLK_EH_NOT_HANDLED:
- break;
- default:
- printk(KERN_ERR "block: bad eh return: %d\n", ret);
- break;
+ ret = req->q->mq_ops->timeout(req, reserved);
+ if (ret == BLK_EH_DONE)
+ return;
+ WARN_ON_ONCE(ret != BLK_EH_RESET_TIMER);
}
+
+ blk_add_timer(req);
}
-static void blk_mq_check_expired(struct blk_mq_hw_ctx *hctx,
- struct request *rq, void *priv, bool reserved)
+static bool blk_mq_req_expired(struct request *rq, unsigned long *next)
{
- struct blk_mq_timeout_data *data = priv;
- unsigned long gstate, deadline;
- int start;
-
- might_sleep();
+ unsigned long deadline;
- if (rq->rq_flags & RQF_MQ_TIMEOUT_EXPIRED)
- return;
+ if (blk_mq_rq_state(rq) != MQ_RQ_IN_FLIGHT)
+ return false;
- /* read coherent snapshots of @rq->state_gen and @rq->deadline */
- while (true) {
- start = read_seqcount_begin(&rq->gstate_seq);
- gstate = READ_ONCE(rq->gstate);
- deadline = blk_rq_deadline(rq);
- if (!read_seqcount_retry(&rq->gstate_seq, start))
- break;
- cond_resched();
- }
+ deadline = blk_rq_deadline(rq);
+ if (time_after_eq(jiffies, deadline))
+ return true;
- /* if in-flight && overdue, mark for abortion */
- if ((gstate & MQ_RQ_STATE_MASK) == MQ_RQ_IN_FLIGHT &&
- time_after_eq(jiffies, deadline)) {
- blk_mq_rq_update_aborted_gstate(rq, gstate);
- data->nr_expired++;
- hctx->nr_expired++;
- } else if (!data->next_set || time_after(data->next, deadline)) {
- data->next = deadline;
- data->next_set = 1;
- }
+ if (*next == 0)
+ *next = deadline;
+ else if (time_after(*next, deadline))
+ *next = deadline;
+ return false;
}
-static void blk_mq_terminate_expired(struct blk_mq_hw_ctx *hctx,
+static void blk_mq_check_expired(struct blk_mq_hw_ctx *hctx,
struct request *rq, void *priv, bool reserved)
{
+ unsigned long *next = priv;
+
/*
- * We marked @rq->aborted_gstate and waited for RCU. If there were
- * completions that we lost to, they would have finished and
- * updated @rq->gstate by now; otherwise, the completion path is
- * now guaranteed to see @rq->aborted_gstate and yield. If
- * @rq->aborted_gstate still matches @rq->gstate, @rq is ours.
+	 * Just do a quick check if it is expired before locking the request
+	 * in, so we're not unnecessarily synchronizing across CPUs.
*/
- if (!(rq->rq_flags & RQF_MQ_TIMEOUT_EXPIRED) &&
- READ_ONCE(rq->gstate) == rq->aborted_gstate)
+ if (!blk_mq_req_expired(rq, next))
+ return;
+
+ /*
+ * We have reason to believe the request may be expired. Take a
+ * reference on the request to lock this request lifetime into its
+ * currently allocated context to prevent it from being reallocated in
+	 * the event the completion bypasses this timeout handler.
+ *
+ * If the reference was already released, then the driver beat the
+ * timeout handler to posting a natural completion.
+ */
+ if (!refcount_inc_not_zero(&rq->ref))
+ return;
+
+ /*
+ * The request is now locked and cannot be reallocated underneath the
+ * timeout handler's processing. Re-verify this exact request is truly
+ * expired; if it is not expired, then the request was completed and
+ * reallocated as a new request.
+ */
+ if (blk_mq_req_expired(rq, next))
blk_mq_rq_timed_out(rq, reserved);
+ if (refcount_dec_and_test(&rq->ref))
+ __blk_mq_free_request(rq);
}
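
The expired-check above follows a general acquire/recheck/release idiom;
reduced to its essentials it looks like this (a sketch; obj, still_expired(),
handle_timeout() and free_obj() are placeholders, not kernel APIs):

	if (!refcount_inc_not_zero(&obj->ref))
		return;			/* already freed and recycled */
	if (still_expired(obj))		/* re-verify under the reference */
		handle_timeout(obj);
	if (refcount_dec_and_test(&obj->ref))
		free_obj(obj);		/* we dropped the last reference */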
static void blk_mq_timeout_work(struct work_struct *work)
{
struct request_queue *q =
container_of(work, struct request_queue, timeout_work);
- struct blk_mq_timeout_data data = {
- .next = 0,
- .next_set = 0,
- .nr_expired = 0,
- };
+ unsigned long next = 0;
struct blk_mq_hw_ctx *hctx;
int i;
@@ -918,39 +860,10 @@ static void blk_mq_timeout_work(struct work_struct *work)
if (!percpu_ref_tryget(&q->q_usage_counter))
return;
- /* scan for the expired ones and set their ->aborted_gstate */
- blk_mq_queue_tag_busy_iter(q, blk_mq_check_expired, &data);
-
- if (data.nr_expired) {
- bool has_rcu = false;
-
- /*
- * Wait till everyone sees ->aborted_gstate. The
- * sequential waits for SRCUs aren't ideal. If this ever
- * becomes a problem, we can add per-hw_ctx rcu_head and
- * wait in parallel.
- */
- queue_for_each_hw_ctx(q, hctx, i) {
- if (!hctx->nr_expired)
- continue;
-
- if (!(hctx->flags & BLK_MQ_F_BLOCKING))
- has_rcu = true;
- else
- synchronize_srcu(hctx->srcu);
-
- hctx->nr_expired = 0;
- }
- if (has_rcu)
- synchronize_rcu();
-
- /* terminate the ones we won */
- blk_mq_queue_tag_busy_iter(q, blk_mq_terminate_expired, NULL);
- }
+ blk_mq_queue_tag_busy_iter(q, blk_mq_check_expired, &next);
- if (data.next_set) {
- data.next = blk_rq_timeout(round_jiffies_up(data.next));
- mod_timer(&q->timeout, data.next);
+ if (next != 0) {
+ mod_timer(&q->timeout, next);
} else {
/*
* Request timeouts are handled as a forward rolling timer. If
@@ -1013,7 +926,7 @@ static bool dispatch_rq_from_ctx(struct sbitmap *sb, unsigned int bitnr,
struct blk_mq_ctx *ctx = hctx->ctxs[bitnr];
spin_lock(&ctx->lock);
- if (unlikely(!list_empty(&ctx->rq_list))) {
+ if (!list_empty(&ctx->rq_list)) {
dispatch_data->rq = list_entry_rq(ctx->rq_list.next);
list_del_init(&dispatch_data->rq->queuelist);
if (list_empty(&ctx->rq_list))
@@ -1700,15 +1613,6 @@ static void blk_mq_bio_to_request(struct request *rq, struct bio *bio)
blk_account_io_start(rq, true);
}
-static inline void blk_mq_queue_io(struct blk_mq_hw_ctx *hctx,
- struct blk_mq_ctx *ctx,
- struct request *rq)
-{
- spin_lock(&ctx->lock);
- __blk_mq_insert_request(hctx, rq, false);
- spin_unlock(&ctx->lock);
-}
-
static blk_qc_t request_to_qc_t(struct blk_mq_hw_ctx *hctx, struct request *rq)
{
if (rq->tag != -1)
@@ -1866,7 +1770,7 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
return BLK_QC_T_NONE;
}
- wbt_track(&rq->issue_stat, wb_acct);
+ wbt_track(rq, wb_acct);
cookie = request_to_qc_t(data.hctx, rq);
@@ -1933,15 +1837,10 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
blk_mq_put_ctx(data.ctx);
blk_mq_bio_to_request(rq, bio);
blk_mq_try_issue_directly(data.hctx, rq, &cookie);
- } else if (q->elevator) {
- blk_mq_put_ctx(data.ctx);
- blk_mq_bio_to_request(rq, bio);
- blk_mq_sched_insert_request(rq, false, true, true);
} else {
blk_mq_put_ctx(data.ctx);
blk_mq_bio_to_request(rq, bio);
- blk_mq_queue_io(data.hctx, data.ctx, rq);
- blk_mq_run_hw_queue(data.hctx, true);
+ blk_mq_sched_insert_request(rq, false, true, true);
}
return cookie;
@@ -2040,15 +1939,7 @@ static int blk_mq_init_request(struct blk_mq_tag_set *set, struct request *rq,
return ret;
}
- seqcount_init(&rq->gstate_seq);
- u64_stats_init(&rq->aborted_gstate_sync);
- /*
- * start gstate with gen 1 instead of 0, otherwise it will be equal
- * to aborted_gstate, and be identified timed out by
- * blk_mq_terminate_expired.
- */
- WRITE_ONCE(rq->gstate, MQ_RQ_GEN_INC);
-
+ WRITE_ONCE(rq->state, MQ_RQ_IDLE);
return 0;
}
@@ -2349,6 +2240,7 @@ static void blk_mq_map_swqueue(struct request_queue *q)
queue_for_each_hw_ctx(q, hctx, i) {
cpumask_clear(hctx->cpumask);
hctx->nr_ctx = 0;
+ hctx->dispatch_from = NULL;
}
/*
@@ -2681,7 +2573,7 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
if (!(set->flags & BLK_MQ_F_NO_SCHED)) {
int ret;
- ret = blk_mq_sched_init(q);
+ ret = elevator_init_mq(q);
if (ret)
return ERR_PTR(ret);
}
diff --git a/block/blk-mq.h b/block/blk-mq.h
index 89b5cd3a6c70..89231e439b2f 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -30,20 +30,6 @@ struct blk_mq_ctx {
struct kobject kobj;
} ____cacheline_aligned_in_smp;
-/*
- * Bits for request->gstate. The lower two bits carry MQ_RQ_* state value
- * and the upper bits the generation number.
- */
-enum mq_rq_state {
- MQ_RQ_IDLE = 0,
- MQ_RQ_IN_FLIGHT = 1,
- MQ_RQ_COMPLETE = 2,
-
- MQ_RQ_STATE_BITS = 2,
- MQ_RQ_STATE_MASK = (1 << MQ_RQ_STATE_BITS) - 1,
- MQ_RQ_GEN_INC = 1 << MQ_RQ_STATE_BITS,
-};
-
void blk_mq_freeze_queue(struct request_queue *q);
void blk_mq_free_queue(struct request_queue *q);
int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr);
@@ -107,33 +93,9 @@ void blk_mq_release(struct request_queue *q);
* blk_mq_rq_state() - read the current MQ_RQ_* state of a request
* @rq: target request.
*/
-static inline int blk_mq_rq_state(struct request *rq)
+static inline enum mq_rq_state blk_mq_rq_state(struct request *rq)
{
- return READ_ONCE(rq->gstate) & MQ_RQ_STATE_MASK;
-}
-
-/**
- * blk_mq_rq_update_state() - set the current MQ_RQ_* state of a request
- * @rq: target request.
- * @state: new state to set.
- *
- * Set @rq's state to @state. The caller is responsible for ensuring that
- * there are no other updaters. A request can transition into IN_FLIGHT
- * only from IDLE and doing so increments the generation number.
- */
-static inline void blk_mq_rq_update_state(struct request *rq,
- enum mq_rq_state state)
-{
- u64 old_val = READ_ONCE(rq->gstate);
- u64 new_val = (old_val & ~MQ_RQ_STATE_MASK) | state;
-
- if (state == MQ_RQ_IN_FLIGHT) {
- WARN_ON_ONCE((old_val & MQ_RQ_STATE_MASK) != MQ_RQ_IDLE);
- new_val += MQ_RQ_GEN_INC;
- }
-
- /* avoid exposing interim values */
- WRITE_ONCE(rq->gstate, new_val);
+ return READ_ONCE(rq->state);
}
static inline struct blk_mq_ctx *__blk_mq_get_ctx(struct request_queue *q,
@@ -188,7 +150,9 @@ static inline bool blk_mq_hw_queue_mapped(struct blk_mq_hw_ctx *hctx)
}
void blk_mq_in_flight(struct request_queue *q, struct hd_struct *part,
- unsigned int inflight[2]);
+ unsigned int inflight[2]);
+void blk_mq_in_flight_rw(struct request_queue *q, struct hd_struct *part,
+ unsigned int inflight[2]);
static inline void blk_mq_put_dispatch_budget(struct blk_mq_hw_ctx *hctx)
{
diff --git a/block/blk-stat.c b/block/blk-stat.c
index bd365a95fcf8..175c143ac5b9 100644
--- a/block/blk-stat.c
+++ b/block/blk-stat.c
@@ -47,19 +47,15 @@ static void __blk_stat_add(struct blk_rq_stat *stat, u64 value)
stat->nr_samples++;
}
-void blk_stat_add(struct request *rq)
+void blk_stat_add(struct request *rq, u64 now)
{
struct request_queue *q = rq->q;
struct blk_stat_callback *cb;
struct blk_rq_stat *stat;
int bucket;
- u64 now, value;
+ u64 value;
- now = __blk_stat_time(ktime_to_ns(ktime_get()));
- if (now < blk_stat_time(&rq->issue_stat))
- return;
-
- value = now - blk_stat_time(&rq->issue_stat);
+ value = (now >= rq->io_start_time_ns) ? now - rq->io_start_time_ns : 0;
blk_throtl_stat_add(rq, value);
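/*
 * Editor's sketch: with plain u64 nanosecond timestamps, a completion
 * observed before its recorded issue time (requeues, timestamp races)
 * must clamp to zero, or the unsigned subtraction wraps to a huge
 * bogus latency. That is all the ternary above does:
 */
static inline u64 io_latency_ns(u64 issue_ns, u64 complete_ns)
{
	return complete_ns >= issue_ns ? complete_ns - issue_ns : 0;
}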
diff --git a/block/blk-stat.h b/block/blk-stat.h
index 2dd36347252a..78399cdde9c9 100644
--- a/block/blk-stat.h
+++ b/block/blk-stat.h
@@ -8,21 +8,6 @@
#include <linux/rcupdate.h>
#include <linux/timer.h>
-/*
- * from upper:
- * 3 bits: reserved for other usage
- * 12 bits: size
- * 49 bits: time
- */
-#define BLK_STAT_RES_BITS 3
-#define BLK_STAT_SIZE_BITS 12
-#define BLK_STAT_RES_SHIFT (64 - BLK_STAT_RES_BITS)
-#define BLK_STAT_SIZE_SHIFT (BLK_STAT_RES_SHIFT - BLK_STAT_SIZE_BITS)
-#define BLK_STAT_TIME_MASK ((1ULL << BLK_STAT_SIZE_SHIFT) - 1)
-#define BLK_STAT_SIZE_MASK \
- (((1ULL << BLK_STAT_SIZE_BITS) - 1) << BLK_STAT_SIZE_SHIFT)
-#define BLK_STAT_RES_MASK (~((1ULL << BLK_STAT_RES_SHIFT) - 1))
-
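/*
 * Editor's note: for reference, a model of the 64-bit packing the
 * deleted macros implemented (3 reserved bits, 12 size bits, 49 time
 * bits), now superseded by dedicated u64 fields in struct request:
 */
static inline u64 pack_issue_stat(u64 res3, u64 size12, u64 time_ns)
{
	return (res3 << 61) |				/* reserved bits */
	       ((size12 & ((1ULL << 12) - 1)) << 49) |	/* capped size */
	       (time_ns & ((1ULL << 49) - 1));		/* truncated time */
}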
/**
* struct blk_stat_callback - Block statistics callback.
*
@@ -80,35 +65,7 @@ struct blk_stat_callback {
struct blk_queue_stats *blk_alloc_queue_stats(void);
void blk_free_queue_stats(struct blk_queue_stats *);
-void blk_stat_add(struct request *);
-
-static inline u64 __blk_stat_time(u64 time)
-{
- return time & BLK_STAT_TIME_MASK;
-}
-
-static inline u64 blk_stat_time(struct blk_issue_stat *stat)
-{
- return __blk_stat_time(stat->stat);
-}
-
-static inline sector_t blk_capped_size(sector_t size)
-{
- return size & ((1ULL << BLK_STAT_SIZE_BITS) - 1);
-}
-
-static inline sector_t blk_stat_size(struct blk_issue_stat *stat)
-{
- return (stat->stat & BLK_STAT_SIZE_MASK) >> BLK_STAT_SIZE_SHIFT;
-}
-
-static inline void blk_stat_set_issue(struct blk_issue_stat *stat,
- sector_t size)
-{
- stat->stat = (stat->stat & BLK_STAT_RES_MASK) |
- (ktime_to_ns(ktime_get()) & BLK_STAT_TIME_MASK) |
- (((u64)blk_capped_size(size)) << BLK_STAT_SIZE_SHIFT);
-}
+void blk_stat_add(struct request *rq, u64 now);
/* record time/size info in request but not add a callback */
void blk_stat_enable_accounting(struct request_queue *q);
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index d00d1b0ec109..94987b1f69e1 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -491,188 +491,198 @@ static ssize_t queue_wc_store(struct request_queue *q, const char *page,
return count;
}
+static ssize_t queue_fua_show(struct request_queue *q, char *page)
+{
+ return sprintf(page, "%u\n", test_bit(QUEUE_FLAG_FUA, &q->queue_flags));
+}
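/*
 * Editor's sketch (userspace, illustrative device path): the new
 * read-only "fua" attribute surfaces QUEUE_FLAG_FUA, e.g. as
 * /sys/block/sda/queue/fua containing "0" or "1".
 */
#include <stdio.h>

int main(void)
{
	char buf[8];
	FILE *f = fopen("/sys/block/sda/queue/fua", "r");

	if (f && fgets(buf, sizeof(buf), f))
		printf("FUA: %s", buf);
	if (f)
		fclose(f);
	return 0;
}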
+
static ssize_t queue_dax_show(struct request_queue *q, char *page)
{
return queue_var_show(blk_queue_dax(q), page);
}
static struct queue_sysfs_entry queue_requests_entry = {
- .attr = {.name = "nr_requests", .mode = S_IRUGO | S_IWUSR },
+ .attr = {.name = "nr_requests", .mode = 0644 },
.show = queue_requests_show,
.store = queue_requests_store,
};
static struct queue_sysfs_entry queue_ra_entry = {
- .attr = {.name = "read_ahead_kb", .mode = S_IRUGO | S_IWUSR },
+ .attr = {.name = "read_ahead_kb", .mode = 0644 },
.show = queue_ra_show,
.store = queue_ra_store,
};
static struct queue_sysfs_entry queue_max_sectors_entry = {
- .attr = {.name = "max_sectors_kb", .mode = S_IRUGO | S_IWUSR },
+ .attr = {.name = "max_sectors_kb", .mode = 0644 },
.show = queue_max_sectors_show,
.store = queue_max_sectors_store,
};
static struct queue_sysfs_entry queue_max_hw_sectors_entry = {
- .attr = {.name = "max_hw_sectors_kb", .mode = S_IRUGO },
+ .attr = {.name = "max_hw_sectors_kb", .mode = 0444 },
.show = queue_max_hw_sectors_show,
};
static struct queue_sysfs_entry queue_max_segments_entry = {
- .attr = {.name = "max_segments", .mode = S_IRUGO },
+ .attr = {.name = "max_segments", .mode = 0444 },
.show = queue_max_segments_show,
};
static struct queue_sysfs_entry queue_max_discard_segments_entry = {
- .attr = {.name = "max_discard_segments", .mode = S_IRUGO },
+ .attr = {.name = "max_discard_segments", .mode = 0444 },
.show = queue_max_discard_segments_show,
};
static struct queue_sysfs_entry queue_max_integrity_segments_entry = {
- .attr = {.name = "max_integrity_segments", .mode = S_IRUGO },
+ .attr = {.name = "max_integrity_segments", .mode = 0444 },
.show = queue_max_integrity_segments_show,
};
static struct queue_sysfs_entry queue_max_segment_size_entry = {
- .attr = {.name = "max_segment_size", .mode = S_IRUGO },
+ .attr = {.name = "max_segment_size", .mode = 0444 },
.show = queue_max_segment_size_show,
};
static struct queue_sysfs_entry queue_iosched_entry = {
- .attr = {.name = "scheduler", .mode = S_IRUGO | S_IWUSR },
+ .attr = {.name = "scheduler", .mode = 0644 },
.show = elv_iosched_show,
.store = elv_iosched_store,
};
static struct queue_sysfs_entry queue_hw_sector_size_entry = {
- .attr = {.name = "hw_sector_size", .mode = S_IRUGO },
+ .attr = {.name = "hw_sector_size", .mode = 0444 },
.show = queue_logical_block_size_show,
};
static struct queue_sysfs_entry queue_logical_block_size_entry = {
- .attr = {.name = "logical_block_size", .mode = S_IRUGO },
+ .attr = {.name = "logical_block_size", .mode = 0444 },
.show = queue_logical_block_size_show,
};
static struct queue_sysfs_entry queue_physical_block_size_entry = {
- .attr = {.name = "physical_block_size", .mode = S_IRUGO },
+ .attr = {.name = "physical_block_size", .mode = 0444 },
.show = queue_physical_block_size_show,
};
static struct queue_sysfs_entry queue_chunk_sectors_entry = {
- .attr = {.name = "chunk_sectors", .mode = S_IRUGO },
+ .attr = {.name = "chunk_sectors", .mode = 0444 },
.show = queue_chunk_sectors_show,
};
static struct queue_sysfs_entry queue_io_min_entry = {
- .attr = {.name = "minimum_io_size", .mode = S_IRUGO },
+ .attr = {.name = "minimum_io_size", .mode = 0444 },
.show = queue_io_min_show,
};
static struct queue_sysfs_entry queue_io_opt_entry = {
- .attr = {.name = "optimal_io_size", .mode = S_IRUGO },
+ .attr = {.name = "optimal_io_size", .mode = 0444 },
.show = queue_io_opt_show,
};
static struct queue_sysfs_entry queue_discard_granularity_entry = {
- .attr = {.name = "discard_granularity", .mode = S_IRUGO },
+ .attr = {.name = "discard_granularity", .mode = 0444 },
.show = queue_discard_granularity_show,
};
static struct queue_sysfs_entry queue_discard_max_hw_entry = {
- .attr = {.name = "discard_max_hw_bytes", .mode = S_IRUGO },
+ .attr = {.name = "discard_max_hw_bytes", .mode = 0444 },
.show = queue_discard_max_hw_show,
};
static struct queue_sysfs_entry queue_discard_max_entry = {
- .attr = {.name = "discard_max_bytes", .mode = S_IRUGO | S_IWUSR },
+ .attr = {.name = "discard_max_bytes", .mode = 0644 },
.show = queue_discard_max_show,
.store = queue_discard_max_store,
};
static struct queue_sysfs_entry queue_discard_zeroes_data_entry = {
- .attr = {.name = "discard_zeroes_data", .mode = S_IRUGO },
+ .attr = {.name = "discard_zeroes_data", .mode = 0444 },
.show = queue_discard_zeroes_data_show,
};
static struct queue_sysfs_entry queue_write_same_max_entry = {
- .attr = {.name = "write_same_max_bytes", .mode = S_IRUGO },
+ .attr = {.name = "write_same_max_bytes", .mode = 0444 },
.show = queue_write_same_max_show,
};
static struct queue_sysfs_entry queue_write_zeroes_max_entry = {
- .attr = {.name = "write_zeroes_max_bytes", .mode = S_IRUGO },
+ .attr = {.name = "write_zeroes_max_bytes", .mode = 0444 },
.show = queue_write_zeroes_max_show,
};
static struct queue_sysfs_entry queue_nonrot_entry = {
- .attr = {.name = "rotational", .mode = S_IRUGO | S_IWUSR },
+ .attr = {.name = "rotational", .mode = 0644 },
.show = queue_show_nonrot,
.store = queue_store_nonrot,
};
static struct queue_sysfs_entry queue_zoned_entry = {
- .attr = {.name = "zoned", .mode = S_IRUGO },
+ .attr = {.name = "zoned", .mode = 0444 },
.show = queue_zoned_show,
};
static struct queue_sysfs_entry queue_nomerges_entry = {
- .attr = {.name = "nomerges", .mode = S_IRUGO | S_IWUSR },
+ .attr = {.name = "nomerges", .mode = 0644 },
.show = queue_nomerges_show,
.store = queue_nomerges_store,
};
static struct queue_sysfs_entry queue_rq_affinity_entry = {
- .attr = {.name = "rq_affinity", .mode = S_IRUGO | S_IWUSR },
+ .attr = {.name = "rq_affinity", .mode = 0644 },
.show = queue_rq_affinity_show,
.store = queue_rq_affinity_store,
};
static struct queue_sysfs_entry queue_iostats_entry = {
- .attr = {.name = "iostats", .mode = S_IRUGO | S_IWUSR },
+ .attr = {.name = "iostats", .mode = 0644 },
.show = queue_show_iostats,
.store = queue_store_iostats,
};
static struct queue_sysfs_entry queue_random_entry = {
- .attr = {.name = "add_random", .mode = S_IRUGO | S_IWUSR },
+ .attr = {.name = "add_random", .mode = 0644 },
.show = queue_show_random,
.store = queue_store_random,
};
static struct queue_sysfs_entry queue_poll_entry = {
- .attr = {.name = "io_poll", .mode = S_IRUGO | S_IWUSR },
+ .attr = {.name = "io_poll", .mode = 0644 },
.show = queue_poll_show,
.store = queue_poll_store,
};
static struct queue_sysfs_entry queue_poll_delay_entry = {
- .attr = {.name = "io_poll_delay", .mode = S_IRUGO | S_IWUSR },
+ .attr = {.name = "io_poll_delay", .mode = 0644 },
.show = queue_poll_delay_show,
.store = queue_poll_delay_store,
};
static struct queue_sysfs_entry queue_wc_entry = {
- .attr = {.name = "write_cache", .mode = S_IRUGO | S_IWUSR },
+ .attr = {.name = "write_cache", .mode = 0644 },
.show = queue_wc_show,
.store = queue_wc_store,
};
+static struct queue_sysfs_entry queue_fua_entry = {
+ .attr = {.name = "fua", .mode = 0444 },
+ .show = queue_fua_show,
+};
+
static struct queue_sysfs_entry queue_dax_entry = {
- .attr = {.name = "dax", .mode = S_IRUGO },
+ .attr = {.name = "dax", .mode = 0444 },
.show = queue_dax_show,
};
static struct queue_sysfs_entry queue_wb_lat_entry = {
- .attr = {.name = "wbt_lat_usec", .mode = S_IRUGO | S_IWUSR },
+ .attr = {.name = "wbt_lat_usec", .mode = 0644 },
.show = queue_wb_lat_show,
.store = queue_wb_lat_store,
};
#ifdef CONFIG_BLK_DEV_THROTTLING_LOW
static struct queue_sysfs_entry throtl_sample_time_entry = {
- .attr = {.name = "throttle_sample_time", .mode = S_IRUGO | S_IWUSR },
+ .attr = {.name = "throttle_sample_time", .mode = 0644 },
.show = blk_throtl_sample_time_show,
.store = blk_throtl_sample_time_store,
};
@@ -708,6 +718,7 @@ static struct attribute *default_attrs[] = {
&queue_random_entry.attr,
&queue_poll_entry.attr,
&queue_wc_entry.attr,
+ &queue_fua_entry.attr,
&queue_dax_entry.attr,
&queue_wb_lat_entry.attr,
&queue_poll_delay_entry.attr,
@@ -813,8 +824,7 @@ static void __blk_release_queue(struct work_struct *work)
if (q->mq_ops)
blk_mq_debugfs_unregister(q);
- if (q->bio_split)
- bioset_free(q->bio_split);
+ bioset_exit(&q->bio_split);
ida_simple_remove(&blk_queue_ida, q->id);
call_rcu(&q->rcu_head, blk_free_queue_rcu);
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index c5a131673733..82282e6fdcf8 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -36,8 +36,6 @@ static int throtl_quantum = 32;
*/
#define LATENCY_FILTERED_HD (1000L) /* 1ms */
-#define SKIP_LATENCY (((u64)1) << BLK_STAT_RES_SHIFT)
-
static struct blkcg_policy blkcg_policy_throtl;
/* A workqueue to queue throttle related work */
@@ -821,7 +819,7 @@ static bool throtl_slice_used(struct throtl_grp *tg, bool rw)
if (time_in_range(jiffies, tg->slice_start[rw], tg->slice_end[rw]))
return false;
- return 1;
+ return true;
}
/* Trim the used slices and adjust slice start accordingly */
@@ -931,7 +929,7 @@ static bool tg_with_in_iops_limit(struct throtl_grp *tg, struct bio *bio,
if (wait)
*wait = jiffy_wait;
- return 0;
+ return false;
}
static bool tg_with_in_bps_limit(struct throtl_grp *tg, struct bio *bio,
@@ -974,7 +972,7 @@ static bool tg_with_in_bps_limit(struct throtl_grp *tg, struct bio *bio,
jiffy_wait = jiffy_wait + (jiffy_elapsed_rnd - jiffy_elapsed);
if (wait)
*wait = jiffy_wait;
- return 0;
+ return false;
}
/*
@@ -1024,7 +1022,7 @@ static bool tg_may_dispatch(struct throtl_grp *tg, struct bio *bio,
tg_with_in_iops_limit(tg, bio, &iops_wait)) {
if (wait)
*wait = 0;
- return 1;
+ return true;
}
max_wait = max(bps_wait, iops_wait);
@@ -1035,7 +1033,7 @@ static bool tg_may_dispatch(struct throtl_grp *tg, struct bio *bio,
if (time_before(tg->slice_end[rw], jiffies + max_wait))
throtl_extend_slice(tg, rw, jiffies + max_wait);
- return 0;
+ return false;
}
static void throtl_charge_bio(struct throtl_grp *tg, struct bio *bio)
@@ -1209,7 +1207,7 @@ static int throtl_select_dispatch(struct throtl_service_queue *parent_sq)
while (1) {
struct throtl_grp *tg = throtl_rb_first(parent_sq);
- struct throtl_service_queue *sq = &tg->service_queue;
+ struct throtl_service_queue *sq;
if (!tg)
break;
@@ -1221,6 +1219,7 @@ static int throtl_select_dispatch(struct throtl_service_queue *parent_sq)
nr_disp += throtl_dispatch_tg(tg);
+ sq = &tg->service_queue;
if (sq->nr_queued[0] || sq->nr_queued[1])
tg_update_disptime(tg);
@@ -2139,7 +2138,7 @@ static void blk_throtl_assoc_bio(struct throtl_grp *tg, struct bio *bio)
bio->bi_cg_private = tg;
blkg_get(tg_to_blkg(tg));
}
- blk_stat_set_issue(&bio->bi_issue_stat, bio_sectors(bio));
+ bio_issue_init(&bio->bi_issue, bio_sectors(bio));
#endif
}
@@ -2251,7 +2250,7 @@ out:
#ifdef CONFIG_BLK_DEV_THROTTLING_LOW
if (throttled || !td->track_bio_latency)
- bio->bi_issue_stat.stat |= SKIP_LATENCY;
+ bio->bi_issue.value |= BIO_ISSUE_THROTL_SKIP_LATENCY;
#endif
return throttled;
}
@@ -2281,8 +2280,7 @@ void blk_throtl_stat_add(struct request *rq, u64 time_ns)
struct request_queue *q = rq->q;
struct throtl_data *td = q->td;
- throtl_track_latency(td, blk_stat_size(&rq->issue_stat),
- req_op(rq), time_ns >> 10);
+ throtl_track_latency(td, rq->throtl_size, req_op(rq), time_ns >> 10);
}
void blk_throtl_bio_endio(struct bio *bio)
@@ -2302,8 +2300,8 @@ void blk_throtl_bio_endio(struct bio *bio)
finish_time_ns = ktime_get_ns();
tg->last_finish_time = finish_time_ns >> 10;
- start_time = blk_stat_time(&bio->bi_issue_stat) >> 10;
- finish_time = __blk_stat_time(finish_time_ns) >> 10;
+ start_time = bio_issue_time(&bio->bi_issue) >> 10;
+ finish_time = __bio_issue_time(finish_time_ns) >> 10;
if (!start_time || finish_time <= start_time) {
blkg_put(tg_to_blkg(tg));
return;
@@ -2311,16 +2309,15 @@ void blk_throtl_bio_endio(struct bio *bio)
lat = finish_time - start_time;
/* this is only for bio based driver */
- if (!(bio->bi_issue_stat.stat & SKIP_LATENCY))
- throtl_track_latency(tg->td, blk_stat_size(&bio->bi_issue_stat),
- bio_op(bio), lat);
+ if (!(bio->bi_issue.value & BIO_ISSUE_THROTL_SKIP_LATENCY))
+ throtl_track_latency(tg->td, bio_issue_size(&bio->bi_issue),
+ bio_op(bio), lat);
if (tg->latency_target && lat >= tg->td->filtered_latency) {
int bucket;
unsigned int threshold;
- bucket = request_bucket_index(
- blk_stat_size(&bio->bi_issue_stat));
+ bucket = request_bucket_index(bio_issue_size(&bio->bi_issue));
threshold = tg->td->avg_buckets[rw][bucket].latency +
tg->latency_target;
if (lat > threshold)
diff --git a/block/blk-timeout.c b/block/blk-timeout.c
index 652d4d4d3e97..4b8a48d48ba1 100644
--- a/block/blk-timeout.c
+++ b/block/blk-timeout.c
@@ -86,14 +86,11 @@ static void blk_rq_timed_out(struct request *req)
if (q->rq_timed_out_fn)
ret = q->rq_timed_out_fn(req);
switch (ret) {
- case BLK_EH_HANDLED:
- __blk_complete_request(req);
- break;
case BLK_EH_RESET_TIMER:
blk_add_timer(req);
blk_clear_rq_complete(req);
break;
- case BLK_EH_NOT_HANDLED:
+ case BLK_EH_DONE:
/*
* LLD handles this for now but in the future
* we can send a request msg to abort the command
@@ -214,7 +211,6 @@ void blk_add_timer(struct request *req)
req->timeout = q->rq_timeout;
blk_rq_set_deadline(req, jiffies + req->timeout);
- req->rq_flags &= ~RQF_MQ_TIMEOUT_EXPIRED;
/*
* Only the non-mq case needs to add the request to a protected list.
diff --git a/block/blk-wbt.c b/block/blk-wbt.c
index f92fc84b5e2c..4f89b28fa652 100644
--- a/block/blk-wbt.c
+++ b/block/blk-wbt.c
@@ -29,6 +29,26 @@
#define CREATE_TRACE_POINTS
#include <trace/events/wbt.h>
+static inline void wbt_clear_state(struct request *rq)
+{
+ rq->wbt_flags = 0;
+}
+
+static inline enum wbt_flags wbt_flags(struct request *rq)
+{
+ return rq->wbt_flags;
+}
+
+static inline bool wbt_is_tracked(struct request *rq)
+{
+ return rq->wbt_flags & WBT_TRACKED;
+}
+
+static inline bool wbt_is_read(struct request *rq)
+{
+ return rq->wbt_flags & WBT_READ;
+}
+
enum {
/*
* Default setting, we'll scale up (to 75% of QD max) or down (min 1)
@@ -101,9 +121,15 @@ static bool wb_recent_wait(struct rq_wb *rwb)
return time_before(jiffies, wb->dirty_sleep + HZ);
}
-static inline struct rq_wait *get_rq_wait(struct rq_wb *rwb, bool is_kswapd)
+static inline struct rq_wait *get_rq_wait(struct rq_wb *rwb,
+ enum wbt_flags wb_acct)
{
- return &rwb->rq_wait[is_kswapd];
+ if (wb_acct & WBT_KSWAPD)
+ return &rwb->rq_wait[WBT_RWQ_KSWAPD];
+ else if (wb_acct & WBT_DISCARD)
+ return &rwb->rq_wait[WBT_RWQ_DISCARD];
+
+ return &rwb->rq_wait[WBT_RWQ_BG];
}
static void rwb_wake_all(struct rq_wb *rwb)
@@ -126,7 +152,7 @@ void __wbt_done(struct rq_wb *rwb, enum wbt_flags wb_acct)
if (!(wb_acct & WBT_TRACKED))
return;
- rqw = get_rq_wait(rwb, wb_acct & WBT_KSWAPD);
+ rqw = get_rq_wait(rwb, wb_acct);
inflight = atomic_dec_return(&rqw->inflight);
/*
@@ -139,10 +165,13 @@ void __wbt_done(struct rq_wb *rwb, enum wbt_flags wb_acct)
}
/*
- * If the device does write back caching, drop further down
- * before we wake people up.
+	 * For discards, our limit is always the background limit. For
+	 * writes, if the device does write-back caching, drop further down
+	 * before we wake people up.
*/
- if (rwb->wc && !wb_recent_wait(rwb))
+ if (wb_acct & WBT_DISCARD)
+ limit = rwb->wb_background;
+ else if (rwb->wc && !wb_recent_wait(rwb))
limit = 0;
else
limit = rwb->wb_normal;
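/*
 * Editor's sketch of the wakeup-limit policy above (field names as in
 * struct rq_wb): discards always wake at the background limit; writes
 * on a device doing write-back caching wait for a full drain unless a
 * throttle sleep happened recently.
 */
static unsigned int wbt_wake_limit(struct rq_wb *rwb, enum wbt_flags acct,
				   bool recent_wait)
{
	if (acct & WBT_DISCARD)
		return rwb->wb_background;
	if (rwb->wc && !recent_wait)
		return 0;
	return rwb->wb_normal;
}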
@@ -165,24 +194,24 @@ void __wbt_done(struct rq_wb *rwb, enum wbt_flags wb_acct)
* Called on completion of a request. Note that it's also called when
* a request is merged, when the request gets freed.
*/
-void wbt_done(struct rq_wb *rwb, struct blk_issue_stat *stat)
+void wbt_done(struct rq_wb *rwb, struct request *rq)
{
if (!rwb)
return;
- if (!wbt_is_tracked(stat)) {
- if (rwb->sync_cookie == stat) {
+ if (!wbt_is_tracked(rq)) {
+ if (rwb->sync_cookie == rq) {
rwb->sync_issue = 0;
rwb->sync_cookie = NULL;
}
- if (wbt_is_read(stat))
+ if (wbt_is_read(rq))
wb_timestamp(rwb, &rwb->last_comp);
} else {
- WARN_ON_ONCE(stat == rwb->sync_cookie);
- __wbt_done(rwb, wbt_stat_to_mask(stat));
+ WARN_ON_ONCE(rq == rwb->sync_cookie);
+ __wbt_done(rwb, wbt_flags(rq));
}
- wbt_clear_state(stat);
+ wbt_clear_state(rq);
}
/*
@@ -479,6 +508,9 @@ static inline unsigned int get_limit(struct rq_wb *rwb, unsigned long rw)
{
unsigned int limit;
+ if ((rw & REQ_OP_MASK) == REQ_OP_DISCARD)
+ return rwb->wb_background;
+
/*
* At this point we know it's a buffered write. If this is
* kswapd trying to free memory, or REQ_SYNC is set, then
@@ -529,11 +561,12 @@ static inline bool may_queue(struct rq_wb *rwb, struct rq_wait *rqw,
* Block if we will exceed our limit, or if we are currently waiting for
* the timer to kick off queuing again.
*/
-static void __wbt_wait(struct rq_wb *rwb, unsigned long rw, spinlock_t *lock)
+static void __wbt_wait(struct rq_wb *rwb, enum wbt_flags wb_acct,
+ unsigned long rw, spinlock_t *lock)
__releases(lock)
__acquires(lock)
{
- struct rq_wait *rqw = get_rq_wait(rwb, current_is_kswapd());
+ struct rq_wait *rqw = get_rq_wait(rwb, wb_acct);
DEFINE_WAIT(wait);
if (may_queue(rwb, rqw, &wait, rw))
@@ -559,21 +592,20 @@ static void __wbt_wait(struct rq_wb *rwb, unsigned long rw, spinlock_t *lock)
static inline bool wbt_should_throttle(struct rq_wb *rwb, struct bio *bio)
{
- const int op = bio_op(bio);
-
- /*
- * If not a WRITE, do nothing
- */
- if (op != REQ_OP_WRITE)
- return false;
-
- /*
- * Don't throttle WRITE_ODIRECT
- */
- if ((bio->bi_opf & (REQ_SYNC | REQ_IDLE)) == (REQ_SYNC | REQ_IDLE))
+ switch (bio_op(bio)) {
+ case REQ_OP_WRITE:
+ /*
+ * Don't throttle WRITE_ODIRECT
+ */
+ if ((bio->bi_opf & (REQ_SYNC | REQ_IDLE)) ==
+ (REQ_SYNC | REQ_IDLE))
+ return false;
+ /* fallthrough */
+ case REQ_OP_DISCARD:
+ return true;
+ default:
return false;
-
- return true;
+ }
}
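/*
 * Editor's sketch: the switch above, restated as a boolean predicate
 * (illustrative): throttle every discard, and every write except
 * REQ_SYNC|REQ_IDLE (O_DIRECT-style) ones.
 */
static bool should_throttle_op(unsigned int op, unsigned int opf)
{
	if (op == REQ_OP_DISCARD)
		return true;
	return op == REQ_OP_WRITE &&
	       (opf & (REQ_SYNC | REQ_IDLE)) != (REQ_SYNC | REQ_IDLE);
}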
/*
@@ -584,7 +616,7 @@ static inline bool wbt_should_throttle(struct rq_wb *rwb, struct bio *bio)
*/
enum wbt_flags wbt_wait(struct rq_wb *rwb, struct bio *bio, spinlock_t *lock)
{
- unsigned int ret = 0;
+ enum wbt_flags ret = 0;
if (!rwb_enabled(rwb))
return 0;
@@ -598,41 +630,42 @@ enum wbt_flags wbt_wait(struct rq_wb *rwb, struct bio *bio, spinlock_t *lock)
return ret;
}
- __wbt_wait(rwb, bio->bi_opf, lock);
+ if (current_is_kswapd())
+ ret |= WBT_KSWAPD;
+ if (bio_op(bio) == REQ_OP_DISCARD)
+ ret |= WBT_DISCARD;
+
+ __wbt_wait(rwb, ret, bio->bi_opf, lock);
if (!blk_stat_is_active(rwb->cb))
rwb_arm_timer(rwb);
- if (current_is_kswapd())
- ret |= WBT_KSWAPD;
-
return ret | WBT_TRACKED;
}
-void wbt_issue(struct rq_wb *rwb, struct blk_issue_stat *stat)
+void wbt_issue(struct rq_wb *rwb, struct request *rq)
{
if (!rwb_enabled(rwb))
return;
/*
- * Track sync issue, in case it takes a long time to complete. Allows
- * us to react quicker, if a sync IO takes a long time to complete.
- * Note that this is just a hint. 'stat' can go away when the
- * request completes, so it's important we never dereference it. We
- * only use the address to compare with, which is why we store the
- * sync_issue time locally.
+	 * Track sync issue, in case it takes a long time to complete. Allows us
+	 * to react more quickly if it does. Note
+ * that this is just a hint. The request can go away when it completes,
+ * so it's important we never dereference it. We only use the address to
+ * compare with, which is why we store the sync_issue time locally.
*/
- if (wbt_is_read(stat) && !rwb->sync_issue) {
- rwb->sync_cookie = stat;
- rwb->sync_issue = blk_stat_time(stat);
+ if (wbt_is_read(rq) && !rwb->sync_issue) {
+ rwb->sync_cookie = rq;
+ rwb->sync_issue = rq->io_start_time_ns;
}
}
-void wbt_requeue(struct rq_wb *rwb, struct blk_issue_stat *stat)
+void wbt_requeue(struct rq_wb *rwb, struct request *rq)
{
if (!rwb_enabled(rwb))
return;
- if (stat == rwb->sync_cookie) {
+ if (rq == rwb->sync_cookie) {
rwb->sync_issue = 0;
rwb->sync_cookie = NULL;
}
@@ -701,7 +734,7 @@ static int wbt_data_dir(const struct request *rq)
if (op == REQ_OP_READ)
return READ;
- else if (op == REQ_OP_WRITE || op == REQ_OP_FLUSH)
+ else if (op_is_write(op))
return WRITE;
/* don't account */
@@ -713,8 +746,6 @@ int wbt_init(struct request_queue *q)
struct rq_wb *rwb;
int i;
- BUILD_BUG_ON(WBT_NR_BITS > BLK_STAT_RES_BITS);
-
rwb = kzalloc(sizeof(*rwb), GFP_KERNEL);
if (!rwb)
return -ENOMEM;
diff --git a/block/blk-wbt.h b/block/blk-wbt.h
index a232c98fbf4d..300df531d0a6 100644
--- a/block/blk-wbt.h
+++ b/block/blk-wbt.h
@@ -14,12 +14,16 @@ enum wbt_flags {
WBT_TRACKED = 1, /* write, tracked for throttling */
WBT_READ = 2, /* read */
WBT_KSWAPD = 4, /* write, from kswapd */
+ WBT_DISCARD = 8, /* discard */
- WBT_NR_BITS = 3, /* number of bits */
+ WBT_NR_BITS = 4, /* number of bits */
};
enum {
- WBT_NUM_RWQ = 2,
+ WBT_RWQ_BG = 0,
+ WBT_RWQ_KSWAPD,
+ WBT_RWQ_DISCARD,
+ WBT_NUM_RWQ,
};
/*
@@ -31,31 +35,6 @@ enum {
WBT_STATE_ON_MANUAL = 2,
};
-static inline void wbt_clear_state(struct blk_issue_stat *stat)
-{
- stat->stat &= ~BLK_STAT_RES_MASK;
-}
-
-static inline enum wbt_flags wbt_stat_to_mask(struct blk_issue_stat *stat)
-{
- return (stat->stat & BLK_STAT_RES_MASK) >> BLK_STAT_RES_SHIFT;
-}
-
-static inline void wbt_track(struct blk_issue_stat *stat, enum wbt_flags wb_acct)
-{
- stat->stat |= ((u64) wb_acct) << BLK_STAT_RES_SHIFT;
-}
-
-static inline bool wbt_is_tracked(struct blk_issue_stat *stat)
-{
- return (stat->stat >> BLK_STAT_RES_SHIFT) & WBT_TRACKED;
-}
-
-static inline bool wbt_is_read(struct blk_issue_stat *stat)
-{
- return (stat->stat >> BLK_STAT_RES_SHIFT) & WBT_READ;
-}
-
struct rq_wait {
wait_queue_head_t wait;
atomic_t inflight;
@@ -84,7 +63,7 @@ struct rq_wb {
struct blk_stat_callback *cb;
- s64 sync_issue;
+ u64 sync_issue;
void *sync_cookie;
unsigned int wc;
@@ -109,14 +88,19 @@ static inline unsigned int wbt_inflight(struct rq_wb *rwb)
#ifdef CONFIG_BLK_WBT
+static inline void wbt_track(struct request *rq, enum wbt_flags flags)
+{
+ rq->wbt_flags |= flags;
+}
+
void __wbt_done(struct rq_wb *, enum wbt_flags);
-void wbt_done(struct rq_wb *, struct blk_issue_stat *);
+void wbt_done(struct rq_wb *, struct request *);
enum wbt_flags wbt_wait(struct rq_wb *, struct bio *, spinlock_t *);
int wbt_init(struct request_queue *);
void wbt_exit(struct request_queue *);
void wbt_update_limits(struct rq_wb *);
-void wbt_requeue(struct rq_wb *, struct blk_issue_stat *);
-void wbt_issue(struct rq_wb *, struct blk_issue_stat *);
+void wbt_requeue(struct rq_wb *, struct request *);
+void wbt_issue(struct rq_wb *, struct request *);
void wbt_disable_default(struct request_queue *);
void wbt_enable_default(struct request_queue *);
@@ -127,10 +111,13 @@ u64 wbt_default_latency_nsec(struct request_queue *);
#else
+static inline void wbt_track(struct request *rq, enum wbt_flags flags)
+{
+}
static inline void __wbt_done(struct rq_wb *rwb, enum wbt_flags flags)
{
}
-static inline void wbt_done(struct rq_wb *rwb, struct blk_issue_stat *stat)
+static inline void wbt_done(struct rq_wb *rwb, struct request *rq)
{
}
static inline enum wbt_flags wbt_wait(struct rq_wb *rwb, struct bio *bio,
@@ -148,10 +135,10 @@ static inline void wbt_exit(struct request_queue *q)
static inline void wbt_update_limits(struct rq_wb *rwb)
{
}
-static inline void wbt_requeue(struct rq_wb *rwb, struct blk_issue_stat *stat)
+static inline void wbt_requeue(struct rq_wb *rwb, struct request *rq)
{
}
-static inline void wbt_issue(struct rq_wb *rwb, struct blk_issue_stat *stat)
+static inline void wbt_issue(struct rq_wb *rwb, struct request *rq)
{
}
static inline void wbt_disable_default(struct request_queue *q)
diff --git a/block/blk-zoned.c b/block/blk-zoned.c
index 08e84ef2bc05..3d08dc84db16 100644
--- a/block/blk-zoned.c
+++ b/block/blk-zoned.c
@@ -328,7 +328,11 @@ int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode,
if (!rep.nr_zones)
return -EINVAL;
- zones = kcalloc(rep.nr_zones, sizeof(struct blk_zone), GFP_KERNEL);
+ if (rep.nr_zones > INT_MAX / sizeof(struct blk_zone))
+ return -ERANGE;
+
+ zones = kvmalloc(rep.nr_zones * sizeof(struct blk_zone),
+ GFP_KERNEL | __GFP_ZERO);
if (!zones)
return -ENOMEM;
@@ -350,7 +354,7 @@ int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode,
}
out:
- kfree(zones);
+ kvfree(zones);
return ret;
}
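/*
 * Editor's sketch: validating a user-supplied element count before the
 * count * size multiply, then allocating with kvmalloc() so large
 * reports may fall back to vmalloc. kvmalloc_array() folds the
 * overflow check and multiply together where available.
 */
static void *alloc_zone_array(unsigned int nr, size_t elem_size)
{
	if (nr > INT_MAX / elem_size)
		return NULL;	/* multiplication would overflow */
	return kvmalloc(nr * elem_size, GFP_KERNEL | __GFP_ZERO);
}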
diff --git a/block/blk.h b/block/blk.h
index b034fd2460c4..8d23aea96ce9 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -186,7 +186,7 @@ unsigned int blk_plug_queued_count(struct request_queue *q);
void blk_account_io_start(struct request *req, bool new_io);
void blk_account_io_completion(struct request *req, unsigned int bytes);
-void blk_account_io_done(struct request *req);
+void blk_account_io_done(struct request *req, u64 now);
/*
* EH timer and IO completion will both attempt to 'grab' the request, make
@@ -231,6 +231,9 @@ static inline void elv_deactivate_rq(struct request_queue *q, struct request *rq
e->type->ops.sq.elevator_deactivate_req_fn(q, rq);
}
+int elevator_init(struct request_queue *);
+int elevator_init_mq(struct request_queue *q);
+void elevator_exit(struct request_queue *, struct elevator_queue *);
int elv_register_queue(struct request_queue *q);
void elv_unregister_queue(struct request_queue *q);
diff --git a/block/bounce.c b/block/bounce.c
index dd0b93f2a871..fd31347b7836 100644
--- a/block/bounce.c
+++ b/block/bounce.c
@@ -28,28 +28,29 @@
#define POOL_SIZE 64
#define ISA_POOL_SIZE 16
-static struct bio_set *bounce_bio_set, *bounce_bio_split;
-static mempool_t *page_pool, *isa_page_pool;
+static struct bio_set bounce_bio_set, bounce_bio_split;
+static mempool_t page_pool, isa_page_pool;
#if defined(CONFIG_HIGHMEM)
static __init int init_emergency_pool(void)
{
+ int ret;
#if defined(CONFIG_HIGHMEM) && !defined(CONFIG_MEMORY_HOTPLUG)
if (max_pfn <= max_low_pfn)
return 0;
#endif
- page_pool = mempool_create_page_pool(POOL_SIZE, 0);
- BUG_ON(!page_pool);
+ ret = mempool_init_page_pool(&page_pool, POOL_SIZE, 0);
+ BUG_ON(ret);
pr_info("pool size: %d pages\n", POOL_SIZE);
- bounce_bio_set = bioset_create(BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
- BUG_ON(!bounce_bio_set);
- if (bioset_integrity_create(bounce_bio_set, BIO_POOL_SIZE))
+ ret = bioset_init(&bounce_bio_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
+ BUG_ON(ret);
+ if (bioset_integrity_create(&bounce_bio_set, BIO_POOL_SIZE))
BUG_ON(1);
- bounce_bio_split = bioset_create(BIO_POOL_SIZE, 0, 0);
- BUG_ON(!bounce_bio_split);
+ ret = bioset_init(&bounce_bio_split, BIO_POOL_SIZE, 0, 0);
+ BUG_ON(ret);
return 0;
}
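/*
 * Editor's sketch of the embedded-set conversion above: the bio_set
 * and mempool are now members (here, statics) rather than separately
 * allocated objects, so setup reports failure via a return code and
 * teardown pairs with the *_exit() helpers. Names are illustrative;
 * BIO_POOL_SIZE and POOL_SIZE are the constants used in this file.
 */
static struct bio_set example_bio_set;
static mempool_t example_page_pool;

static int example_init(void)
{
	int ret = bioset_init(&example_bio_set, BIO_POOL_SIZE, 0,
			      BIOSET_NEED_BVECS);
	if (ret)
		return ret;
	ret = mempool_init_page_pool(&example_page_pool, POOL_SIZE, 0);
	if (ret)
		bioset_exit(&example_bio_set);
	return ret;
}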
@@ -63,14 +64,11 @@ __initcall(init_emergency_pool);
*/
static void bounce_copy_vec(struct bio_vec *to, unsigned char *vfrom)
{
- unsigned long flags;
unsigned char *vto;
- local_irq_save(flags);
vto = kmap_atomic(to->bv_page);
memcpy(vto + to->bv_offset, vfrom, to->bv_len);
kunmap_atomic(vto);
- local_irq_restore(flags);
}
#else /* CONFIG_HIGHMEM */
@@ -94,12 +92,14 @@ static void *mempool_alloc_pages_isa(gfp_t gfp_mask, void *data)
*/
int init_emergency_isa_pool(void)
{
- if (isa_page_pool)
+ int ret;
+
+ if (mempool_initialized(&isa_page_pool))
return 0;
- isa_page_pool = mempool_create(ISA_POOL_SIZE, mempool_alloc_pages_isa,
- mempool_free_pages, (void *) 0);
- BUG_ON(!isa_page_pool);
+ ret = mempool_init(&isa_page_pool, ISA_POOL_SIZE, mempool_alloc_pages_isa,
+ mempool_free_pages, (void *) 0);
+ BUG_ON(ret);
pr_info("isa pool size: %d pages\n", ISA_POOL_SIZE);
return 0;
@@ -166,13 +166,13 @@ static void bounce_end_io(struct bio *bio, mempool_t *pool)
static void bounce_end_io_write(struct bio *bio)
{
- bounce_end_io(bio, page_pool);
+ bounce_end_io(bio, &page_pool);
}
static void bounce_end_io_write_isa(struct bio *bio)
{
- bounce_end_io(bio, isa_page_pool);
+ bounce_end_io(bio, &isa_page_pool);
}
static void __bounce_end_io_read(struct bio *bio, mempool_t *pool)
@@ -187,12 +187,12 @@ static void __bounce_end_io_read(struct bio *bio, mempool_t *pool)
static void bounce_end_io_read(struct bio *bio)
{
- __bounce_end_io_read(bio, page_pool);
+ __bounce_end_io_read(bio, &page_pool);
}
static void bounce_end_io_read_isa(struct bio *bio)
{
- __bounce_end_io_read(bio, isa_page_pool);
+ __bounce_end_io_read(bio, &isa_page_pool);
}
static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig,
@@ -217,13 +217,13 @@ static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig,
return;
if (!passthrough && sectors < bio_sectors(*bio_orig)) {
- bio = bio_split(*bio_orig, sectors, GFP_NOIO, bounce_bio_split);
+ bio = bio_split(*bio_orig, sectors, GFP_NOIO, &bounce_bio_split);
bio_chain(bio, *bio_orig);
generic_make_request(*bio_orig);
*bio_orig = bio;
}
bio = bio_clone_bioset(*bio_orig, GFP_NOIO, passthrough ? NULL :
- bounce_bio_set);
+ &bounce_bio_set);
bio_for_each_segment_all(to, bio, i) {
struct page *page = to->bv_page;
@@ -250,7 +250,7 @@ static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig,
bio->bi_flags |= (1 << BIO_BOUNCED);
- if (pool == page_pool) {
+ if (pool == &page_pool) {
bio->bi_end_io = bounce_end_io_write;
if (rw == READ)
bio->bi_end_io = bounce_end_io_read;
@@ -282,10 +282,10 @@ void blk_queue_bounce(struct request_queue *q, struct bio **bio_orig)
if (!(q->bounce_gfp & GFP_DMA)) {
if (q->limits.bounce_pfn >= blk_max_pfn)
return;
- pool = page_pool;
+ pool = &page_pool;
} else {
- BUG_ON(!isa_page_pool);
- pool = isa_page_pool;
+ BUG_ON(!mempool_initialized(&isa_page_pool));
+ pool = &isa_page_pool;
}
/*
diff --git a/block/bsg-lib.c b/block/bsg-lib.c
index fc2e5ff2c4b9..9419def8c017 100644
--- a/block/bsg-lib.c
+++ b/block/bsg-lib.c
@@ -303,11 +303,9 @@ static void bsg_exit_rq(struct request_queue *q, struct request *req)
* @name: device to give bsg device
* @job_fn: bsg job handler
* @dd_job_size: size of LLD data needed for each job
- * @release: @dev release function
*/
struct request_queue *bsg_setup_queue(struct device *dev, const char *name,
- bsg_job_fn *job_fn, int dd_job_size,
- void (*release)(struct device *))
+ bsg_job_fn *job_fn, int dd_job_size)
{
struct request_queue *q;
int ret;
@@ -331,7 +329,7 @@ struct request_queue *bsg_setup_queue(struct device *dev, const char *name,
blk_queue_softirq_done(q, bsg_softirq_done);
blk_queue_rq_timeout(q, BLK_DEFAULT_SG_TIMEOUT);
- ret = bsg_register_queue(q, dev, name, &bsg_transport_ops, release);
+ ret = bsg_register_queue(q, dev, name, &bsg_transport_ops);
if (ret) {
printk(KERN_ERR "%s: bsg interface failed to "
"initialize - register queue\n", dev->kobj.name);
diff --git a/block/bsg.c b/block/bsg.c
index defa06c11858..132e657e2d91 100644
--- a/block/bsg.c
+++ b/block/bsg.c
@@ -226,8 +226,7 @@ bsg_map_hdr(struct request_queue *q, struct sg_io_v4 *hdr, fmode_t mode)
return ERR_PTR(ret);
rq = blk_get_request(q, hdr->dout_xfer_len ?
- REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN,
- GFP_KERNEL);
+ REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN, 0);
if (IS_ERR(rq))
return rq;
@@ -249,7 +248,7 @@ bsg_map_hdr(struct request_queue *q, struct sg_io_v4 *hdr, fmode_t mode)
goto out;
}
- next_rq = blk_get_request(q, REQ_OP_SCSI_IN, GFP_KERNEL);
+ next_rq = blk_get_request(q, REQ_OP_SCSI_IN, 0);
if (IS_ERR(next_rq)) {
ret = PTR_ERR(next_rq);
goto out;
@@ -650,18 +649,6 @@ static struct bsg_device *bsg_alloc_device(void)
return bd;
}
-static void bsg_kref_release_function(struct kref *kref)
-{
- struct bsg_class_device *bcd =
- container_of(kref, struct bsg_class_device, ref);
- struct device *parent = bcd->parent;
-
- if (bcd->release)
- bcd->release(bcd->parent);
-
- put_device(parent);
-}
-
static int bsg_put_device(struct bsg_device *bd)
{
int ret = 0, do_free;
@@ -694,7 +681,6 @@ static int bsg_put_device(struct bsg_device *bd)
kfree(bd);
out:
- kref_put(&q->bsg_dev.ref, bsg_kref_release_function);
if (do_free)
blk_put_queue(q);
return ret;
@@ -760,8 +746,6 @@ static struct bsg_device *bsg_get_device(struct inode *inode, struct file *file)
*/
mutex_lock(&bsg_mutex);
bcd = idr_find(&bsg_minor_idr, iminor(inode));
- if (bcd)
- kref_get(&bcd->ref);
mutex_unlock(&bsg_mutex);
if (!bcd)
@@ -772,8 +756,6 @@ static struct bsg_device *bsg_get_device(struct inode *inode, struct file *file)
return bd;
bd = bsg_add_device(inode, bcd->queue, file);
- if (IS_ERR(bd))
- kref_put(&bcd->ref, bsg_kref_release_function);
return bd;
}
@@ -913,25 +895,17 @@ void bsg_unregister_queue(struct request_queue *q)
sysfs_remove_link(&q->kobj, "bsg");
device_unregister(bcd->class_dev);
bcd->class_dev = NULL;
- kref_put(&bcd->ref, bsg_kref_release_function);
mutex_unlock(&bsg_mutex);
}
EXPORT_SYMBOL_GPL(bsg_unregister_queue);
int bsg_register_queue(struct request_queue *q, struct device *parent,
- const char *name, const struct bsg_ops *ops,
- void (*release)(struct device *))
+ const char *name, const struct bsg_ops *ops)
{
struct bsg_class_device *bcd;
dev_t dev;
int ret;
struct device *class_dev = NULL;
- const char *devname;
-
- if (name)
- devname = name;
- else
- devname = dev_name(parent);
/*
* we need a proper transport to send commands, not a stacked device
@@ -955,15 +929,12 @@ int bsg_register_queue(struct request_queue *q, struct device *parent,
bcd->minor = ret;
bcd->queue = q;
- bcd->parent = get_device(parent);
- bcd->release = release;
bcd->ops = ops;
- kref_init(&bcd->ref);
dev = MKDEV(bsg_major, bcd->minor);
- class_dev = device_create(bsg_class, parent, dev, NULL, "%s", devname);
+ class_dev = device_create(bsg_class, parent, dev, NULL, "%s", name);
if (IS_ERR(class_dev)) {
ret = PTR_ERR(class_dev);
- goto put_dev;
+ goto idr_remove;
}
bcd->class_dev = class_dev;
@@ -978,8 +949,7 @@ int bsg_register_queue(struct request_queue *q, struct device *parent,
unregister_class_dev:
device_unregister(class_dev);
-put_dev:
- put_device(parent);
+idr_remove:
idr_remove(&bsg_minor_idr, bcd->minor);
unlock:
mutex_unlock(&bsg_mutex);
@@ -993,7 +963,7 @@ int bsg_scsi_register_queue(struct request_queue *q, struct device *parent)
return -EINVAL;
}
- return bsg_register_queue(q, parent, NULL, &bsg_scsi_ops, NULL);
+ return bsg_register_queue(q, parent, dev_name(parent), &bsg_scsi_ops);
}
EXPORT_SYMBOL_GPL(bsg_scsi_register_queue);
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 9f342ef1ad42..82b6c27b3245 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -210,9 +210,9 @@ struct cfqg_stats {
/* total time with empty current active q with other requests queued */
struct blkg_stat empty_time;
/* fields after this shouldn't be cleared on stat reset */
- uint64_t start_group_wait_time;
- uint64_t start_idle_time;
- uint64_t start_empty_time;
+ u64 start_group_wait_time;
+ u64 start_idle_time;
+ u64 start_empty_time;
uint16_t flags;
#endif /* CONFIG_DEBUG_BLK_CGROUP */
#endif /* CONFIG_CFQ_GROUP_IOSCHED */
@@ -491,13 +491,13 @@ CFQG_FLAG_FNS(empty)
/* This should be called with the queue_lock held. */
static void cfqg_stats_update_group_wait_time(struct cfqg_stats *stats)
{
- unsigned long long now;
+ u64 now;
if (!cfqg_stats_waiting(stats))
return;
- now = sched_clock();
- if (time_after64(now, stats->start_group_wait_time))
+ now = ktime_get_ns();
+ if (now > stats->start_group_wait_time)
blkg_stat_add(&stats->group_wait_time,
now - stats->start_group_wait_time);
cfqg_stats_clear_waiting(stats);
@@ -513,20 +513,20 @@ static void cfqg_stats_set_start_group_wait_time(struct cfq_group *cfqg,
return;
if (cfqg == curr_cfqg)
return;
- stats->start_group_wait_time = sched_clock();
+ stats->start_group_wait_time = ktime_get_ns();
cfqg_stats_mark_waiting(stats);
}
/* This should be called with the queue_lock held. */
static void cfqg_stats_end_empty_time(struct cfqg_stats *stats)
{
- unsigned long long now;
+ u64 now;
if (!cfqg_stats_empty(stats))
return;
- now = sched_clock();
- if (time_after64(now, stats->start_empty_time))
+ now = ktime_get_ns();
+ if (now > stats->start_empty_time)
blkg_stat_add(&stats->empty_time,
now - stats->start_empty_time);
cfqg_stats_clear_empty(stats);
@@ -552,7 +552,7 @@ static void cfqg_stats_set_start_empty_time(struct cfq_group *cfqg)
if (cfqg_stats_empty(stats))
return;
- stats->start_empty_time = sched_clock();
+ stats->start_empty_time = ktime_get_ns();
cfqg_stats_mark_empty(stats);
}
@@ -561,9 +561,9 @@ static void cfqg_stats_update_idle_time(struct cfq_group *cfqg)
struct cfqg_stats *stats = &cfqg->stats;
if (cfqg_stats_idling(stats)) {
- unsigned long long now = sched_clock();
+ u64 now = ktime_get_ns();
- if (time_after64(now, stats->start_idle_time))
+ if (now > stats->start_idle_time)
blkg_stat_add(&stats->idle_time,
now - stats->start_idle_time);
cfqg_stats_clear_idling(stats);
@@ -576,7 +576,7 @@ static void cfqg_stats_set_start_idle_time(struct cfq_group *cfqg)
BUG_ON(cfqg_stats_idling(stats));
- stats->start_idle_time = sched_clock();
+ stats->start_idle_time = ktime_get_ns();
cfqg_stats_mark_idling(stats);
}
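/*
 * Editor's sketch: both timestamps now come from the same monotonic
 * 64-bit clock (ktime_get_ns()), so a plain comparison suffices where
 * the old sched_clock() values needed the wrap-aware time_after64():
 */
static inline u64 elapsed_ns(u64 start_ns)
{
	u64 now = ktime_get_ns();

	return now > start_ns ? now - start_ns : 0;
}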
@@ -701,17 +701,19 @@ static inline void cfqg_stats_update_io_merged(struct cfq_group *cfqg,
}
static inline void cfqg_stats_update_completion(struct cfq_group *cfqg,
- uint64_t start_time, uint64_t io_start_time,
- unsigned int op)
+ u64 start_time_ns,
+ u64 io_start_time_ns,
+ unsigned int op)
{
struct cfqg_stats *stats = &cfqg->stats;
- unsigned long long now = sched_clock();
+ u64 now = ktime_get_ns();
- if (time_after64(now, io_start_time))
- blkg_rwstat_add(&stats->service_time, op, now - io_start_time);
- if (time_after64(io_start_time, start_time))
+ if (now > io_start_time_ns)
+ blkg_rwstat_add(&stats->service_time, op,
+ now - io_start_time_ns);
+ if (io_start_time_ns > start_time_ns)
blkg_rwstat_add(&stats->wait_time, op,
- io_start_time - start_time);
+ io_start_time_ns - start_time_ns);
}
/* @stats = 0 */
@@ -797,8 +799,9 @@ static inline void cfqg_stats_update_io_remove(struct cfq_group *cfqg,
static inline void cfqg_stats_update_io_merged(struct cfq_group *cfqg,
unsigned int op) { }
static inline void cfqg_stats_update_completion(struct cfq_group *cfqg,
- uint64_t start_time, uint64_t io_start_time,
- unsigned int op) { }
+ u64 start_time_ns,
+ u64 io_start_time_ns,
+ unsigned int op) { }
#endif /* CONFIG_CFQ_GROUP_IOSCHED */
@@ -4225,8 +4228,8 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq)
cfqd->rq_in_driver--;
cfqq->dispatched--;
(RQ_CFQG(rq))->dispatched--;
- cfqg_stats_update_completion(cfqq->cfqg, rq_start_time_ns(rq),
- rq_io_start_time_ns(rq), rq->cmd_flags);
+ cfqg_stats_update_completion(cfqq->cfqg, rq->start_time_ns,
+ rq->io_start_time_ns, rq->cmd_flags);
cfqd->rq_in_flight[cfq_cfqq_sync(cfqq)]--;
@@ -4242,16 +4245,7 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq)
cfqq_type(cfqq));
st->ttime.last_end_request = now;
- /*
- * We have to do this check in jiffies since start_time is in
- * jiffies and it is not trivial to convert to ns. If
- * cfq_fifo_expire[1] ever comes close to 1 jiffie, this test
- * will become problematic but so far we are fine (the default
- * is 128 ms).
- */
- if (!time_after(rq->start_time +
- nsecs_to_jiffies(cfqd->cfq_fifo_expire[1]),
- jiffies))
+ if (rq->start_time_ns + cfqd->cfq_fifo_expire[1] <= now)
cfqd->last_delayed_sync = now;
}
@@ -4792,7 +4786,7 @@ USEC_STORE_FUNCTION(cfq_target_latency_us_store, &cfqd->cfq_target_latency, 1, U
#undef USEC_STORE_FUNCTION
#define CFQ_ATTR(name) \
- __ATTR(name, S_IRUGO|S_IWUSR, cfq_##name##_show, cfq_##name##_store)
+ __ATTR(name, 0644, cfq_##name##_show, cfq_##name##_store)
static struct elv_fs_entry cfq_attrs[] = {
CFQ_ATTR(quantum),
diff --git a/block/deadline-iosched.c b/block/deadline-iosched.c
index 9de9f156e203..ef2f1f09e9b3 100644
--- a/block/deadline-iosched.c
+++ b/block/deadline-iosched.c
@@ -512,8 +512,7 @@ STORE_FUNCTION(deadline_fifo_batch_store, &dd->fifo_batch, 0, INT_MAX, 0);
#undef STORE_FUNCTION
#define DD_ATTR(name) \
- __ATTR(name, S_IRUGO|S_IWUSR, deadline_##name##_show, \
- deadline_##name##_store)
+ __ATTR(name, 0644, deadline_##name##_show, deadline_##name##_store)
static struct elv_fs_entry deadline_attrs[] = {
DD_ATTR(read_expire),
diff --git a/block/elevator.c b/block/elevator.c
index e87e9b43aba0..fa828b5bfd4b 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -199,76 +199,46 @@ static void elevator_release(struct kobject *kobj)
kfree(e);
}
-int elevator_init(struct request_queue *q, char *name)
+/*
+ * Use the default elevator specified by the config boot param for non-mq
+ * devices, or by the config option. Don't try to load modules, as we could
+ * be running off an async path and request_module() isn't allowed from async
+ * context.
+ */
+int elevator_init(struct request_queue *q)
{
struct elevator_type *e = NULL;
- int err;
+ int err = 0;
/*
* q->sysfs_lock must be held to provide mutual exclusion between
* elevator_switch() and here.
*/
- lockdep_assert_held(&q->sysfs_lock);
-
+ mutex_lock(&q->sysfs_lock);
if (unlikely(q->elevator))
- return 0;
-
- INIT_LIST_HEAD(&q->queue_head);
- q->last_merge = NULL;
- q->end_sector = 0;
- q->boundary_rq = NULL;
-
- if (name) {
- e = elevator_get(q, name, true);
- if (!e)
- return -EINVAL;
- }
+ goto out_unlock;
- /*
- * Use the default elevator specified by config boot param for
- * non-mq devices, or by config option. Don't try to load modules
- * as we could be running off async and request_module() isn't
- * allowed from async.
- */
- if (!e && !q->mq_ops && *chosen_elevator) {
+ if (*chosen_elevator) {
e = elevator_get(q, chosen_elevator, false);
if (!e)
printk(KERN_ERR "I/O scheduler %s not found\n",
chosen_elevator);
}
+ if (!e)
+ e = elevator_get(q, CONFIG_DEFAULT_IOSCHED, false);
if (!e) {
- /*
- * For blk-mq devices, we default to using mq-deadline,
- * if available, for single queue devices. If deadline
- * isn't available OR we have multiple queues, default
- * to "none".
- */
- if (q->mq_ops) {
- if (q->nr_hw_queues == 1)
- e = elevator_get(q, "mq-deadline", false);
- if (!e)
- return 0;
- } else
- e = elevator_get(q, CONFIG_DEFAULT_IOSCHED, false);
-
- if (!e) {
- printk(KERN_ERR
- "Default I/O scheduler not found. " \
- "Using noop.\n");
- e = elevator_get(q, "noop", false);
- }
+ printk(KERN_ERR
+ "Default I/O scheduler not found. Using noop.\n");
+ e = elevator_get(q, "noop", false);
}
- if (e->uses_mq)
- err = blk_mq_init_sched(q, e);
- else
- err = e->ops.sq.elevator_init_fn(q, e);
+ err = e->ops.sq.elevator_init_fn(q, e);
if (err)
elevator_put(e);
+out_unlock:
+ mutex_unlock(&q->sysfs_lock);
return err;
}
-EXPORT_SYMBOL(elevator_init);
void elevator_exit(struct request_queue *q, struct elevator_queue *e)
{
@@ -281,7 +251,6 @@ void elevator_exit(struct request_queue *q, struct elevator_queue *e)
kobject_put(&e->kobj);
}
-EXPORT_SYMBOL(elevator_exit);
static inline void __elv_rqhash_del(struct request *rq)
{
@@ -1005,6 +974,40 @@ out:
}
/*
+ * For blk-mq devices, we default to using mq-deadline, if available, for single
+ * queue devices. If deadline isn't available OR we have multiple queues,
+ * default to "none".
+ */
+int elevator_init_mq(struct request_queue *q)
+{
+ struct elevator_type *e;
+ int err = 0;
+
+ if (q->nr_hw_queues != 1)
+ return 0;
+
+ /*
+ * q->sysfs_lock must be held to provide mutual exclusion between
+ * elevator_switch() and here.
+ */
+ mutex_lock(&q->sysfs_lock);
+ if (unlikely(q->elevator))
+ goto out_unlock;
+
+ e = elevator_get(q, "mq-deadline", false);
+ if (!e)
+ goto out_unlock;
+
+ err = blk_mq_init_sched(q, e);
+ if (err)
+ elevator_put(e);
+out_unlock:
+ mutex_unlock(&q->sysfs_lock);
+ return err;
+}
+
+/*
* switch to new_e io scheduler. be careful not to introduce deadlocks -
* we don't free the old io scheduler, before we have allocated what we
* need for the new one. this way we have a chance of going back to the old
diff --git a/block/genhd.c b/block/genhd.c
index dc7e089373b9..f1543a45e73b 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -82,6 +82,18 @@ void part_in_flight(struct request_queue *q, struct hd_struct *part,
}
}
+void part_in_flight_rw(struct request_queue *q, struct hd_struct *part,
+ unsigned int inflight[2])
+{
+ if (q->mq_ops) {
+ blk_mq_in_flight_rw(q, part, inflight);
+ return;
+ }
+
+ inflight[0] = atomic_read(&part->in_flight[0]);
+ inflight[1] = atomic_read(&part->in_flight[1]);
+}
+
struct hd_struct *__disk_get_part(struct gendisk *disk, int partno)
{
struct disk_part_tbl *ptbl = rcu_dereference(disk->part_tbl);
@@ -1015,18 +1027,6 @@ static const struct seq_operations partitions_op = {
.stop = disk_seqf_stop,
.show = show_partition
};
-
-static int partitions_open(struct inode *inode, struct file *file)
-{
- return seq_open(file, &partitions_op);
-}
-
-static const struct file_operations proc_partitions_operations = {
- .open = partitions_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
#endif
@@ -1127,28 +1127,25 @@ static ssize_t disk_discard_alignment_show(struct device *dev,
return sprintf(buf, "%d\n", queue_discard_alignment(disk->queue));
}
-static DEVICE_ATTR(range, S_IRUGO, disk_range_show, NULL);
-static DEVICE_ATTR(ext_range, S_IRUGO, disk_ext_range_show, NULL);
-static DEVICE_ATTR(removable, S_IRUGO, disk_removable_show, NULL);
-static DEVICE_ATTR(hidden, S_IRUGO, disk_hidden_show, NULL);
-static DEVICE_ATTR(ro, S_IRUGO, disk_ro_show, NULL);
-static DEVICE_ATTR(size, S_IRUGO, part_size_show, NULL);
-static DEVICE_ATTR(alignment_offset, S_IRUGO, disk_alignment_offset_show, NULL);
-static DEVICE_ATTR(discard_alignment, S_IRUGO, disk_discard_alignment_show,
- NULL);
-static DEVICE_ATTR(capability, S_IRUGO, disk_capability_show, NULL);
-static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL);
-static DEVICE_ATTR(inflight, S_IRUGO, part_inflight_show, NULL);
-static DEVICE_ATTR(badblocks, S_IRUGO | S_IWUSR, disk_badblocks_show,
- disk_badblocks_store);
+static DEVICE_ATTR(range, 0444, disk_range_show, NULL);
+static DEVICE_ATTR(ext_range, 0444, disk_ext_range_show, NULL);
+static DEVICE_ATTR(removable, 0444, disk_removable_show, NULL);
+static DEVICE_ATTR(hidden, 0444, disk_hidden_show, NULL);
+static DEVICE_ATTR(ro, 0444, disk_ro_show, NULL);
+static DEVICE_ATTR(size, 0444, part_size_show, NULL);
+static DEVICE_ATTR(alignment_offset, 0444, disk_alignment_offset_show, NULL);
+static DEVICE_ATTR(discard_alignment, 0444, disk_discard_alignment_show, NULL);
+static DEVICE_ATTR(capability, 0444, disk_capability_show, NULL);
+static DEVICE_ATTR(stat, 0444, part_stat_show, NULL);
+static DEVICE_ATTR(inflight, 0444, part_inflight_show, NULL);
+static DEVICE_ATTR(badblocks, 0644, disk_badblocks_show, disk_badblocks_store);
#ifdef CONFIG_FAIL_MAKE_REQUEST
static struct device_attribute dev_attr_fail =
- __ATTR(make-it-fail, S_IRUGO|S_IWUSR, part_fail_show, part_fail_store);
+ __ATTR(make-it-fail, 0644, part_fail_show, part_fail_store);
#endif
#ifdef CONFIG_FAIL_IO_TIMEOUT
static struct device_attribute dev_attr_fail_timeout =
- __ATTR(io-timeout-fail, S_IRUGO|S_IWUSR, part_timeout_show,
- part_timeout_store);
+ __ATTR(io-timeout-fail, 0644, part_timeout_show, part_timeout_store);
#endif
static struct attribute *disk_attrs[] = {
@@ -1365,22 +1362,10 @@ static const struct seq_operations diskstats_op = {
.show = diskstats_show
};
-static int diskstats_open(struct inode *inode, struct file *file)
-{
- return seq_open(file, &diskstats_op);
-}
-
-static const struct file_operations proc_diskstats_operations = {
- .open = diskstats_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
static int __init proc_genhd_init(void)
{
- proc_create("diskstats", 0, NULL, &proc_diskstats_operations);
- proc_create("partitions", 0, NULL, &proc_partitions_operations);
+ proc_create_seq("diskstats", 0, NULL, &diskstats_op);
+ proc_create_seq("partitions", 0, NULL, &partitions_op);
return 0;
}
module_init(proc_genhd_init);
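/*
 * Editor's sketch: proc_create_seq() replaces the per-file
 * open/read/llseek/release boilerplate for read-only seq_file entries;
 * only the seq_operations table survives (callback names here are
 * illustrative).
 */
static const struct seq_operations example_op = {
	.start = example_start,
	.next  = example_next,
	.stop  = example_stop,
	.show  = example_show,
};

static int __init example_proc_init(void)
{
	proc_create_seq("example", 0, NULL, &example_op);
	return 0;
}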
@@ -1912,9 +1897,9 @@ static ssize_t disk_events_poll_msecs_store(struct device *dev,
return count;
}
-static const DEVICE_ATTR(events, S_IRUGO, disk_events_show, NULL);
-static const DEVICE_ATTR(events_async, S_IRUGO, disk_events_async_show, NULL);
-static const DEVICE_ATTR(events_poll_msecs, S_IRUGO|S_IWUSR,
+static const DEVICE_ATTR(events, 0444, disk_events_show, NULL);
+static const DEVICE_ATTR(events_async, 0444, disk_events_async_show, NULL);
+static const DEVICE_ATTR(events_poll_msecs, 0644,
disk_events_poll_msecs_show,
disk_events_poll_msecs_store);
diff --git a/block/kyber-iosched.c b/block/kyber-iosched.c
index 0d6d25e32e1f..a1660bafc912 100644
--- a/block/kyber-iosched.c
+++ b/block/kyber-iosched.c
@@ -72,6 +72,19 @@ static const unsigned int kyber_batch_size[] = {
[KYBER_OTHER] = 8,
};
+/*
+ * There is a one-to-one mapping between ctx & hctx and between kcq & khd;
+ * we use request->mq_ctx->index_hw to index the kcq in khd.
+ */
+struct kyber_ctx_queue {
+ /*
+ * Used to ensure that operations on rq_list and kcq_map are atomic.
+ * Also protects the requests on rq_list during merging.
+ */
+ spinlock_t lock;
+ struct list_head rq_list[KYBER_NUM_DOMAINS];
+} ____cacheline_aligned_in_smp;
+
struct kyber_queue_data {
struct request_queue *q;
@@ -99,6 +112,8 @@ struct kyber_hctx_data {
struct list_head rqs[KYBER_NUM_DOMAINS];
unsigned int cur_domain;
unsigned int batching;
+ struct kyber_ctx_queue *kcqs;
+ struct sbitmap kcq_map[KYBER_NUM_DOMAINS];
wait_queue_entry_t domain_wait[KYBER_NUM_DOMAINS];
struct sbq_wait_state *domain_ws[KYBER_NUM_DOMAINS];
atomic_t wait_index[KYBER_NUM_DOMAINS];
@@ -107,10 +122,8 @@ struct kyber_hctx_data {
static int kyber_domain_wake(wait_queue_entry_t *wait, unsigned mode, int flags,
void *key);
-static int rq_sched_domain(const struct request *rq)
+static unsigned int kyber_sched_domain(unsigned int op)
{
- unsigned int op = rq->cmd_flags;
-
if ((op & REQ_OP_MASK) == REQ_OP_READ)
return KYBER_READ;
else if ((op & REQ_OP_MASK) == REQ_OP_WRITE && op_is_sync(op))
@@ -284,6 +297,11 @@ static unsigned int kyber_sched_tags_shift(struct kyber_queue_data *kqd)
return kqd->q->queue_hw_ctx[0]->sched_tags->bitmap_tags.sb.shift;
}
+static int kyber_bucket_fn(const struct request *rq)
+{
+ return kyber_sched_domain(rq->cmd_flags);
+}
+
static struct kyber_queue_data *kyber_queue_data_alloc(struct request_queue *q)
{
struct kyber_queue_data *kqd;
@@ -297,7 +315,7 @@ static struct kyber_queue_data *kyber_queue_data_alloc(struct request_queue *q)
goto err;
kqd->q = q;
- kqd->cb = blk_stat_alloc_callback(kyber_stat_timer_fn, rq_sched_domain,
+ kqd->cb = blk_stat_alloc_callback(kyber_stat_timer_fn, kyber_bucket_fn,
KYBER_NUM_DOMAINS, kqd);
if (!kqd->cb)
goto err_kqd;
@@ -376,8 +394,18 @@ static void kyber_exit_sched(struct elevator_queue *e)
kfree(kqd);
}
+static void kyber_ctx_queue_init(struct kyber_ctx_queue *kcq)
+{
+ unsigned int i;
+
+ spin_lock_init(&kcq->lock);
+ for (i = 0; i < KYBER_NUM_DOMAINS; i++)
+ INIT_LIST_HEAD(&kcq->rq_list[i]);
+}
+
static int kyber_init_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
{
+ struct kyber_queue_data *kqd = hctx->queue->elevator->elevator_data;
struct kyber_hctx_data *khd;
int i;
@@ -385,6 +413,24 @@ static int kyber_init_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
if (!khd)
return -ENOMEM;
+ khd->kcqs = kmalloc_array_node(hctx->nr_ctx,
+ sizeof(struct kyber_ctx_queue),
+ GFP_KERNEL, hctx->numa_node);
+ if (!khd->kcqs)
+ goto err_khd;
+
+ for (i = 0; i < hctx->nr_ctx; i++)
+ kyber_ctx_queue_init(&khd->kcqs[i]);
+
+ for (i = 0; i < KYBER_NUM_DOMAINS; i++) {
+ if (sbitmap_init_node(&khd->kcq_map[i], hctx->nr_ctx,
+ ilog2(8), GFP_KERNEL, hctx->numa_node)) {
+ while (--i >= 0)
+ sbitmap_free(&khd->kcq_map[i]);
+ goto err_kcqs;
+ }
+ }
+
spin_lock_init(&khd->lock);
for (i = 0; i < KYBER_NUM_DOMAINS; i++) {
@@ -400,12 +446,26 @@ static int kyber_init_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
khd->batching = 0;
hctx->sched_data = khd;
+ sbitmap_queue_min_shallow_depth(&hctx->sched_tags->bitmap_tags,
+ kqd->async_depth);
return 0;
+
+err_kcqs:
+ kfree(khd->kcqs);
+err_khd:
+ kfree(khd);
+ return -ENOMEM;
}
static void kyber_exit_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
{
+ struct kyber_hctx_data *khd = hctx->sched_data;
+ int i;
+
+ for (i = 0; i < KYBER_NUM_DOMAINS; i++)
+ sbitmap_free(&khd->kcq_map[i]);
+ kfree(khd->kcqs);
kfree(hctx->sched_data);
}
@@ -427,7 +487,7 @@ static void rq_clear_domain_token(struct kyber_queue_data *kqd,
nr = rq_get_domain_token(rq);
if (nr != -1) {
- sched_domain = rq_sched_domain(rq);
+ sched_domain = kyber_sched_domain(rq->cmd_flags);
sbitmap_queue_clear(&kqd->domain_tokens[sched_domain], nr,
rq->mq_ctx->cpu);
}
@@ -446,11 +506,51 @@ static void kyber_limit_depth(unsigned int op, struct blk_mq_alloc_data *data)
}
}
+static bool kyber_bio_merge(struct blk_mq_hw_ctx *hctx, struct bio *bio)
+{
+ struct kyber_hctx_data *khd = hctx->sched_data;
+ struct blk_mq_ctx *ctx = blk_mq_get_ctx(hctx->queue);
+ struct kyber_ctx_queue *kcq = &khd->kcqs[ctx->index_hw];
+ unsigned int sched_domain = kyber_sched_domain(bio->bi_opf);
+ struct list_head *rq_list = &kcq->rq_list[sched_domain];
+ bool merged;
+
+ spin_lock(&kcq->lock);
+ merged = blk_mq_bio_list_merge(hctx->queue, rq_list, bio);
+ spin_unlock(&kcq->lock);
+ blk_mq_put_ctx(ctx);
+
+ return merged;
+}
+
static void kyber_prepare_request(struct request *rq, struct bio *bio)
{
rq_set_domain_token(rq, -1);
}
+static void kyber_insert_requests(struct blk_mq_hw_ctx *hctx,
+ struct list_head *rq_list, bool at_head)
+{
+ struct kyber_hctx_data *khd = hctx->sched_data;
+ struct request *rq, *next;
+
+ list_for_each_entry_safe(rq, next, rq_list, queuelist) {
+ unsigned int sched_domain = kyber_sched_domain(rq->cmd_flags);
+ struct kyber_ctx_queue *kcq = &khd->kcqs[rq->mq_ctx->index_hw];
+ struct list_head *head = &kcq->rq_list[sched_domain];
+
+ spin_lock(&kcq->lock);
+ if (at_head)
+ list_move(&rq->queuelist, head);
+ else
+ list_move_tail(&rq->queuelist, head);
+ sbitmap_set_bit(&khd->kcq_map[sched_domain],
+ rq->mq_ctx->index_hw);
+ blk_mq_sched_request_inserted(rq);
+ spin_unlock(&kcq->lock);
+ }
+}
+
static void kyber_finish_request(struct request *rq)
{
struct kyber_queue_data *kqd = rq->q->elevator->elevator_data;
@@ -469,7 +569,7 @@ static void kyber_completed_request(struct request *rq)
* Check if this request met our latency goal. If not, quickly gather
* some statistics and start throttling.
*/
- sched_domain = rq_sched_domain(rq);
+ sched_domain = kyber_sched_domain(rq->cmd_flags);
switch (sched_domain) {
case KYBER_READ:
target = kqd->read_lat_nsec;
@@ -485,29 +585,48 @@ static void kyber_completed_request(struct request *rq)
if (blk_stat_is_active(kqd->cb))
return;
- now = __blk_stat_time(ktime_to_ns(ktime_get()));
- if (now < blk_stat_time(&rq->issue_stat))
+ now = ktime_get_ns();
+ if (now < rq->io_start_time_ns)
return;
- latency = now - blk_stat_time(&rq->issue_stat);
+ latency = now - rq->io_start_time_ns;
if (latency > target)
blk_stat_activate_msecs(kqd->cb, 10);
}
-static void kyber_flush_busy_ctxs(struct kyber_hctx_data *khd,
- struct blk_mq_hw_ctx *hctx)
+struct flush_kcq_data {
+ struct kyber_hctx_data *khd;
+ unsigned int sched_domain;
+ struct list_head *list;
+};
+
+static bool flush_busy_kcq(struct sbitmap *sb, unsigned int bitnr, void *data)
{
- LIST_HEAD(rq_list);
- struct request *rq, *next;
+ struct flush_kcq_data *flush_data = data;
+ struct kyber_ctx_queue *kcq = &flush_data->khd->kcqs[bitnr];
- blk_mq_flush_busy_ctxs(hctx, &rq_list);
- list_for_each_entry_safe(rq, next, &rq_list, queuelist) {
- unsigned int sched_domain;
+ spin_lock(&kcq->lock);
+ list_splice_tail_init(&kcq->rq_list[flush_data->sched_domain],
+ flush_data->list);
+ sbitmap_clear_bit(sb, bitnr);
+ spin_unlock(&kcq->lock);
- sched_domain = rq_sched_domain(rq);
- list_move_tail(&rq->queuelist, &khd->rqs[sched_domain]);
- }
+ return true;
+}
+
+static void kyber_flush_busy_kcqs(struct kyber_hctx_data *khd,
+ unsigned int sched_domain,
+ struct list_head *list)
+{
+ struct flush_kcq_data data = {
+ .khd = khd,
+ .sched_domain = sched_domain,
+ .list = list,
+ };
+
+ sbitmap_for_each_set(&khd->kcq_map[sched_domain],
+ flush_busy_kcq, &data);
}
static int kyber_domain_wake(wait_queue_entry_t *wait, unsigned mode, int flags,
@@ -570,26 +689,23 @@ static int kyber_get_domain_token(struct kyber_queue_data *kqd,
static struct request *
kyber_dispatch_cur_domain(struct kyber_queue_data *kqd,
struct kyber_hctx_data *khd,
- struct blk_mq_hw_ctx *hctx,
- bool *flushed)
+ struct blk_mq_hw_ctx *hctx)
{
struct list_head *rqs;
struct request *rq;
int nr;
rqs = &khd->rqs[khd->cur_domain];
- rq = list_first_entry_or_null(rqs, struct request, queuelist);
/*
- * If there wasn't already a pending request and we haven't flushed the
- * software queues yet, flush the software queues and check again.
+ * If we already have a flushed request, then we just need to get a
+ * token for it. Otherwise, if there are pending requests in the kcqs,
+ * flush the kcqs, but only if we can get a token. If not, we should
+ * leave the requests in the kcqs so that they can be merged. Note that
+ * khd->lock serializes the flushes, so if we observed any bit set in
+ * the kcq_map, we will always get a request.
*/
- if (!rq && !*flushed) {
- kyber_flush_busy_ctxs(khd, hctx);
- *flushed = true;
- rq = list_first_entry_or_null(rqs, struct request, queuelist);
- }
-
+ rq = list_first_entry_or_null(rqs, struct request, queuelist);
if (rq) {
nr = kyber_get_domain_token(kqd, khd, hctx);
if (nr >= 0) {
@@ -598,6 +714,16 @@ kyber_dispatch_cur_domain(struct kyber_queue_data *kqd,
list_del_init(&rq->queuelist);
return rq;
}
+ } else if (sbitmap_any_bit_set(&khd->kcq_map[khd->cur_domain])) {
+ nr = kyber_get_domain_token(kqd, khd, hctx);
+ if (nr >= 0) {
+ kyber_flush_busy_kcqs(khd, khd->cur_domain, rqs);
+ rq = list_first_entry(rqs, struct request, queuelist);
+ khd->batching++;
+ rq_set_domain_token(rq, nr);
+ list_del_init(&rq->queuelist);
+ return rq;
+ }
}
/* There were either no pending requests or no tokens. */
@@ -608,7 +734,6 @@ static struct request *kyber_dispatch_request(struct blk_mq_hw_ctx *hctx)
{
struct kyber_queue_data *kqd = hctx->queue->elevator->elevator_data;
struct kyber_hctx_data *khd = hctx->sched_data;
- bool flushed = false;
struct request *rq;
int i;
@@ -619,7 +744,7 @@ static struct request *kyber_dispatch_request(struct blk_mq_hw_ctx *hctx)
* from the batch.
*/
if (khd->batching < kyber_batch_size[khd->cur_domain]) {
- rq = kyber_dispatch_cur_domain(kqd, khd, hctx, &flushed);
+ rq = kyber_dispatch_cur_domain(kqd, khd, hctx);
if (rq)
goto out;
}
@@ -640,7 +765,7 @@ static struct request *kyber_dispatch_request(struct blk_mq_hw_ctx *hctx)
else
khd->cur_domain++;
- rq = kyber_dispatch_cur_domain(kqd, khd, hctx, &flushed);
+ rq = kyber_dispatch_cur_domain(kqd, khd, hctx);
if (rq)
goto out;
}
@@ -657,10 +782,12 @@ static bool kyber_has_work(struct blk_mq_hw_ctx *hctx)
int i;
for (i = 0; i < KYBER_NUM_DOMAINS; i++) {
- if (!list_empty_careful(&khd->rqs[i]))
+ if (!list_empty_careful(&khd->rqs[i]) ||
+ sbitmap_any_bit_set(&khd->kcq_map[i]))
return true;
}
- return sbitmap_any_bit_set(&hctx->ctx_map);
+
+ return false;
}
#define KYBER_LAT_SHOW_STORE(op) \
@@ -831,7 +958,9 @@ static struct elevator_type kyber_sched = {
.init_hctx = kyber_init_hctx,
.exit_hctx = kyber_exit_hctx,
.limit_depth = kyber_limit_depth,
+ .bio_merge = kyber_bio_merge,
.prepare_request = kyber_prepare_request,
+ .insert_requests = kyber_insert_requests,
.finish_request = kyber_finish_request,
.requeue_request = kyber_finish_request,
.completed_request = kyber_completed_request,
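
The new per-context queues hinge on one sbitmap per domain acting as a summary of which kcqs are non-empty: the insert path sets a bit, has_work tests the map lock-free, and dispatch iterates only the set bits, clearing each under the kcq lock. A reduced sketch of that pattern (single domain; only the sbitmap calls are the real API, the kcq_* names are illustrative):

    #include <linux/sbitmap.h>

    static struct sbitmap busy_map;         /* one bit per software queue */

    /* insert path: after queueing a request on kcqs[idx], publish it */
    static void kcq_mark_busy(unsigned int idx)
    {
            sbitmap_set_bit(&busy_map, idx);
    }

    /* has_work path: cheap check without taking any per-kcq lock */
    static bool kcq_any_work(void)
    {
            return sbitmap_any_bit_set(&busy_map);
    }

    /* dispatch path: visit only the busy queues; the real code splices
     * kcqs[bitnr].rq_list under kcq->lock before clearing the bit */
    static bool kcq_drain_one(struct sbitmap *sb, unsigned int bitnr, void *data)
    {
            sbitmap_clear_bit(sb, bitnr);
            return true;    /* continue the iteration */
    }

    static void kcq_drain_all(void)
    {
            sbitmap_for_each_set(&busy_map, kcq_drain_one, NULL);
    }
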
diff --git a/block/mq-deadline.c b/block/mq-deadline.c
index 8ec0ba9f5386..099a9e05854c 100644
--- a/block/mq-deadline.c
+++ b/block/mq-deadline.c
@@ -630,8 +630,7 @@ STORE_FUNCTION(deadline_fifo_batch_store, &dd->fifo_batch, 0, INT_MAX, 0);
#undef STORE_FUNCTION
#define DD_ATTR(name) \
- __ATTR(name, S_IRUGO|S_IWUSR, deadline_##name##_show, \
- deadline_##name##_store)
+ __ATTR(name, 0644, deadline_##name##_show, deadline_##name##_store)
static struct elv_fs_entry deadline_attrs[] = {
DD_ATTR(read_expire),
diff --git a/block/partition-generic.c b/block/partition-generic.c
index 08dabcd8b6ae..3dcfd4ec0e11 100644
--- a/block/partition-generic.c
+++ b/block/partition-generic.c
@@ -145,13 +145,15 @@ ssize_t part_stat_show(struct device *dev,
jiffies_to_msecs(part_stat_read(p, time_in_queue)));
}
-ssize_t part_inflight_show(struct device *dev,
- struct device_attribute *attr, char *buf)
+ssize_t part_inflight_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
{
struct hd_struct *p = dev_to_part(dev);
+ struct request_queue *q = part_to_disk(p)->queue;
+ unsigned int inflight[2];
- return sprintf(buf, "%8u %8u\n", atomic_read(&p->in_flight[0]),
- atomic_read(&p->in_flight[1]));
+ part_in_flight_rw(q, p, inflight);
+ return sprintf(buf, "%8u %8u\n", inflight[0], inflight[1]);
}
#ifdef CONFIG_FAIL_MAKE_REQUEST
@@ -177,18 +179,17 @@ ssize_t part_fail_store(struct device *dev,
}
#endif
-static DEVICE_ATTR(partition, S_IRUGO, part_partition_show, NULL);
-static DEVICE_ATTR(start, S_IRUGO, part_start_show, NULL);
-static DEVICE_ATTR(size, S_IRUGO, part_size_show, NULL);
-static DEVICE_ATTR(ro, S_IRUGO, part_ro_show, NULL);
-static DEVICE_ATTR(alignment_offset, S_IRUGO, part_alignment_offset_show, NULL);
-static DEVICE_ATTR(discard_alignment, S_IRUGO, part_discard_alignment_show,
- NULL);
-static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL);
-static DEVICE_ATTR(inflight, S_IRUGO, part_inflight_show, NULL);
+static DEVICE_ATTR(partition, 0444, part_partition_show, NULL);
+static DEVICE_ATTR(start, 0444, part_start_show, NULL);
+static DEVICE_ATTR(size, 0444, part_size_show, NULL);
+static DEVICE_ATTR(ro, 0444, part_ro_show, NULL);
+static DEVICE_ATTR(alignment_offset, 0444, part_alignment_offset_show, NULL);
+static DEVICE_ATTR(discard_alignment, 0444, part_discard_alignment_show, NULL);
+static DEVICE_ATTR(stat, 0444, part_stat_show, NULL);
+static DEVICE_ATTR(inflight, 0444, part_inflight_show, NULL);
#ifdef CONFIG_FAIL_MAKE_REQUEST
static struct device_attribute dev_attr_fail =
- __ATTR(make-it-fail, S_IRUGO|S_IWUSR, part_fail_show, part_fail_store);
+ __ATTR(make-it-fail, 0644, part_fail_show, part_fail_store);
#endif
static struct attribute *part_attrs[] = {
@@ -289,8 +290,7 @@ static ssize_t whole_disk_show(struct device *dev,
{
return 0;
}
-static DEVICE_ATTR(whole_disk, S_IRUSR | S_IRGRP | S_IROTH,
- whole_disk_show, NULL);
+static DEVICE_ATTR(whole_disk, 0444, whole_disk_show, NULL);
/*
* Must be called either with bd_mutex held, before a disk can be opened or
@@ -516,7 +516,7 @@ rescan:
if (disk->fops->revalidate_disk)
disk->fops->revalidate_disk(disk);
- check_disk_size_change(disk, bdev);
+ check_disk_size_change(disk, bdev, true);
bdev->bd_invalidated = 0;
if (!get_capacity(disk) || !(state = check_partition(disk, bdev)))
return 0;
@@ -641,7 +641,7 @@ int invalidate_partitions(struct gendisk *disk, struct block_device *bdev)
return res;
set_capacity(disk, 0);
- check_disk_size_change(disk, bdev);
+ check_disk_size_change(disk, bdev, false);
bdev->bd_invalidated = 0;
/* tell userspace that the media / partition table may have changed */
kobject_uevent(&disk_to_dev(disk)->kobj, KOBJ_CHANGE);
diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c
index 60b471f8621b..533f4aee8567 100644
--- a/block/scsi_ioctl.c
+++ b/block/scsi_ioctl.c
@@ -321,8 +321,7 @@ static int sg_io(struct request_queue *q, struct gendisk *bd_disk,
at_head = 1;
ret = -ENOMEM;
- rq = blk_get_request(q, writing ? REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN,
- GFP_KERNEL);
+ rq = blk_get_request(q, writing ? REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN, 0);
if (IS_ERR(rq))
return PTR_ERR(rq);
req = scsi_req(rq);
@@ -449,8 +448,7 @@ int sg_scsi_ioctl(struct request_queue *q, struct gendisk *disk, fmode_t mode,
}
- rq = blk_get_request(q, in_len ? REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN,
- __GFP_RECLAIM);
+ rq = blk_get_request(q, in_len ? REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN, 0);
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
goto error_free_buffer;
@@ -501,7 +499,7 @@ int sg_scsi_ioctl(struct request_queue *q, struct gendisk *disk, fmode_t mode,
break;
}
- if (bytes && blk_rq_map_kern(q, rq, buffer, bytes, __GFP_RECLAIM)) {
+ if (bytes && blk_rq_map_kern(q, rq, buffer, bytes, GFP_NOIO)) {
err = DRIVER_ERROR << 24;
goto error;
}
@@ -538,7 +536,7 @@ static int __blk_send_generic(struct request_queue *q, struct gendisk *bd_disk,
struct request *rq;
int err;
- rq = blk_get_request(q, REQ_OP_SCSI_OUT, __GFP_RECLAIM);
+ rq = blk_get_request(q, REQ_OP_SCSI_OUT, 0);
if (IS_ERR(rq))
return PTR_ERR(rq);
rq->timeout = BLK_DEFAULT_SG_TIMEOUT;
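
Each scsi_ioctl.c hunk makes the same substitution: the final argument of blk_get_request() is now a set of BLK_MQ_REQ_* allocation flags rather than a gfp mask, with 0 selecting the default blocking allocation, while kernel-buffer mappings keep an explicit GFP_NOIO. A sketch of the resulting call sequence (error handling abbreviated):

    struct request *rq;

    /* last argument is blk_mq_req_flags_t; 0 = may sleep, no special flags */
    rq = blk_get_request(q, writing ? REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN, 0);
    if (IS_ERR(rq))
            return PTR_ERR(rq);

    /* buffer mapping still takes a gfp mask; GFP_NOIO avoids recursing
     * into the I/O path during reclaim */
    if (bytes && blk_rq_map_kern(q, rq, buffer, bytes, GFP_NOIO))
            goto error;
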
diff --git a/crypto/af_alg.c b/crypto/af_alg.c
index 7846c0c20cfe..89ed613c017e 100644
--- a/crypto/af_alg.c
+++ b/crypto/af_alg.c
@@ -347,7 +347,6 @@ static const struct proto_ops alg_proto_ops = {
.sendpage = sock_no_sendpage,
.sendmsg = sock_no_sendmsg,
.recvmsg = sock_no_recvmsg,
- .poll = sock_no_poll,
.bind = alg_bind,
.release = af_alg_release,
@@ -1061,19 +1060,12 @@ void af_alg_async_cb(struct crypto_async_request *_req, int err)
}
EXPORT_SYMBOL_GPL(af_alg_async_cb);
-/**
- * af_alg_poll - poll system call handler
- */
-__poll_t af_alg_poll(struct file *file, struct socket *sock,
- poll_table *wait)
+__poll_t af_alg_poll_mask(struct socket *sock, __poll_t events)
{
struct sock *sk = sock->sk;
struct alg_sock *ask = alg_sk(sk);
struct af_alg_ctx *ctx = ask->private;
- __poll_t mask;
-
- sock_poll_wait(file, sk_sleep(sk), wait);
- mask = 0;
+ __poll_t mask = 0;
if (!ctx->more || ctx->used)
mask |= EPOLLIN | EPOLLRDNORM;
@@ -1083,7 +1075,7 @@ __poll_t af_alg_poll(struct file *file, struct socket *sock,
return mask;
}
-EXPORT_SYMBOL_GPL(af_alg_poll);
+EXPORT_SYMBOL_GPL(af_alg_poll_mask);
/**
* af_alg_alloc_areq - allocate struct af_alg_async_req
diff --git a/crypto/algif_aead.c b/crypto/algif_aead.c
index 4b07edd5a9ff..330cf9f2b767 100644
--- a/crypto/algif_aead.c
+++ b/crypto/algif_aead.c
@@ -375,7 +375,7 @@ static struct proto_ops algif_aead_ops = {
.sendmsg = aead_sendmsg,
.sendpage = af_alg_sendpage,
.recvmsg = aead_recvmsg,
- .poll = af_alg_poll,
+ .poll_mask = af_alg_poll_mask,
};
static int aead_check_key(struct socket *sock)
@@ -471,7 +471,7 @@ static struct proto_ops algif_aead_ops_nokey = {
.sendmsg = aead_sendmsg_nokey,
.sendpage = aead_sendpage_nokey,
.recvmsg = aead_recvmsg_nokey,
- .poll = af_alg_poll,
+ .poll_mask = af_alg_poll_mask,
};
static void *aead_bind(const char *name, u32 type, u32 mask)
diff --git a/crypto/algif_hash.c b/crypto/algif_hash.c
index 6c9b1927a520..bfcf595fd8f9 100644
--- a/crypto/algif_hash.c
+++ b/crypto/algif_hash.c
@@ -288,7 +288,6 @@ static struct proto_ops algif_hash_ops = {
.mmap = sock_no_mmap,
.bind = sock_no_bind,
.setsockopt = sock_no_setsockopt,
- .poll = sock_no_poll,
.release = af_alg_release,
.sendmsg = hash_sendmsg,
@@ -396,7 +395,6 @@ static struct proto_ops algif_hash_ops_nokey = {
.mmap = sock_no_mmap,
.bind = sock_no_bind,
.setsockopt = sock_no_setsockopt,
- .poll = sock_no_poll,
.release = af_alg_release,
.sendmsg = hash_sendmsg_nokey,
diff --git a/crypto/algif_rng.c b/crypto/algif_rng.c
index 150c2b6480ed..22df3799a17b 100644
--- a/crypto/algif_rng.c
+++ b/crypto/algif_rng.c
@@ -106,7 +106,6 @@ static struct proto_ops algif_rng_ops = {
.bind = sock_no_bind,
.accept = sock_no_accept,
.setsockopt = sock_no_setsockopt,
- .poll = sock_no_poll,
.sendmsg = sock_no_sendmsg,
.sendpage = sock_no_sendpage,
diff --git a/crypto/algif_skcipher.c b/crypto/algif_skcipher.c
index c4e885df4564..15cf3c5222e0 100644
--- a/crypto/algif_skcipher.c
+++ b/crypto/algif_skcipher.c
@@ -205,7 +205,7 @@ static struct proto_ops algif_skcipher_ops = {
.sendmsg = skcipher_sendmsg,
.sendpage = af_alg_sendpage,
.recvmsg = skcipher_recvmsg,
- .poll = af_alg_poll,
+ .poll_mask = af_alg_poll_mask,
};
static int skcipher_check_key(struct socket *sock)
@@ -301,7 +301,7 @@ static struct proto_ops algif_skcipher_ops_nokey = {
.sendmsg = skcipher_sendmsg_nokey,
.sendpage = skcipher_sendpage_nokey,
.recvmsg = skcipher_recvmsg_nokey,
- .poll = af_alg_poll,
+ .poll_mask = af_alg_poll_mask,
};
static void *skcipher_bind(const char *name, u32 type, u32 mask)
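
The crypto socket conversions swap the classic ->poll entry point for ->poll_mask, which receives the event mask directly and no longer does its own waitqueue registration; the core handles that before the call. A minimal sketch of the shape of such a handler (the foo_* names and predicates are illustrative):

    #include <linux/net.h>
    #include <net/sock.h>

    static __poll_t foo_poll_mask(struct socket *sock, __poll_t events)
    {
            struct sock *sk = sock->sk;
            __poll_t mask = 0;

            /* no sock_poll_wait() here: the core registers the waitqueue
             * before calling ->poll_mask */
            if (foo_data_ready(sk))         /* illustrative predicate */
                    mask |= EPOLLIN | EPOLLRDNORM;
            if (foo_write_space(sk))        /* illustrative predicate */
                    mask |= EPOLLOUT | EPOLLWRNORM;
            return mask;
    }

    static struct proto_ops foo_ops = {
            /* ...other handlers... */
            .poll_mask = foo_poll_mask,
    };
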
diff --git a/crypto/proc.c b/crypto/proc.c
index 822fcef6d91c..f4eb6139973e 100644
--- a/crypto/proc.c
+++ b/crypto/proc.c
@@ -94,21 +94,9 @@ static const struct seq_operations crypto_seq_ops = {
.show = c_show
};
-static int crypto_info_open(struct inode *inode, struct file *file)
-{
- return seq_open(file, &crypto_seq_ops);
-}
-
-static const struct file_operations proc_crypto_ops = {
- .open = crypto_info_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release
-};
-
void __init crypto_init_proc(void)
{
- proc_create("crypto", 0, NULL, &proc_crypto_ops);
+ proc_create_seq("crypto", 0, NULL, &crypto_seq_ops);
}
void __exit crypto_exit_proc(void)
diff --git a/drivers/acpi/ac.c b/drivers/acpi/ac.c
index 2d8de2f8c1ed..84fdfa70920a 100644
--- a/drivers/acpi/ac.c
+++ b/drivers/acpi/ac.c
@@ -82,7 +82,6 @@ static SIMPLE_DEV_PM_OPS(acpi_ac_pm, NULL, acpi_ac_resume);
#ifdef CONFIG_ACPI_PROCFS_POWER
extern struct proc_dir_entry *acpi_lock_ac_dir(void);
extern void *acpi_unlock_ac_dir(struct proc_dir_entry *acpi_ac_dir);
-static int acpi_ac_open_fs(struct inode *inode, struct file *file);
#endif
@@ -111,16 +110,6 @@ struct acpi_ac {
#define to_acpi_ac(x) power_supply_get_drvdata(x)
-#ifdef CONFIG_ACPI_PROCFS_POWER
-static const struct file_operations acpi_ac_fops = {
- .owner = THIS_MODULE,
- .open = acpi_ac_open_fs,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-#endif
-
/* --------------------------------------------------------------------------
AC Adapter Management
-------------------------------------------------------------------------- */
@@ -209,11 +198,6 @@ static int acpi_ac_seq_show(struct seq_file *seq, void *offset)
return 0;
}
-static int acpi_ac_open_fs(struct inode *inode, struct file *file)
-{
- return single_open(file, acpi_ac_seq_show, PDE_DATA(inode));
-}
-
static int acpi_ac_add_fs(struct acpi_ac *ac)
{
struct proc_dir_entry *entry = NULL;
@@ -228,9 +212,8 @@ static int acpi_ac_add_fs(struct acpi_ac *ac)
}
/* 'state' [R] */
- entry = proc_create_data(ACPI_AC_FILE_STATE,
- S_IRUGO, acpi_device_dir(ac->device),
- &acpi_ac_fops, ac);
+ entry = proc_create_single_data(ACPI_AC_FILE_STATE, S_IRUGO,
+ acpi_device_dir(ac->device), acpi_ac_seq_show, ac);
if (!entry)
return -ENODEV;
return 0;
diff --git a/drivers/acpi/acpica/acnamesp.h b/drivers/acpi/acpica/acnamesp.h
index 514aaf948ea9..3825df923480 100644
--- a/drivers/acpi/acpica/acnamesp.h
+++ b/drivers/acpi/acpica/acnamesp.h
@@ -56,6 +56,10 @@ acpi_status acpi_ns_initialize_objects(void);
acpi_status acpi_ns_initialize_devices(u32 flags);
+acpi_status
+acpi_ns_init_one_package(acpi_handle obj_handle,
+ u32 level, void *context, void **return_value);
+
/*
* nsload - Namespace loading
*/
diff --git a/drivers/acpi/acpica/exconfig.c b/drivers/acpi/acpica/exconfig.c
index 99d92cb32803..f85c6f3271f6 100644
--- a/drivers/acpi/acpica/exconfig.c
+++ b/drivers/acpi/acpica/exconfig.c
@@ -174,6 +174,13 @@ acpi_ex_load_table_op(struct acpi_walk_state *walk_state,
return_ACPI_STATUS(status);
}
+ /* Complete the initialization/resolution of package objects */
+
+ status = acpi_ns_walk_namespace(ACPI_TYPE_PACKAGE, ACPI_ROOT_OBJECT,
+ ACPI_UINT32_MAX, 0,
+ acpi_ns_init_one_package, NULL, NULL,
+ NULL);
+
/* Parameter Data (optional) */
if (parameter_node) {
@@ -430,6 +437,13 @@ acpi_ex_load_op(union acpi_operand_object *obj_desc,
return_ACPI_STATUS(status);
}
+ /* Complete the initialization/resolution of package objects */
+
+ status = acpi_ns_walk_namespace(ACPI_TYPE_PACKAGE, ACPI_ROOT_OBJECT,
+ ACPI_UINT32_MAX, 0,
+ acpi_ns_init_one_package, NULL, NULL,
+ NULL);
+
/* Store the ddb_handle into the Target operand */
status = acpi_ex_store(ddb_handle, target, walk_state);
diff --git a/drivers/acpi/acpica/nsinit.c b/drivers/acpi/acpica/nsinit.c
index 77f2b5f4948a..d77257d1c827 100644
--- a/drivers/acpi/acpica/nsinit.c
+++ b/drivers/acpi/acpica/nsinit.c
@@ -242,6 +242,58 @@ error_exit:
/*******************************************************************************
*
+ * FUNCTION: acpi_ns_init_one_package
+ *
+ * PARAMETERS: obj_handle - Node
+ * level - Current nesting level
+ * context - Not used
+ * return_value - Not used
+ *
+ * RETURN: Status
+ *
+ * DESCRIPTION: Callback from acpi_walk_namespace. Invoked for every package
+ * within the namespace. Used during dynamic load of an SSDT.
+ *
+ ******************************************************************************/
+
+acpi_status
+acpi_ns_init_one_package(acpi_handle obj_handle,
+ u32 level, void *context, void **return_value)
+{
+ acpi_status status;
+ union acpi_operand_object *obj_desc;
+ struct acpi_namespace_node *node =
+ (struct acpi_namespace_node *)obj_handle;
+
+ obj_desc = acpi_ns_get_attached_object(node);
+ if (!obj_desc) {
+ return (AE_OK);
+ }
+
+ /* Exit if package is already initialized */
+
+ if (obj_desc->package.flags & AOPOBJ_DATA_VALID) {
+ return (AE_OK);
+ }
+
+ status = acpi_ds_get_package_arguments(obj_desc);
+ if (ACPI_FAILURE(status)) {
+ return (AE_OK);
+ }
+
+ status =
+ acpi_ut_walk_package_tree(obj_desc, NULL,
+ acpi_ds_init_package_element, NULL);
+ if (ACPI_FAILURE(status)) {
+ return (AE_OK);
+ }
+
+ obj_desc->package.flags |= AOPOBJ_DATA_VALID;
+ return (AE_OK);
+}
+
+/*******************************************************************************
+ *
* FUNCTION: acpi_ns_init_one_object
*
* PARAMETERS: obj_handle - Node
@@ -360,27 +412,11 @@ acpi_ns_init_one_object(acpi_handle obj_handle,
case ACPI_TYPE_PACKAGE:
- info->package_init++;
- status = acpi_ds_get_package_arguments(obj_desc);
- if (ACPI_FAILURE(status)) {
- break;
- }
-
- ACPI_DEBUG_PRINT_RAW((ACPI_DB_PARSE,
- "%s: Completing resolution of Package elements\n",
- ACPI_GET_FUNCTION_NAME));
+ /* Complete the initialization/resolution of the package object */
- /*
- * Resolve all named references in package objects (and all
- * sub-packages). This action has been deferred until the entire
- * namespace has been loaded, in order to support external and
- * forward references from individual package elements (05/2017).
- */
- status = acpi_ut_walk_package_tree(obj_desc, NULL,
- acpi_ds_init_package_element,
- NULL);
-
- obj_desc->package.flags |= AOPOBJ_DATA_VALID;
+ info->package_init++;
+ status =
+ acpi_ns_init_one_package(obj_handle, level, NULL, NULL);
break;
default:
diff --git a/drivers/acpi/battery.c b/drivers/acpi/battery.c
index bdb24d636d9a..76550689ce10 100644
--- a/drivers/acpi/battery.c
+++ b/drivers/acpi/battery.c
@@ -81,14 +81,6 @@ MODULE_PARM_DESC(cache_time, "cache time in milliseconds");
#ifdef CONFIG_ACPI_PROCFS_POWER
extern struct proc_dir_entry *acpi_lock_battery_dir(void);
extern void *acpi_unlock_battery_dir(struct proc_dir_entry *acpi_battery_dir);
-
-enum acpi_battery_files {
- info_tag = 0,
- state_tag,
- alarm_tag,
- ACPI_BATTERY_NUMFILES,
-};
-
#endif
static const struct acpi_device_id battery_device_ids[] = {
@@ -985,9 +977,10 @@ static const char *acpi_battery_units(const struct acpi_battery *battery)
"mA" : "mW";
}
-static int acpi_battery_print_info(struct seq_file *seq, int result)
+static int acpi_battery_info_proc_show(struct seq_file *seq, void *offset)
{
struct acpi_battery *battery = seq->private;
+ int result = acpi_battery_update(battery, false);
if (result)
goto end;
@@ -1041,9 +1034,10 @@ static int acpi_battery_print_info(struct seq_file *seq, int result)
return result;
}
-static int acpi_battery_print_state(struct seq_file *seq, int result)
+static int acpi_battery_state_proc_show(struct seq_file *seq, void *offset)
{
struct acpi_battery *battery = seq->private;
+ int result = acpi_battery_update(battery, false);
if (result)
goto end;
@@ -1088,9 +1082,10 @@ static int acpi_battery_print_state(struct seq_file *seq, int result)
return result;
}
-static int acpi_battery_print_alarm(struct seq_file *seq, int result)
+static int acpi_battery_alarm_proc_show(struct seq_file *seq, void *offset)
{
struct acpi_battery *battery = seq->private;
+ int result = acpi_battery_update(battery, false);
if (result)
goto end;
@@ -1142,82 +1137,22 @@ static ssize_t acpi_battery_write_alarm(struct file *file,
return result;
}
-typedef int(*print_func)(struct seq_file *seq, int result);
-
-static print_func acpi_print_funcs[ACPI_BATTERY_NUMFILES] = {
- acpi_battery_print_info,
- acpi_battery_print_state,
- acpi_battery_print_alarm,
-};
-
-static int acpi_battery_read(int fid, struct seq_file *seq)
+static int acpi_battery_alarm_proc_open(struct inode *inode, struct file *file)
{
- struct acpi_battery *battery = seq->private;
- int result = acpi_battery_update(battery, false);
- return acpi_print_funcs[fid](seq, result);
+ return single_open(file, acpi_battery_alarm_proc_show, PDE_DATA(inode));
}
-#define DECLARE_FILE_FUNCTIONS(_name) \
-static int acpi_battery_read_##_name(struct seq_file *seq, void *offset) \
-{ \
- return acpi_battery_read(_name##_tag, seq); \
-} \
-static int acpi_battery_##_name##_open_fs(struct inode *inode, struct file *file) \
-{ \
- return single_open(file, acpi_battery_read_##_name, PDE_DATA(inode)); \
-}
-
-DECLARE_FILE_FUNCTIONS(info);
-DECLARE_FILE_FUNCTIONS(state);
-DECLARE_FILE_FUNCTIONS(alarm);
-
-#undef DECLARE_FILE_FUNCTIONS
-
-#define FILE_DESCRIPTION_RO(_name) \
- { \
- .name = __stringify(_name), \
- .mode = S_IRUGO, \
- .ops = { \
- .open = acpi_battery_##_name##_open_fs, \
- .read = seq_read, \
- .llseek = seq_lseek, \
- .release = single_release, \
- .owner = THIS_MODULE, \
- }, \
- }
-
-#define FILE_DESCRIPTION_RW(_name) \
- { \
- .name = __stringify(_name), \
- .mode = S_IFREG | S_IRUGO | S_IWUSR, \
- .ops = { \
- .open = acpi_battery_##_name##_open_fs, \
- .read = seq_read, \
- .llseek = seq_lseek, \
- .write = acpi_battery_write_##_name, \
- .release = single_release, \
- .owner = THIS_MODULE, \
- }, \
- }
-
-static const struct battery_file {
- struct file_operations ops;
- umode_t mode;
- const char *name;
-} acpi_battery_file[] = {
- FILE_DESCRIPTION_RO(info),
- FILE_DESCRIPTION_RO(state),
- FILE_DESCRIPTION_RW(alarm),
+static const struct file_operations acpi_battery_alarm_fops = {
+ .owner = THIS_MODULE,
+ .open = acpi_battery_alarm_proc_open,
+ .read = seq_read,
+ .write = acpi_battery_write_alarm,
+ .llseek = seq_lseek,
+ .release = single_release,
};
-#undef FILE_DESCRIPTION_RO
-#undef FILE_DESCRIPTION_RW
-
static int acpi_battery_add_fs(struct acpi_device *device)
{
- struct proc_dir_entry *entry = NULL;
- int i;
-
printk(KERN_WARNING PREFIX "Deprecated procfs I/F for battery is loaded,"
" please retry with CONFIG_ACPI_PROCFS_POWER cleared\n");
if (!acpi_device_dir(device)) {
@@ -1227,28 +1162,24 @@ static int acpi_battery_add_fs(struct acpi_device *device)
return -ENODEV;
}
- for (i = 0; i < ACPI_BATTERY_NUMFILES; ++i) {
- entry = proc_create_data(acpi_battery_file[i].name,
- acpi_battery_file[i].mode,
- acpi_device_dir(device),
- &acpi_battery_file[i].ops,
- acpi_driver_data(device));
- if (!entry)
- return -ENODEV;
- }
+ if (!proc_create_single_data("info", S_IRUGO, acpi_device_dir(device),
+ acpi_battery_info_proc_show, acpi_driver_data(device)))
+ return -ENODEV;
+ if (!proc_create_single_data("state", S_IRUGO, acpi_device_dir(device),
+ acpi_battery_state_proc_show, acpi_driver_data(device)))
+ return -ENODEV;
+ if (!proc_create_data("alarm", S_IFREG | S_IRUGO | S_IWUSR,
+ acpi_device_dir(device), &acpi_battery_alarm_fops,
+ acpi_driver_data(device)))
+ return -ENODEV;
return 0;
}
static void acpi_battery_remove_fs(struct acpi_device *device)
{
- int i;
if (!acpi_device_dir(device))
return;
- for (i = 0; i < ACPI_BATTERY_NUMFILES; ++i)
- remove_proc_entry(acpi_battery_file[i].name,
- acpi_device_dir(device));
-
- remove_proc_entry(acpi_device_bid(device), acpi_battery_dir);
+ remove_proc_subtree(acpi_device_bid(device), acpi_battery_dir);
acpi_device_dir(device) = NULL;
}
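
The battery and button conversions rely on proc_create_single_data(), which wraps single_open() and the fixed file_operations in one call and hands the data pointer to the show function via seq_file's private field. A minimal sketch with a hypothetical foo structure (all foo_* names are illustrative):

    static int foo_proc_show(struct seq_file *m, void *v)
    {
            struct foo *foo = m->private;   /* the data pointer passed below */

            seq_printf(m, "state: %d\n", foo->state);
            return 0;
    }

    /* replaces an open() wrapper plus file_operations per read-only file */
    proc_create_single_data("state", 0444, parent_dir, foo_proc_show, foo);

Only the writable "alarm" file keeps a hand-rolled file_operations above, because proc_create_single_data() has no way to attach a write handler.
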
diff --git a/drivers/acpi/button.c b/drivers/acpi/button.c
index f1cc4f9d31cd..2345a5ee2dbb 100644
--- a/drivers/acpi/button.c
+++ b/drivers/acpi/button.c
@@ -263,19 +263,6 @@ static int acpi_button_state_seq_show(struct seq_file *seq, void *offset)
return 0;
}
-static int acpi_button_state_open_fs(struct inode *inode, struct file *file)
-{
- return single_open(file, acpi_button_state_seq_show, PDE_DATA(inode));
-}
-
-static const struct file_operations acpi_button_state_fops = {
- .owner = THIS_MODULE,
- .open = acpi_button_state_open_fs,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
static int acpi_button_add_fs(struct acpi_device *device)
{
struct acpi_button *button = acpi_driver_data(device);
@@ -311,9 +298,9 @@ static int acpi_button_add_fs(struct acpi_device *device)
}
/* create /proc/acpi/button/lid/LID/state */
- entry = proc_create_data(ACPI_BUTTON_FILE_STATE,
- S_IRUGO, acpi_device_dir(device),
- &acpi_button_state_fops, device);
+ entry = proc_create_single_data(ACPI_BUTTON_FILE_STATE, S_IRUGO,
+ acpi_device_dir(device), acpi_button_state_seq_show,
+ device);
if (!entry) {
ret = -ENODEV;
goto remove_dev_dir;
diff --git a/drivers/amba/bus.c b/drivers/amba/bus.c
index 4a3ac31c07d0..3b0118786b43 100644
--- a/drivers/amba/bus.c
+++ b/drivers/amba/bus.c
@@ -20,6 +20,7 @@
#include <linux/sizes.h>
#include <linux/limits.h>
#include <linux/clk/clk-conf.h>
+#include <linux/platform_device.h>
#include <asm/irq.h>
@@ -193,14 +194,16 @@ static const struct dev_pm_ops amba_pm = {
/*
* Primecells are part of the Advanced Microcontroller Bus Architecture,
* so we call the bus "amba".
+ * The DMA configuration for the platform and AMBA buses is the same,
+ * so we reuse the platform bus's DMA configuration routine here.
*/
struct bus_type amba_bustype = {
.name = "amba",
.dev_groups = amba_dev_groups,
.match = amba_match,
.uevent = amba_uevent,
+ .dma_configure = platform_dma_configure,
.pm = &amba_pm,
- .force_dma = true,
};
static int __init amba_init(void)
diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c
index 1ff17799769d..738fb22978dd 100644
--- a/drivers/ata/ahci.c
+++ b/drivers/ata/ahci.c
@@ -334,6 +334,7 @@ static const struct pci_device_id ahci_pci_tbl[] = {
{ PCI_VDEVICE(INTEL, 0x9c07), board_ahci_mobile }, /* Lynx LP RAID */
{ PCI_VDEVICE(INTEL, 0x9c0e), board_ahci_mobile }, /* Lynx LP RAID */
{ PCI_VDEVICE(INTEL, 0x9c0f), board_ahci_mobile }, /* Lynx LP RAID */
+ { PCI_VDEVICE(INTEL, 0x9dd3), board_ahci_mobile }, /* Cannon Lake PCH-LP AHCI */
{ PCI_VDEVICE(INTEL, 0x1f22), board_ahci }, /* Avoton AHCI */
{ PCI_VDEVICE(INTEL, 0x1f23), board_ahci }, /* Avoton AHCI */
{ PCI_VDEVICE(INTEL, 0x1f24), board_ahci }, /* Avoton RAID */
@@ -698,7 +699,7 @@ static int ahci_vt8251_hardreset(struct ata_link *link, unsigned int *class,
DPRINTK("ENTER\n");
- ahci_stop_engine(ap);
+ hpriv->stop_engine(ap);
rc = sata_link_hardreset(link, sata_ehc_deb_timing(&link->eh_context),
deadline, &online, NULL);
@@ -724,7 +725,7 @@ static int ahci_p5wdh_hardreset(struct ata_link *link, unsigned int *class,
bool online;
int rc;
- ahci_stop_engine(ap);
+ hpriv->stop_engine(ap);
/* clear D2H reception area to properly wait for D2H FIS */
ata_tf_init(link->device, &tf);
@@ -788,7 +789,7 @@ static int ahci_avn_hardreset(struct ata_link *link, unsigned int *class,
DPRINTK("ENTER\n");
- ahci_stop_engine(ap);
+ hpriv->stop_engine(ap);
for (i = 0; i < 2; i++) {
u16 val;
diff --git a/drivers/ata/ahci.h b/drivers/ata/ahci.h
index 4356ef1d28a8..824bd399f02e 100644
--- a/drivers/ata/ahci.h
+++ b/drivers/ata/ahci.h
@@ -350,7 +350,6 @@ struct ahci_host_priv {
u32 em_msg_type; /* EM message type */
bool got_runtime_pm; /* Did we do pm_runtime_get? */
struct clk *clks[AHCI_MAX_CLKS]; /* Optional */
- struct reset_control *rsts; /* Optional */
struct regulator **target_pwrs; /* Optional */
/*
* If platform uses PHYs. There is a 1:1 relation between the port number and
@@ -366,6 +365,13 @@ struct ahci_host_priv {
* be overridden anytime before the host is activated.
*/
void (*start_engine)(struct ata_port *ap);
+ /*
+ * Optional ahci_stop_engine override; if not set, this is set to the
+ * default ahci_stop_engine during ahci_save_initial_config. It can
+ * be overridden anytime before the host is activated.
+ */
+ int (*stop_engine)(struct ata_port *ap);
+
irqreturn_t (*irq_handler)(int irq, void *dev_instance);
/* only required for per-port MSI(-X) support */
diff --git a/drivers/ata/ahci_mvebu.c b/drivers/ata/ahci_mvebu.c
index de7128d81e9c..0045dacd814b 100644
--- a/drivers/ata/ahci_mvebu.c
+++ b/drivers/ata/ahci_mvebu.c
@@ -62,6 +62,60 @@ static void ahci_mvebu_regret_option(struct ahci_host_priv *hpriv)
writel(0x80, hpriv->mmio + AHCI_VENDOR_SPECIFIC_0_DATA);
}
+/**
+ * ahci_mvebu_stop_engine - stop the DMA engine with the mvebu work-around
+ *
+ * @ap: Target ata port
+ *
+ * Errata Ref#226 - SATA Disk HOT swap issue when connected through
+ * Port Multiplier in FIS-based Switching mode.
+ *
+ * To avoid the issue, by design, bits[11:8, 0] of the PxFBS register
+ * are cleared when bit[0] of the Port Command and Status register
+ * (0x18) changes from 1 to 0, i.e. the falling edge of Port Command
+ * and Status bit[0] sends a PULSE that resets PxFBS bits[11:8, 0].
+ *
+ * This function overrides "ahci_stop_engine" from libahci.c by adding
+ * the mvebu work-around (WA): save the PxFBS value before the PxCMD ST
+ * write of 0, then restore the PxFBS value.
+ *
+ * Return: 0 on success; Error code otherwise.
+ */
+int ahci_mvebu_stop_engine(struct ata_port *ap)
+{
+ void __iomem *port_mmio = ahci_port_base(ap);
+ u32 tmp, port_fbs;
+
+ tmp = readl(port_mmio + PORT_CMD);
+
+ /* check if the HBA is idle */
+ if ((tmp & (PORT_CMD_START | PORT_CMD_LIST_ON)) == 0)
+ return 0;
+
+ /* save the port PxFBS register for later restore */
+ port_fbs = readl(port_mmio + PORT_FBS);
+
+ /* set the HBA to idle */
+ tmp &= ~PORT_CMD_START;
+ writel(tmp, port_mmio + PORT_CMD);
+
+ /*
+ * The PxCMD bit #15 signal doesn't clear PxFBS, so restore the PxFBS
+ * register right after clearing PxCMD ST; there is no need to wait
+ * for PxCMD bit #15.
+ */
+ writel(port_fbs, port_mmio + PORT_FBS);
+
+ /* wait for engine to stop. This could be as long as 500 msec */
+ tmp = ata_wait_register(ap, port_mmio + PORT_CMD,
+ PORT_CMD_LIST_ON, PORT_CMD_LIST_ON, 1, 500);
+ if (tmp & PORT_CMD_LIST_ON)
+ return -EIO;
+
+ return 0;
+}
+
#ifdef CONFIG_PM_SLEEP
static int ahci_mvebu_suspend(struct platform_device *pdev, pm_message_t state)
{
@@ -112,6 +166,8 @@ static int ahci_mvebu_probe(struct platform_device *pdev)
if (rc)
return rc;
+ hpriv->stop_engine = ahci_mvebu_stop_engine;
+
if (of_device_is_compatible(pdev->dev.of_node,
"marvell,armada-380-ahci")) {
dram = mv_mbus_dram_info();
diff --git a/drivers/ata/ahci_qoriq.c b/drivers/ata/ahci_qoriq.c
index 2685f28160f7..cfdef4d44ae9 100644
--- a/drivers/ata/ahci_qoriq.c
+++ b/drivers/ata/ahci_qoriq.c
@@ -96,7 +96,7 @@ static int ahci_qoriq_hardreset(struct ata_link *link, unsigned int *class,
DPRINTK("ENTER\n");
- ahci_stop_engine(ap);
+ hpriv->stop_engine(ap);
/*
* There is a errata on ls1021a Rev1.0 and Rev2.0 which is:
diff --git a/drivers/ata/ahci_xgene.c b/drivers/ata/ahci_xgene.c
index c2b5941d9184..ad58da7c9aff 100644
--- a/drivers/ata/ahci_xgene.c
+++ b/drivers/ata/ahci_xgene.c
@@ -165,7 +165,7 @@ static int xgene_ahci_restart_engine(struct ata_port *ap)
PORT_CMD_ISSUE, 0x0, 1, 100))
return -EBUSY;
- ahci_stop_engine(ap);
+ hpriv->stop_engine(ap);
ahci_start_fis_rx(ap);
/*
@@ -421,7 +421,7 @@ static int xgene_ahci_hardreset(struct ata_link *link, unsigned int *class,
portrxfis_saved = readl(port_mmio + PORT_FIS_ADDR);
portrxfishi_saved = readl(port_mmio + PORT_FIS_ADDR_HI);
- ahci_stop_engine(ap);
+ hpriv->stop_engine(ap);
rc = xgene_ahci_do_hardreset(link, deadline, &online);
diff --git a/drivers/ata/libahci.c b/drivers/ata/libahci.c
index 7adcf3caabd0..e5d90977caec 100644
--- a/drivers/ata/libahci.c
+++ b/drivers/ata/libahci.c
@@ -560,6 +560,9 @@ void ahci_save_initial_config(struct device *dev, struct ahci_host_priv *hpriv)
if (!hpriv->start_engine)
hpriv->start_engine = ahci_start_engine;
+ if (!hpriv->stop_engine)
+ hpriv->stop_engine = ahci_stop_engine;
+
if (!hpriv->irq_handler)
hpriv->irq_handler = ahci_single_level_irq_intr;
}
@@ -897,9 +900,10 @@ static void ahci_start_port(struct ata_port *ap)
static int ahci_deinit_port(struct ata_port *ap, const char **emsg)
{
int rc;
+ struct ahci_host_priv *hpriv = ap->host->private_data;
/* disable DMA */
- rc = ahci_stop_engine(ap);
+ rc = hpriv->stop_engine(ap);
if (rc) {
*emsg = "failed to stop engine";
return rc;
@@ -1310,7 +1314,7 @@ int ahci_kick_engine(struct ata_port *ap)
int busy, rc;
/* stop engine */
- rc = ahci_stop_engine(ap);
+ rc = hpriv->stop_engine(ap);
if (rc)
goto out_restart;
@@ -1549,7 +1553,7 @@ int ahci_do_hardreset(struct ata_link *link, unsigned int *class,
DPRINTK("ENTER\n");
- ahci_stop_engine(ap);
+ hpriv->stop_engine(ap);
/* clear D2H reception area to properly wait for D2H FIS */
ata_tf_init(link->device, &tf);
@@ -2075,14 +2079,14 @@ void ahci_error_handler(struct ata_port *ap)
if (!(ap->pflags & ATA_PFLAG_FROZEN)) {
/* restart engine */
- ahci_stop_engine(ap);
+ hpriv->stop_engine(ap);
hpriv->start_engine(ap);
}
sata_pmp_error_handler(ap);
if (!ata_dev_enabled(ap->link.device))
- ahci_stop_engine(ap);
+ hpriv->stop_engine(ap);
}
EXPORT_SYMBOL_GPL(ahci_error_handler);
@@ -2129,7 +2133,7 @@ static void ahci_set_aggressive_devslp(struct ata_port *ap, bool sleep)
return;
/* set DITO, MDAT, DETO and enable DevSlp, need to stop engine first */
- rc = ahci_stop_engine(ap);
+ rc = hpriv->stop_engine(ap);
if (rc)
return;
@@ -2189,7 +2193,7 @@ static void ahci_enable_fbs(struct ata_port *ap)
return;
}
- rc = ahci_stop_engine(ap);
+ rc = hpriv->stop_engine(ap);
if (rc)
return;
@@ -2222,7 +2226,7 @@ static void ahci_disable_fbs(struct ata_port *ap)
return;
}
- rc = ahci_stop_engine(ap);
+ rc = hpriv->stop_engine(ap);
if (rc)
return;
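
The libahci.c hunks route every engine stop through the new hpriv->stop_engine hook, which ahci_save_initial_config() defaults to ahci_stop_engine(). A platform driver that needs a quirk installs its override between getting its resources and activating the host, as ahci_mvebu does above. A sketch of the shape (foo_* names illustrative):

    static int foo_stop_engine(struct ata_port *ap)
    {
            /* vendor work-around goes here; this sketch just delegates */
            return ahci_stop_engine(ap);
    }

    /* in the probe path, before the host is activated: */
    hpriv->stop_engine = foo_stop_engine;
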
diff --git a/drivers/ata/libahci_platform.c b/drivers/ata/libahci_platform.c
index 46a762442dc5..30cc8f1a31e1 100644
--- a/drivers/ata/libahci_platform.c
+++ b/drivers/ata/libahci_platform.c
@@ -25,7 +25,6 @@
#include <linux/phy/phy.h>
#include <linux/pm_runtime.h>
#include <linux/of_platform.h>
-#include <linux/reset.h>
#include "ahci.h"
static void ahci_host_stop(struct ata_host *host);
@@ -196,8 +195,7 @@ EXPORT_SYMBOL_GPL(ahci_platform_disable_regulators);
* following order:
* 1) Regulator
* 2) Clocks (through ahci_platform_enable_clks)
- * 3) Resets
- * 4) Phys
+ * 3) Phys
*
* If resource enabling fails at any point the previous enabled resources
* are disabled in reverse order.
@@ -217,19 +215,12 @@ int ahci_platform_enable_resources(struct ahci_host_priv *hpriv)
if (rc)
goto disable_regulator;
- rc = reset_control_deassert(hpriv->rsts);
- if (rc)
- goto disable_clks;
-
rc = ahci_platform_enable_phys(hpriv);
if (rc)
- goto disable_resets;
+ goto disable_clks;
return 0;
-disable_resets:
- reset_control_assert(hpriv->rsts);
-
disable_clks:
ahci_platform_disable_clks(hpriv);
@@ -248,15 +239,12 @@ EXPORT_SYMBOL_GPL(ahci_platform_enable_resources);
* following order:
* 1) Phys
* 2) Clocks (through ahci_platform_disable_clks)
- * 3) Resets
- * 4) Regulator
+ * 3) Regulator
*/
void ahci_platform_disable_resources(struct ahci_host_priv *hpriv)
{
ahci_platform_disable_phys(hpriv);
- reset_control_assert(hpriv->rsts);
-
ahci_platform_disable_clks(hpriv);
ahci_platform_disable_regulators(hpriv);
@@ -405,12 +393,6 @@ struct ahci_host_priv *ahci_platform_get_resources(struct platform_device *pdev)
hpriv->clks[i] = clk;
}
- hpriv->rsts = devm_reset_control_array_get_optional_shared(dev);
- if (IS_ERR(hpriv->rsts)) {
- rc = PTR_ERR(hpriv->rsts);
- goto err_out;
- }
-
hpriv->nports = child_nodes = of_get_child_count(dev->of_node);
/*
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 8bc71ca61e7f..346b163f6e89 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -4493,6 +4493,10 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = {
/* https://bugzilla.kernel.org/show_bug.cgi?id=15573 */
{ "C300-CTFDDAC128MAG", "0001", ATA_HORKAGE_NONCQ, },
+ /* Some Sandisk SSDs lock up hard with NCQ enabled. Reported on
+ SD7SN6S256G and SD8SN8U256G */
+ { "SanDisk SD[78]SN*G", NULL, ATA_HORKAGE_NONCQ, },
+
/* devices which puke on READ_NATIVE_MAX */
{ "HDS724040KLSA80", "KFAOA20N", ATA_HORKAGE_BROKEN_HPA, },
{ "WDC WD3200JD-00KLB0", "WD-WCAMR1130137", ATA_HORKAGE_BROKEN_HPA },
@@ -4549,7 +4553,16 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = {
ATA_HORKAGE_ZERO_AFTER_TRIM |
ATA_HORKAGE_NOLPM, },
+ /* These specific Samsung models/firmware-revs do not handle LPM well */
+ { "SAMSUNG MZMPC128HBFU-000MV", "CXM14M1Q", ATA_HORKAGE_NOLPM, },
+ { "SAMSUNG SSD PM830 mSATA *", "CXM13D1Q", ATA_HORKAGE_NOLPM, },
+
+ /* Sandisk devices which are known to not handle LPM well */
+ { "SanDisk SD7UB3Q*G1001", NULL, ATA_HORKAGE_NOLPM, },
+
/* devices that don't properly handle queued TRIM commands */
+ { "Micron_M500IT_*", "MU01", ATA_HORKAGE_NO_NCQ_TRIM |
+ ATA_HORKAGE_ZERO_AFTER_TRIM, },
{ "Micron_M500_*", NULL, ATA_HORKAGE_NO_NCQ_TRIM |
ATA_HORKAGE_ZERO_AFTER_TRIM, },
{ "Crucial_CT*M500*", NULL, ATA_HORKAGE_NO_NCQ_TRIM |
diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c
index c016829a38fd..a2398e28c295 100644
--- a/drivers/ata/libata-eh.c
+++ b/drivers/ata/libata-eh.c
@@ -175,8 +175,8 @@ static void ata_eh_handle_port_resume(struct ata_port *ap)
{ }
#endif /* CONFIG_PM */
-static void __ata_ehi_pushv_desc(struct ata_eh_info *ehi, const char *fmt,
- va_list args)
+static __printf(2, 0) void __ata_ehi_pushv_desc(struct ata_eh_info *ehi,
+ const char *fmt, va_list args)
{
ehi->desc_len += vscnprintf(ehi->desc + ehi->desc_len,
ATA_EH_DESC_LEN - ehi->desc_len,
@@ -500,57 +500,6 @@ void ata_eh_release(struct ata_port *ap)
mutex_unlock(&ap->host->eh_mutex);
}
-/**
- * ata_scsi_timed_out - SCSI layer time out callback
- * @cmd: timed out SCSI command
- *
- * Handles SCSI layer timeout. We race with normal completion of
- * the qc for @cmd. If the qc is already gone, we lose and let
- * the scsi command finish (EH_HANDLED). Otherwise, the qc has
- * timed out and EH should be invoked. Prevent ata_qc_complete()
- * from finishing it by setting EH_SCHEDULED and return
- * EH_NOT_HANDLED.
- *
- * TODO: kill this function once old EH is gone.
- *
- * LOCKING:
- * Called from timer context
- *
- * RETURNS:
- * EH_HANDLED or EH_NOT_HANDLED
- */
-enum blk_eh_timer_return ata_scsi_timed_out(struct scsi_cmnd *cmd)
-{
- struct Scsi_Host *host = cmd->device->host;
- struct ata_port *ap = ata_shost_to_port(host);
- unsigned long flags;
- struct ata_queued_cmd *qc;
- enum blk_eh_timer_return ret;
-
- DPRINTK("ENTER\n");
-
- if (ap->ops->error_handler) {
- ret = BLK_EH_NOT_HANDLED;
- goto out;
- }
-
- ret = BLK_EH_HANDLED;
- spin_lock_irqsave(ap->lock, flags);
- qc = ata_qc_from_tag(ap, ap->link.active_tag);
- if (qc) {
- WARN_ON(qc->scsicmd != cmd);
- qc->flags |= ATA_QCFLAG_EH_SCHEDULED;
- qc->err_mask |= AC_ERR_TIMEOUT;
- ret = BLK_EH_NOT_HANDLED;
- }
- spin_unlock_irqrestore(ap->lock, flags);
-
- out:
- DPRINTK("EXIT, ret=%d\n", ret);
- return ret;
-}
-EXPORT_SYMBOL(ata_scsi_timed_out);
-
static void ata_eh_unload(struct ata_port *ap)
{
struct ata_link *link;
diff --git a/drivers/ata/sata_highbank.c b/drivers/ata/sata_highbank.c
index aafb8cc03523..e67815b896fc 100644
--- a/drivers/ata/sata_highbank.c
+++ b/drivers/ata/sata_highbank.c
@@ -410,7 +410,7 @@ static int ahci_highbank_hardreset(struct ata_link *link, unsigned int *class,
int rc;
int retry = 100;
- ahci_stop_engine(ap);
+ hpriv->stop_engine(ap);
/* clear D2H reception area to properly wait for D2H FIS */
ata_tf_init(link->device, &tf);
diff --git a/drivers/ata/sata_sil24.c b/drivers/ata/sata_sil24.c
index 4b1995e2d044..010ca101d412 100644
--- a/drivers/ata/sata_sil24.c
+++ b/drivers/ata/sata_sil24.c
@@ -285,13 +285,13 @@ static const struct sil24_cerr_info {
[PORT_CERR_INCONSISTENT] = { AC_ERR_HSM, ATA_EH_RESET,
"protocol mismatch" },
[PORT_CERR_DIRECTION] = { AC_ERR_HSM, ATA_EH_RESET,
- "data directon mismatch" },
+ "data direction mismatch" },
[PORT_CERR_UNDERRUN] = { AC_ERR_HSM, ATA_EH_RESET,
"ran out of SGEs while writing" },
[PORT_CERR_OVERRUN] = { AC_ERR_HSM, ATA_EH_RESET,
"ran out of SGEs while reading" },
[PORT_CERR_PKT_PROT] = { AC_ERR_HSM, ATA_EH_RESET,
- "invalid data directon for ATAPI CDB" },
+ "invalid data direction for ATAPI CDB" },
[PORT_CERR_SGT_BOUNDARY] = { AC_ERR_SYSTEM, ATA_EH_RESET,
"SGT not on qword boundary" },
[PORT_CERR_SGT_TGTABRT] = { AC_ERR_HOST_BUS, ATA_EH_RESET,
diff --git a/drivers/atm/firestream.c b/drivers/atm/firestream.c
index d97c05690faa..4e46dc9e41ad 100644
--- a/drivers/atm/firestream.c
+++ b/drivers/atm/firestream.c
@@ -191,7 +191,7 @@ static char *res_strings[] = {
"reserved 37",
"reserved 38",
"reserved 39",
- "reseverd 40",
+ "reserved 40",
"reserved 41",
"reserved 42",
"reserved 43",
diff --git a/drivers/atm/zatm.c b/drivers/atm/zatm.c
index 1ef67db03c8e..a8d2eb0ceb8d 100644
--- a/drivers/atm/zatm.c
+++ b/drivers/atm/zatm.c
@@ -28,6 +28,7 @@
#include <asm/io.h>
#include <linux/atomic.h>
#include <linux/uaccess.h>
+#include <linux/nospec.h>
#include "uPD98401.h"
#include "uPD98402.h"
@@ -1150,8 +1151,8 @@ static void eprom_get_byte(struct zatm_dev *zatm_dev, unsigned char *byte,
}
-static unsigned char eprom_try_esi(struct atm_dev *dev, unsigned short cmd,
- int offset, int swap)
+static int eprom_try_esi(struct atm_dev *dev, unsigned short cmd, int offset,
+ int swap)
{
unsigned char buf[ZEPROM_SIZE];
struct zatm_dev *zatm_dev;
@@ -1458,6 +1459,8 @@ static int zatm_ioctl(struct atm_dev *dev,unsigned int cmd,void __user *arg)
return -EFAULT;
if (pool < 0 || pool > ZATM_LAST_POOL)
return -EINVAL;
+ pool = array_index_nospec(pool,
+ ZATM_LAST_POOL + 1);
spin_lock_irqsave(&zatm_dev->lock, flags);
info = zatm_dev->pool_info[pool];
if (cmd == ZATM_GETPOOLZ) {
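
The zatm ioctl fix is an instance of the standard Spectre-v1 hardening pattern: validate the user-supplied index, then clamp it with array_index_nospec() so the CPU cannot use an out-of-bounds value speculatively while the bounds check is still unresolved. The general shape, on a hypothetical table:

    #include <linux/nospec.h>

    /* idx arrives from userspace */
    if (idx < 0 || idx >= ARRAY_SIZE(table))
            return -EINVAL;
    /* clamp idx to [0, ARRAY_SIZE(table)) even under speculation */
    idx = array_index_nospec(idx, ARRAY_SIZE(table));
    val = table[idx];
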
diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c
index 2da998baa75c..30cc9c877ebb 100644
--- a/drivers/base/cpu.c
+++ b/drivers/base/cpu.c
@@ -534,14 +534,22 @@ ssize_t __weak cpu_show_spectre_v2(struct device *dev,
return sprintf(buf, "Not affected\n");
}
+ssize_t __weak cpu_show_spec_store_bypass(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ return sprintf(buf, "Not affected\n");
+}
+
static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL);
static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL);
static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL);
+static DEVICE_ATTR(spec_store_bypass, 0444, cpu_show_spec_store_bypass, NULL);
static struct attribute *cpu_root_vulnerabilities_attrs[] = {
&dev_attr_meltdown.attr,
&dev_attr_spectre_v1.attr,
&dev_attr_spectre_v2.attr,
+ &dev_attr_spec_store_bypass.attr,
NULL
};
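
cpu_show_spec_store_bypass() follows the established convention for this sysfs directory: the driver core provides a __weak stub that reports "Not affected", and an architecture that knows its exposure supplies a strong definition that the linker prefers. Sketched below (the two definitions live in different files; the mitigation string is illustrative):

    /* drivers/base/cpu.c: weak generic fallback */
    ssize_t __weak cpu_show_spec_store_bypass(struct device *dev,
                                              struct device_attribute *attr,
                                              char *buf)
    {
            return sprintf(buf, "Not affected\n");
    }

    /* arch code: strong definition overrides the weak one at link time */
    ssize_t cpu_show_spec_store_bypass(struct device *dev,
                                       struct device_attribute *attr,
                                       char *buf)
    {
            return sprintf(buf, "Mitigation: ...\n");   /* illustrative */
    }
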
diff --git a/drivers/base/dma-mapping.c b/drivers/base/dma-mapping.c
index d82566d6e237..f831a582209c 100644
--- a/drivers/base/dma-mapping.c
+++ b/drivers/base/dma-mapping.c
@@ -329,36 +329,13 @@ void dma_common_free_remap(void *cpu_addr, size_t size, unsigned long vm_flags)
#endif
/*
- * Common configuration to enable DMA API use for a device
+ * Enables DMA API use for a device
*/
-#include <linux/pci.h>
-
int dma_configure(struct device *dev)
{
- struct device *bridge = NULL, *dma_dev = dev;
- enum dev_dma_attr attr;
- int ret = 0;
-
- if (dev_is_pci(dev)) {
- bridge = pci_get_host_bridge_device(to_pci_dev(dev));
- dma_dev = bridge;
- if (IS_ENABLED(CONFIG_OF) && dma_dev->parent &&
- dma_dev->parent->of_node)
- dma_dev = dma_dev->parent;
- }
-
- if (dma_dev->of_node) {
- ret = of_dma_configure(dev, dma_dev->of_node);
- } else if (has_acpi_companion(dma_dev)) {
- attr = acpi_get_dma_attr(to_acpi_device_node(dma_dev->fwnode));
- if (attr != DEV_DMA_NOT_SUPPORTED)
- ret = acpi_dma_configure(dev, attr);
- }
-
- if (bridge)
- pci_put_host_bridge_device(bridge);
-
- return ret;
+ if (dev->bus->dma_configure)
+ return dev->bus->dma_configure(dev);
+ return 0;
}
void dma_deconfigure(struct device *dev)
diff --git a/drivers/base/node.c b/drivers/base/node.c
index 7a3a580821e0..a5e821d09656 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -490,7 +490,8 @@ int unregister_mem_sect_under_nodes(struct memory_block *mem_blk,
return 0;
}
-int link_mem_sections(int nid, unsigned long start_pfn, unsigned long nr_pages)
+int link_mem_sections(int nid, unsigned long start_pfn, unsigned long nr_pages,
+ bool check_nid)
{
unsigned long end_pfn = start_pfn + nr_pages;
unsigned long pfn;
@@ -514,7 +515,7 @@ int link_mem_sections(int nid, unsigned long start_pfn, unsigned long nr_pages)
mem_blk = find_memory_block_hinted(mem_sect, mem_blk);
- ret = register_mem_sect_under_node(mem_blk, nid, true);
+ ret = register_mem_sect_under_node(mem_blk, nid, check_nid);
if (!err)
err = ret;
diff --git a/drivers/base/platform-msi.c b/drivers/base/platform-msi.c
index 8e22073aeeed..60d6cc618f1c 100644
--- a/drivers/base/platform-msi.c
+++ b/drivers/base/platform-msi.c
@@ -101,6 +101,9 @@ static void platform_msi_update_chip_ops(struct msi_domain_info *info)
chip->irq_set_affinity = msi_domain_set_affinity;
if (!chip->irq_write_msi_msg)
chip->irq_write_msi_msg = platform_msi_write_msg;
+ if (WARN_ON((info->flags & MSI_FLAG_LEVEL_CAPABLE) &&
+ !(chip->flags & IRQCHIP_SUPPORTS_LEVEL_MSI)))
+ info->flags &= ~MSI_FLAG_LEVEL_CAPABLE;
}
static void platform_msi_free_descs(struct device *dev, int base, int nvec)
diff --git a/drivers/base/platform.c b/drivers/base/platform.c
index 8075ddc70a17..c0ff1e73a634 100644
--- a/drivers/base/platform.c
+++ b/drivers/base/platform.c
@@ -1130,6 +1130,22 @@ int platform_pm_restore(struct device *dev)
#endif /* CONFIG_HIBERNATE_CALLBACKS */
+int platform_dma_configure(struct device *dev)
+{
+ enum dev_dma_attr attr;
+ int ret = 0;
+
+ if (dev->of_node) {
+ ret = of_dma_configure(dev, dev->of_node, true);
+ } else if (has_acpi_companion(dev)) {
+ attr = acpi_get_dma_attr(to_acpi_device_node(dev->fwnode));
+ if (attr != DEV_DMA_NOT_SUPPORTED)
+ ret = acpi_dma_configure(dev, attr);
+ }
+
+ return ret;
+}
+
static const struct dev_pm_ops platform_dev_pm_ops = {
.runtime_suspend = pm_generic_runtime_suspend,
.runtime_resume = pm_generic_runtime_resume,
@@ -1141,8 +1157,8 @@ struct bus_type platform_bus_type = {
.dev_groups = platform_dev_groups,
.match = platform_match,
.uevent = platform_uevent,
+ .dma_configure = platform_dma_configure,
.pm = &platform_dev_pm_ops,
- .force_dma = true,
};
EXPORT_SYMBOL_GPL(platform_bus_type);
diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
index 02a497e7c785..e5e067091572 100644
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -1923,10 +1923,8 @@ static int device_prepare(struct device *dev, pm_message_t state)
dev->power.wakeup_path = false;
- if (dev->power.no_pm_callbacks) {
- ret = 1; /* Let device go direct_complete */
+ if (dev->power.no_pm_callbacks)
goto unlock;
- }
if (dev->pm_domain)
callback = dev->pm_domain->ops.prepare;
@@ -1960,7 +1958,8 @@ unlock:
*/
spin_lock_irq(&dev->power.lock);
dev->power.direct_complete = state.event == PM_EVENT_SUSPEND &&
- pm_runtime_suspended(dev) && ret > 0 &&
+ ((pm_runtime_suspended(dev) && ret > 0) ||
+ dev->power.no_pm_callbacks) &&
!dev_pm_test_driver_flags(dev, DPM_FLAG_NEVER_SKIP);
spin_unlock_irq(&dev->power.lock);
return 0;
diff --git a/drivers/base/regmap/regmap-mmio.c b/drivers/base/regmap/regmap-mmio.c
index 5cadfd3394d8..8741fb5f8f54 100644
--- a/drivers/base/regmap/regmap-mmio.c
+++ b/drivers/base/regmap/regmap-mmio.c
@@ -206,7 +206,8 @@ static void regmap_mmio_free_context(void *context)
if (!IS_ERR(ctx->clk)) {
clk_unprepare(ctx->clk);
- clk_put(ctx->clk);
+ if (!ctx->attached_clk)
+ clk_put(ctx->clk);
}
kfree(context);
}
diff --git a/drivers/base/regmap/regmap-slimbus.c b/drivers/base/regmap/regmap-slimbus.c
index c90bee81d954..91d501eda8a9 100644
--- a/drivers/base/regmap/regmap-slimbus.c
+++ b/drivers/base/regmap/regmap-slimbus.c
@@ -41,7 +41,7 @@ static struct regmap_bus regmap_slimbus_bus = {
static const struct regmap_bus *regmap_get_slimbus(struct slim_device *slim,
const struct regmap_config *config)
{
- if (config->val_bits == 8 && config->reg_bits == 8)
+ if (config->val_bits == 8 && config->reg_bits == 16)
return &regmap_slimbus_bus;
return ERR_PTR(-ENOTSUPP);
diff --git a/drivers/bcma/driver_mips.c b/drivers/bcma/driver_mips.c
index f040aba48d50..27e9686b6d3a 100644
--- a/drivers/bcma/driver_mips.c
+++ b/drivers/bcma/driver_mips.c
@@ -184,7 +184,7 @@ static void bcma_core_mips_print_irq(struct bcma_device *dev, unsigned int irq)
{
int i;
static const char *irq_name[] = {"2(S)", "3", "4", "5", "6", "D", "I"};
- char interrupts[20];
+ char interrupts[25];
char *ints = interrupts;
for (i = 0; i < ARRAY_SIZE(irq_name); i++)
diff --git a/drivers/bcma/main.c b/drivers/bcma/main.c
index e6986c7608f1..fc1f4acdd189 100644
--- a/drivers/bcma/main.c
+++ b/drivers/bcma/main.c
@@ -207,7 +207,7 @@ static void bcma_of_fill_device(struct device *parent,
core->irq = bcma_of_get_irq(parent, core, 0);
- of_dma_configure(&core->dev, node);
+ of_dma_configure(&core->dev, node, false);
}
unsigned int bcma_core_irq(struct bcma_device *core, int num)
diff --git a/drivers/block/DAC960.c b/drivers/block/DAC960.c
index f781eff7d23e..6ca77d6047d6 100644
--- a/drivers/block/DAC960.c
+++ b/drivers/block/DAC960.c
@@ -1179,7 +1179,6 @@ static bool DAC960_V1_EnableMemoryMailboxInterface(DAC960_Controller_T
if (pci_set_dma_mask(Controller->PCIDevice, DMA_BIT_MASK(32)))
return DAC960_Failure(Controller, "DMA mask out of range");
- Controller->BounceBufferLimit = DMA_BIT_MASK(32);
if ((hw_type == DAC960_PD_Controller) || (hw_type == DAC960_P_Controller)) {
CommandMailboxesSize = 0;
@@ -1380,11 +1379,8 @@ static bool DAC960_V2_EnableMemoryMailboxInterface(DAC960_Controller_T
dma_addr_t CommandMailboxDMA;
DAC960_V2_CommandStatus_T CommandStatus;
- if (!pci_set_dma_mask(Controller->PCIDevice, DMA_BIT_MASK(64)))
- Controller->BounceBufferLimit = DMA_BIT_MASK(64);
- else if (!pci_set_dma_mask(Controller->PCIDevice, DMA_BIT_MASK(32)))
- Controller->BounceBufferLimit = DMA_BIT_MASK(32);
- else
+ if (pci_set_dma_mask(Controller->PCIDevice, DMA_BIT_MASK(64)) &&
+ pci_set_dma_mask(Controller->PCIDevice, DMA_BIT_MASK(32)))
return DAC960_Failure(Controller, "DMA mask out of range");
/* This is a temporary dma mapping, used only in the scope of this function */
@@ -2540,7 +2536,6 @@ static bool DAC960_RegisterBlockDevice(DAC960_Controller_T *Controller)
continue;
}
Controller->RequestQueue[n] = RequestQueue;
- blk_queue_bounce_limit(RequestQueue, Controller->BounceBufferLimit);
RequestQueue->queuedata = Controller;
blk_queue_max_segments(RequestQueue, Controller->DriverScatterGatherLimit);
blk_queue_max_hw_sectors(RequestQueue, Controller->MaxBlocksPerCommand);
@@ -6451,19 +6446,6 @@ static int dac960_proc_show(struct seq_file *m, void *v)
return 0;
}
-static int dac960_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, dac960_proc_show, NULL);
-}
-
-static const struct file_operations dac960_proc_fops = {
- .owner = THIS_MODULE,
- .open = dac960_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
static int dac960_initial_status_proc_show(struct seq_file *m, void *v)
{
DAC960_Controller_T *Controller = (DAC960_Controller_T *)m->private;
@@ -6471,19 +6453,6 @@ static int dac960_initial_status_proc_show(struct seq_file *m, void *v)
return 0;
}
-static int dac960_initial_status_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, dac960_initial_status_proc_show, PDE_DATA(inode));
-}
-
-static const struct file_operations dac960_initial_status_proc_fops = {
- .owner = THIS_MODULE,
- .open = dac960_initial_status_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
static int dac960_current_status_proc_show(struct seq_file *m, void *v)
{
DAC960_Controller_T *Controller = (DAC960_Controller_T *) m->private;
@@ -6517,19 +6486,6 @@ static int dac960_current_status_proc_show(struct seq_file *m, void *v)
return 0;
}
-static int dac960_current_status_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, dac960_current_status_proc_show, PDE_DATA(inode));
-}
-
-static const struct file_operations dac960_current_status_proc_fops = {
- .owner = THIS_MODULE,
- .open = dac960_current_status_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
static int dac960_user_command_proc_show(struct seq_file *m, void *v)
{
DAC960_Controller_T *Controller = (DAC960_Controller_T *)m->private;
@@ -6584,17 +6540,19 @@ static void DAC960_CreateProcEntries(DAC960_Controller_T *Controller)
if (DAC960_ProcDirectoryEntry == NULL) {
DAC960_ProcDirectoryEntry = proc_mkdir("rd", NULL);
- proc_create("status", 0, DAC960_ProcDirectoryEntry,
- &dac960_proc_fops);
+ proc_create_single("status", 0, DAC960_ProcDirectoryEntry,
+ dac960_proc_show);
}
snprintf(Controller->ControllerName, sizeof(Controller->ControllerName),
"c%d", Controller->ControllerNumber);
ControllerProcEntry = proc_mkdir(Controller->ControllerName,
DAC960_ProcDirectoryEntry);
- proc_create_data("initial_status", 0, ControllerProcEntry, &dac960_initial_status_proc_fops, Controller);
- proc_create_data("current_status", 0, ControllerProcEntry, &dac960_current_status_proc_fops, Controller);
- proc_create_data("user_command", S_IWUSR | S_IRUSR, ControllerProcEntry, &dac960_user_command_proc_fops, Controller);
+ proc_create_single_data("initial_status", 0, ControllerProcEntry,
+ dac960_initial_status_proc_show, Controller);
+ proc_create_single_data("current_status", 0, ControllerProcEntry,
+ dac960_current_status_proc_show, Controller);
+ proc_create_data("user_command", 0600, ControllerProcEntry, &dac960_user_command_proc_fops, Controller);
Controller->ControllerProcEntry = ControllerProcEntry;
}
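
The DAC960 proc hunks are one instance of the tree-wide conversion to proc_create_single{,_data}(), which lets the proc core generate the single_open()/file_operations boilerplate. A self-contained sketch of the pattern (example_* names are illustrative):

#include <linux/proc_fs.h>
#include <linux/seq_file.h>

static int example_proc_show(struct seq_file *m, void *v)
{
	/* With proc_create_single_data(), the pointer that used to be
	 * fetched via PDE_DATA(inode) in an open() wrapper arrives here
	 * as m->private. */
	seq_printf(m, "state: %s\n", (const char *)m->private);
	return 0;
}

static void example_proc_init(void)
{
	proc_create_single_data("example_status", 0444, NULL,
				example_proc_show, "idle");
}

One call replaces roughly fifteen lines of open()/read()/release() glue per entry, as the deleted dac960_*_proc_fops blocks above show.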
diff --git a/drivers/block/DAC960.h b/drivers/block/DAC960.h
index 21aff470d268..1439e651928b 100644
--- a/drivers/block/DAC960.h
+++ b/drivers/block/DAC960.h
@@ -2295,7 +2295,6 @@ typedef struct DAC960_Controller
unsigned short MaxBlocksPerCommand;
unsigned short ControllerScatterGatherLimit;
unsigned short DriverScatterGatherLimit;
- u64 BounceBufferLimit;
unsigned int CombinedStatusBufferLength;
unsigned int InitialStatusLength;
unsigned int CurrentStatusLength;
diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c
index 6797e6c23c8a..429ebb84b592 100644
--- a/drivers/block/aoe/aoeblk.c
+++ b/drivers/block/aoe/aoeblk.c
@@ -159,14 +159,14 @@ static int aoe_debugfs_open(struct inode *inode, struct file *file)
return single_open(file, aoedisk_debugfs_show, inode->i_private);
}
-static DEVICE_ATTR(state, S_IRUGO, aoedisk_show_state, NULL);
-static DEVICE_ATTR(mac, S_IRUGO, aoedisk_show_mac, NULL);
-static DEVICE_ATTR(netif, S_IRUGO, aoedisk_show_netif, NULL);
+static DEVICE_ATTR(state, 0444, aoedisk_show_state, NULL);
+static DEVICE_ATTR(mac, 0444, aoedisk_show_mac, NULL);
+static DEVICE_ATTR(netif, 0444, aoedisk_show_netif, NULL);
static struct device_attribute dev_attr_firmware_version = {
- .attr = { .name = "firmware-version", .mode = S_IRUGO },
+ .attr = { .name = "firmware-version", .mode = 0444 },
.show = aoedisk_show_fwver,
};
-static DEVICE_ATTR(payload, S_IRUGO, aoedisk_show_payload, NULL);
+static DEVICE_ATTR(payload, 0444, aoedisk_show_payload, NULL);
static struct attribute *aoe_attrs[] = {
&dev_attr_state.attr,
@@ -388,7 +388,6 @@ aoeblk_gdalloc(void *vp)
d->aoemajor, d->aoeminor);
goto err_mempool;
}
- blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH);
spin_lock_irqsave(&d->lock, flags);
WARN_ON(!(d->flags & DEVFL_GD_NOW));
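
The DEVICE_ATTR() changes in this and the following files swap symbolic mode macros for the octal constants checkpatch prefers; the values are unchanged. The mapping used throughout this diff:

/*	S_IRUGO			== 0444	(r--r--r--)
 *	S_IWUSR			== 0200	(-w-------)
 *	S_IRUSR | S_IRGRP	== 0440	(r--r-----)
 *	S_IRUGO | S_IWUSR	== 0644	(rw-r--r--)
 */

#include <linux/device.h>

/* Illustrative read-only attribute, not from the patch: */
static ssize_t example_show(struct device *dev,
			    struct device_attribute *attr, char *buf)
{
	return sprintf(buf, "example\n");
}
static DEVICE_ATTR(example, 0444, example_show, NULL);	/* was S_IRUGO */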
diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c
index 540bb60cd071..096882e54095 100644
--- a/drivers/block/aoe/aoecmd.c
+++ b/drivers/block/aoe/aoecmd.c
@@ -1032,8 +1032,9 @@ bvcpy(struct sk_buff *skb, struct bio *bio, struct bvec_iter iter, long cnt)
iter.bi_size = cnt;
__bio_for_each_segment(bv, bio, iter, iter) {
- char *p = page_address(bv.bv_page) + bv.bv_offset;
+ char *p = kmap_atomic(bv.bv_page) + bv.bv_offset;
skb_copy_bits(skb, soff, p, bv.bv_len);
+ kunmap_atomic(p);
soff += bv.bv_len;
}
}
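
The bvcpy() fix above matters because the BLK_BOUNCE_HIGH limit was removed from aoeblk.c in the previous file: bios may now carry highmem pages, and page_address() is only valid for lowmem. The general pattern, as an illustrative helper that is not from the patch:

#include <linux/bvec.h>
#include <linux/highmem.h>
#include <linux/kernel.h>
#include <linux/string.h>

static void example_copy_to_bvec(struct bio_vec *bv, const void *src,
				 size_t len)
{
	/* kmap_atomic() provides a short-lived CPU mapping that also
	 * works for highmem pages; on 64-bit it reduces to an address
	 * lookup. */
	char *p = kmap_atomic(bv->bv_page);

	memcpy(p + bv->bv_offset, src, min_t(size_t, len, bv->bv_len));
	kunmap_atomic(p);
}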
diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index 66cb0f857f64..bb976598ee43 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -331,15 +331,15 @@ static const struct block_device_operations brd_fops = {
* And now the modules code and kernel interface.
*/
static int rd_nr = CONFIG_BLK_DEV_RAM_COUNT;
-module_param(rd_nr, int, S_IRUGO);
+module_param(rd_nr, int, 0444);
MODULE_PARM_DESC(rd_nr, "Maximum number of brd devices");
unsigned long rd_size = CONFIG_BLK_DEV_RAM_SIZE;
-module_param(rd_size, ulong, S_IRUGO);
+module_param(rd_size, ulong, 0444);
MODULE_PARM_DESC(rd_size, "Size of each RAM disk in kbytes.");
static int max_part = 1;
-module_param(max_part, int, S_IRUGO);
+module_param(max_part, int, 0444);
MODULE_PARM_DESC(max_part, "Num Minors to reserve between devices");
MODULE_LICENSE("GPL");
@@ -402,6 +402,10 @@ static struct brd_device *brd_alloc(int i)
set_capacity(disk, rd_size * 2);
disk->queue->backing_dev_info->capabilities |= BDI_CAP_SYNCHRONOUS_IO;
+ /* Tell the block layer that this is not a rotational device */
+ blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue);
+ blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, disk->queue);
+
return brd;
out_free_queue:
diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c
index 9f4e6f502b84..11a85b740327 100644
--- a/drivers/block/drbd/drbd_bitmap.c
+++ b/drivers/block/drbd/drbd_bitmap.c
@@ -977,7 +977,7 @@ static void drbd_bm_endio(struct bio *bio)
bm_page_unlock_io(device, idx);
if (ctx->flags & BM_AIO_COPY_PAGES)
- mempool_free(bio->bi_io_vec[0].bv_page, drbd_md_io_page_pool);
+ mempool_free(bio->bi_io_vec[0].bv_page, &drbd_md_io_page_pool);
bio_put(bio);
@@ -1014,7 +1014,8 @@ static void bm_page_io_async(struct drbd_bm_aio_ctx *ctx, int page_nr) __must_ho
bm_set_page_unchanged(b->bm_pages[page_nr]);
if (ctx->flags & BM_AIO_COPY_PAGES) {
- page = mempool_alloc(drbd_md_io_page_pool, __GFP_HIGHMEM|__GFP_RECLAIM);
+ page = mempool_alloc(&drbd_md_io_page_pool,
+ GFP_NOIO | __GFP_HIGHMEM);
copy_highpage(page, b->bm_pages[page_nr]);
bm_store_page_idx(page, page_nr);
} else
diff --git a/drivers/block/drbd/drbd_debugfs.c b/drivers/block/drbd/drbd_debugfs.c
index ab21976a87b2..5d5e8d6a8a56 100644
--- a/drivers/block/drbd/drbd_debugfs.c
+++ b/drivers/block/drbd/drbd_debugfs.c
@@ -481,9 +481,9 @@ void drbd_debugfs_resource_add(struct drbd_resource *resource)
goto fail;
resource->debugfs_res_connections = dentry;
- dentry = debugfs_create_file("in_flight_summary", S_IRUSR|S_IRGRP,
- resource->debugfs_res, resource,
- &in_flight_summary_fops);
+ dentry = debugfs_create_file("in_flight_summary", 0440,
+ resource->debugfs_res, resource,
+ &in_flight_summary_fops);
if (IS_ERR_OR_NULL(dentry))
goto fail;
resource->debugfs_res_in_flight_summary = dentry;
@@ -645,16 +645,16 @@ void drbd_debugfs_connection_add(struct drbd_connection *connection)
goto fail;
connection->debugfs_conn = dentry;
- dentry = debugfs_create_file("callback_history", S_IRUSR|S_IRGRP,
- connection->debugfs_conn, connection,
- &connection_callback_history_fops);
+ dentry = debugfs_create_file("callback_history", 0440,
+ connection->debugfs_conn, connection,
+ &connection_callback_history_fops);
if (IS_ERR_OR_NULL(dentry))
goto fail;
connection->debugfs_conn_callback_history = dentry;
- dentry = debugfs_create_file("oldest_requests", S_IRUSR|S_IRGRP,
- connection->debugfs_conn, connection,
- &connection_oldest_requests_fops);
+ dentry = debugfs_create_file("oldest_requests", 0440,
+ connection->debugfs_conn, connection,
+ &connection_oldest_requests_fops);
if (IS_ERR_OR_NULL(dentry))
goto fail;
connection->debugfs_conn_oldest_requests = dentry;
@@ -824,7 +824,7 @@ void drbd_debugfs_device_add(struct drbd_device *device)
device->debugfs_minor = dentry;
#define DCF(name) do { \
- dentry = debugfs_create_file(#name, S_IRUSR|S_IRGRP, \
+ dentry = debugfs_create_file(#name, 0440, \
device->debugfs_vol, device, \
&device_ ## name ## _fops); \
if (IS_ERR_OR_NULL(dentry)) \
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index 06ecee1b528e..bc4ed2ed40a2 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -1405,8 +1405,8 @@ extern struct kmem_cache *drbd_request_cache;
extern struct kmem_cache *drbd_ee_cache; /* peer requests */
extern struct kmem_cache *drbd_bm_ext_cache; /* bitmap extents */
extern struct kmem_cache *drbd_al_ext_cache; /* activity log extents */
-extern mempool_t *drbd_request_mempool;
-extern mempool_t *drbd_ee_mempool;
+extern mempool_t drbd_request_mempool;
+extern mempool_t drbd_ee_mempool;
/* drbd's page pool, used to buffer data received from the peer,
* or data requested by the peer.
@@ -1432,16 +1432,16 @@ extern wait_queue_head_t drbd_pp_wait;
* 128 should be plenty, currently we probably can get away with as few as 1.
*/
#define DRBD_MIN_POOL_PAGES 128
-extern mempool_t *drbd_md_io_page_pool;
+extern mempool_t drbd_md_io_page_pool;
/* We also need to make sure we get a bio
* when we need it for housekeeping purposes */
-extern struct bio_set *drbd_md_io_bio_set;
+extern struct bio_set drbd_md_io_bio_set;
/* to allocate from that set */
extern struct bio *bio_alloc_drbd(gfp_t gfp_mask);
/* And a bio_set for cloning */
-extern struct bio_set *drbd_io_bio_set;
+extern struct bio_set drbd_io_bio_set;
extern struct mutex resources_mutex;
@@ -1643,7 +1643,7 @@ void drbd_bump_write_ordering(struct drbd_resource *resource, struct drbd_backin
/* drbd_proc.c */
extern struct proc_dir_entry *drbd_proc;
-extern const struct file_operations drbd_proc_fops;
+int drbd_seq_show(struct seq_file *seq, void *v);
/* drbd_actlog.c */
extern bool drbd_al_begin_io_prepare(struct drbd_device *device, struct drbd_interval *i);
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 185f1ef00a7c..7655d6133139 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -124,11 +124,11 @@ struct kmem_cache *drbd_request_cache;
struct kmem_cache *drbd_ee_cache; /* peer requests */
struct kmem_cache *drbd_bm_ext_cache; /* bitmap extents */
struct kmem_cache *drbd_al_ext_cache; /* activity log extents */
-mempool_t *drbd_request_mempool;
-mempool_t *drbd_ee_mempool;
-mempool_t *drbd_md_io_page_pool;
-struct bio_set *drbd_md_io_bio_set;
-struct bio_set *drbd_io_bio_set;
+mempool_t drbd_request_mempool;
+mempool_t drbd_ee_mempool;
+mempool_t drbd_md_io_page_pool;
+struct bio_set drbd_md_io_bio_set;
+struct bio_set drbd_io_bio_set;
/* I do not use a standard mempool, because:
1) I want to hand out the pre-allocated objects first.
@@ -153,10 +153,10 @@ struct bio *bio_alloc_drbd(gfp_t gfp_mask)
{
struct bio *bio;
- if (!drbd_md_io_bio_set)
+ if (!bioset_initialized(&drbd_md_io_bio_set))
return bio_alloc(gfp_mask, 1);
- bio = bio_alloc_bioset(gfp_mask, 1, drbd_md_io_bio_set);
+ bio = bio_alloc_bioset(gfp_mask, 1, &drbd_md_io_bio_set);
if (!bio)
return NULL;
return bio;
@@ -2097,16 +2097,11 @@ static void drbd_destroy_mempools(void)
/* D_ASSERT(device, atomic_read(&drbd_pp_vacant)==0); */
- if (drbd_io_bio_set)
- bioset_free(drbd_io_bio_set);
- if (drbd_md_io_bio_set)
- bioset_free(drbd_md_io_bio_set);
- if (drbd_md_io_page_pool)
- mempool_destroy(drbd_md_io_page_pool);
- if (drbd_ee_mempool)
- mempool_destroy(drbd_ee_mempool);
- if (drbd_request_mempool)
- mempool_destroy(drbd_request_mempool);
+ bioset_exit(&drbd_io_bio_set);
+ bioset_exit(&drbd_md_io_bio_set);
+ mempool_exit(&drbd_md_io_page_pool);
+ mempool_exit(&drbd_ee_mempool);
+ mempool_exit(&drbd_request_mempool);
if (drbd_ee_cache)
kmem_cache_destroy(drbd_ee_cache);
if (drbd_request_cache)
@@ -2116,11 +2111,6 @@ static void drbd_destroy_mempools(void)
if (drbd_al_ext_cache)
kmem_cache_destroy(drbd_al_ext_cache);
- drbd_io_bio_set = NULL;
- drbd_md_io_bio_set = NULL;
- drbd_md_io_page_pool = NULL;
- drbd_ee_mempool = NULL;
- drbd_request_mempool = NULL;
drbd_ee_cache = NULL;
drbd_request_cache = NULL;
drbd_bm_ext_cache = NULL;
@@ -2133,18 +2123,7 @@ static int drbd_create_mempools(void)
{
struct page *page;
const int number = (DRBD_MAX_BIO_SIZE/PAGE_SIZE) * drbd_minor_count;
- int i;
-
- /* prepare our caches and mempools */
- drbd_request_mempool = NULL;
- drbd_ee_cache = NULL;
- drbd_request_cache = NULL;
- drbd_bm_ext_cache = NULL;
- drbd_al_ext_cache = NULL;
- drbd_pp_pool = NULL;
- drbd_md_io_page_pool = NULL;
- drbd_md_io_bio_set = NULL;
- drbd_io_bio_set = NULL;
+ int i, ret;
/* caches */
drbd_request_cache = kmem_cache_create(
@@ -2168,26 +2147,26 @@ static int drbd_create_mempools(void)
goto Enomem;
/* mempools */
- drbd_io_bio_set = bioset_create(BIO_POOL_SIZE, 0, 0);
- if (drbd_io_bio_set == NULL)
+ ret = bioset_init(&drbd_io_bio_set, BIO_POOL_SIZE, 0, 0);
+ if (ret)
goto Enomem;
- drbd_md_io_bio_set = bioset_create(DRBD_MIN_POOL_PAGES, 0,
- BIOSET_NEED_BVECS);
- if (drbd_md_io_bio_set == NULL)
+ ret = bioset_init(&drbd_md_io_bio_set, DRBD_MIN_POOL_PAGES, 0,
+ BIOSET_NEED_BVECS);
+ if (ret)
goto Enomem;
- drbd_md_io_page_pool = mempool_create_page_pool(DRBD_MIN_POOL_PAGES, 0);
- if (drbd_md_io_page_pool == NULL)
+ ret = mempool_init_page_pool(&drbd_md_io_page_pool, DRBD_MIN_POOL_PAGES, 0);
+ if (ret)
goto Enomem;
- drbd_request_mempool = mempool_create_slab_pool(number,
- drbd_request_cache);
- if (drbd_request_mempool == NULL)
+ ret = mempool_init_slab_pool(&drbd_request_mempool, number,
+ drbd_request_cache);
+ if (ret)
goto Enomem;
- drbd_ee_mempool = mempool_create_slab_pool(number, drbd_ee_cache);
- if (drbd_ee_mempool == NULL)
+ ret = mempool_init_slab_pool(&drbd_ee_mempool, number, drbd_ee_cache);
+ if (ret)
goto Enomem;
/* drbd's page pool */
@@ -3010,7 +2989,7 @@ static int __init drbd_init(void)
goto fail;
err = -ENOMEM;
- drbd_proc = proc_create_data("drbd", S_IFREG | S_IRUGO , NULL, &drbd_proc_fops, NULL);
+ drbd_proc = proc_create_single("drbd", S_IFREG | 0444 , NULL, drbd_seq_show);
if (!drbd_proc) {
pr_err("unable to register proc file\n");
goto fail;
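
The drbd hunks above follow the tree-wide conversion from pointer-based mempool_create_*()/bioset_create() to embedded objects initialized in place: one less allocation and pointer chase per I/O, and "is it set up?" becomes bioset_initialized() or a zeroed struct rather than a NULL check. A reduced sketch of the lifecycle (example_* names are illustrative):

#include <linux/bio.h>
#include <linux/mempool.h>

static mempool_t example_pool;		/* embedded, not mempool_t * */
static struct bio_set example_bio_set;

static int example_init_pools(void)
{
	int ret;

	ret = mempool_init_kmalloc_pool(&example_pool, 16, 256);
	if (ret)
		return ret;

	ret = bioset_init(&example_bio_set, BIO_POOL_SIZE, 0,
			  BIOSET_NEED_BVECS);
	if (ret)
		mempool_exit(&example_pool);
	return ret;
}

static void example_exit_pools(void)
{
	/* Both exit helpers are safe on zeroed, never-initialized
	 * objects, which is why drbd_destroy_mempools() above can call
	 * them unconditionally. */
	bioset_exit(&example_bio_set);
	mempool_exit(&example_pool);
}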
diff --git a/drivers/block/drbd/drbd_proc.c b/drivers/block/drbd/drbd_proc.c
index 582caeb0de86..74ef29247bb5 100644
--- a/drivers/block/drbd/drbd_proc.c
+++ b/drivers/block/drbd/drbd_proc.c
@@ -33,18 +33,7 @@
#include <linux/drbd.h>
#include "drbd_int.h"
-static int drbd_proc_open(struct inode *inode, struct file *file);
-static int drbd_proc_release(struct inode *inode, struct file *file);
-
-
struct proc_dir_entry *drbd_proc;
-const struct file_operations drbd_proc_fops = {
- .owner = THIS_MODULE,
- .open = drbd_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = drbd_proc_release,
-};
static void seq_printf_with_thousands_grouping(struct seq_file *seq, long v)
{
@@ -235,7 +224,7 @@ static void drbd_syncer_progress(struct drbd_device *device, struct seq_file *se
}
}
-static int drbd_seq_show(struct seq_file *seq, void *v)
+int drbd_seq_show(struct seq_file *seq, void *v)
{
int i, prev_i = -1;
const char *sn;
@@ -345,24 +334,3 @@ static int drbd_seq_show(struct seq_file *seq, void *v)
return 0;
}
-
-static int drbd_proc_open(struct inode *inode, struct file *file)
-{
- int err;
-
- if (try_module_get(THIS_MODULE)) {
- err = single_open(file, drbd_seq_show, NULL);
- if (err)
- module_put(THIS_MODULE);
- return err;
- }
- return -ENODEV;
-}
-
-static int drbd_proc_release(struct inode *inode, struct file *file)
-{
- module_put(THIS_MODULE);
- return single_release(inode, file);
-}
-
-/* PROC FS stuff end */
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index c72dee0ef083..be9450f5ad1c 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -378,7 +378,7 @@ drbd_alloc_peer_req(struct drbd_peer_device *peer_device, u64 id, sector_t secto
if (drbd_insert_fault(device, DRBD_FAULT_AL_EE))
return NULL;
- peer_req = mempool_alloc(drbd_ee_mempool, gfp_mask & ~__GFP_HIGHMEM);
+ peer_req = mempool_alloc(&drbd_ee_mempool, gfp_mask & ~__GFP_HIGHMEM);
if (!peer_req) {
if (!(gfp_mask & __GFP_NOWARN))
drbd_err(device, "%s: allocation failed\n", __func__);
@@ -409,7 +409,7 @@ drbd_alloc_peer_req(struct drbd_peer_device *peer_device, u64 id, sector_t secto
return peer_req;
fail:
- mempool_free(peer_req, drbd_ee_mempool);
+ mempool_free(peer_req, &drbd_ee_mempool);
return NULL;
}
@@ -426,7 +426,7 @@ void __drbd_free_peer_req(struct drbd_device *device, struct drbd_peer_request *
peer_req->flags &= ~EE_CALL_AL_COMPLETE_IO;
drbd_al_complete_io(device, &peer_req->i);
}
- mempool_free(peer_req, drbd_ee_mempool);
+ mempool_free(peer_req, &drbd_ee_mempool);
}
int drbd_free_peer_reqs(struct drbd_device *device, struct list_head *list)
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index a500e738d929..a47e4987ee46 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -55,7 +55,7 @@ static struct drbd_request *drbd_req_new(struct drbd_device *device, struct bio
{
struct drbd_request *req;
- req = mempool_alloc(drbd_request_mempool, GFP_NOIO);
+ req = mempool_alloc(&drbd_request_mempool, GFP_NOIO);
if (!req)
return NULL;
memset(req, 0, sizeof(*req));
@@ -184,7 +184,7 @@ void drbd_req_destroy(struct kref *kref)
}
}
- mempool_free(req, drbd_request_mempool);
+ mempool_free(req, &drbd_request_mempool);
}
static void wake_all_senders(struct drbd_connection *connection)
diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h
index cb97b3b30962..94c654020f0f 100644
--- a/drivers/block/drbd/drbd_req.h
+++ b/drivers/block/drbd/drbd_req.h
@@ -269,7 +269,7 @@ enum drbd_req_state_bits {
static inline void drbd_req_make_private_bio(struct drbd_request *req, struct bio *bio_src)
{
struct bio *bio;
- bio = bio_clone_fast(bio_src, GFP_NOIO, drbd_io_bio_set);
+ bio = bio_clone_fast(bio_src, GFP_NOIO, &drbd_io_bio_set);
req->private_bio = bio;
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index 8ec7235fc93b..8871b5044d9e 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -4450,7 +4450,7 @@ static ssize_t floppy_cmos_show(struct device *dev,
return sprintf(buf, "%X\n", UDP->cmos);
}
-static DEVICE_ATTR(cmos, S_IRUGO, floppy_cmos_show, NULL);
+static DEVICE_ATTR(cmos, 0444, floppy_cmos_show, NULL);
static struct attribute *floppy_dev_attrs[] = {
&dev_attr_cmos.attr,
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 5d4e31655d96..4838b0dbaad3 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -732,7 +732,7 @@ static ssize_t loop_attr_do_show_##_name(struct device *d, \
return loop_attr_show(d, b, loop_attr_##_name##_show); \
} \
static struct device_attribute loop_attr_##_name = \
- __ATTR(_name, S_IRUGO, loop_attr_do_show_##_name, NULL);
+ __ATTR(_name, 0444, loop_attr_do_show_##_name, NULL);
static ssize_t loop_attr_backing_file_show(struct loop_device *lo, char *buf)
{
@@ -809,16 +809,17 @@ static struct attribute_group loop_attribute_group = {
.attrs= loop_attrs,
};
-static int loop_sysfs_init(struct loop_device *lo)
+static void loop_sysfs_init(struct loop_device *lo)
{
- return sysfs_create_group(&disk_to_dev(lo->lo_disk)->kobj,
- &loop_attribute_group);
+ lo->sysfs_inited = !sysfs_create_group(&disk_to_dev(lo->lo_disk)->kobj,
+ &loop_attribute_group);
}
static void loop_sysfs_exit(struct loop_device *lo)
{
- sysfs_remove_group(&disk_to_dev(lo->lo_disk)->kobj,
- &loop_attribute_group);
+ if (lo->sysfs_inited)
+ sysfs_remove_group(&disk_to_dev(lo->lo_disk)->kobj,
+ &loop_attribute_group);
}
static void loop_config_discard(struct loop_device *lo)
@@ -1068,6 +1069,7 @@ static int loop_clr_fd(struct loop_device *lo)
if (bdev) {
bdput(bdev);
invalidate_bdev(bdev);
+ bdev->bd_inode->i_mapping->wb_err = 0;
}
set_capacity(lo->lo_disk, 0);
loop_sysfs_exit(lo);
@@ -1676,9 +1678,9 @@ static const struct block_device_operations lo_fops = {
* And now the modules code and kernel interface.
*/
static int max_loop;
-module_param(max_loop, int, S_IRUGO);
+module_param(max_loop, int, 0444);
MODULE_PARM_DESC(max_loop, "Maximum number of loop devices");
-module_param(max_part, int, S_IRUGO);
+module_param(max_part, int, 0444);
MODULE_PARM_DESC(max_part, "Maximum number of partitions per loop device");
MODULE_LICENSE("GPL");
MODULE_ALIAS_BLOCKDEV_MAJOR(LOOP_MAJOR);
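
loop_sysfs_init() above can fail silently, since sysfs_create_group() returns an error, so the new sysfs_inited flag keeps loop_sysfs_exit() from removing a group that was never created, which would trigger a sysfs warning. The guard pattern, reduced to its essentials (example_* names are illustrative):

#include <linux/kobject.h>
#include <linux/sysfs.h>

struct example_dev {
	struct kobject *kobj;
	bool sysfs_inited;
};

static void example_sysfs_init(struct example_dev *d,
			       const struct attribute_group *grp)
{
	d->sysfs_inited = !sysfs_create_group(d->kobj, grp);
}

static void example_sysfs_exit(struct example_dev *d,
			       const struct attribute_group *grp)
{
	if (d->sysfs_inited)
		sysfs_remove_group(d->kobj, grp);
}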
diff --git a/drivers/block/loop.h b/drivers/block/loop.h
index b78de9879f4f..4d42c7af7de7 100644
--- a/drivers/block/loop.h
+++ b/drivers/block/loop.h
@@ -58,6 +58,7 @@ struct loop_device {
struct kthread_worker worker;
struct task_struct *worker_task;
bool use_dio;
+ bool sysfs_inited;
struct request_queue *lo_queue;
struct blk_mq_tag_set tag_set;
diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index 769c551e3d71..c73626decb46 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -2285,7 +2285,7 @@ static ssize_t mtip_hw_show_status(struct device *dev,
return size;
}
-static DEVICE_ATTR(status, S_IRUGO, mtip_hw_show_status, NULL);
+static DEVICE_ATTR(status, 0444, mtip_hw_show_status, NULL);
/* debugfs entries */
@@ -2566,10 +2566,9 @@ static int mtip_hw_debugfs_init(struct driver_data *dd)
return -1;
}
- debugfs_create_file("flags", S_IRUGO, dd->dfs_node, dd,
- &mtip_flags_fops);
- debugfs_create_file("registers", S_IRUGO, dd->dfs_node, dd,
- &mtip_regs_fops);
+ debugfs_create_file("flags", 0444, dd->dfs_node, dd, &mtip_flags_fops);
+ debugfs_create_file("registers", 0444, dd->dfs_node, dd,
+ &mtip_regs_fops);
return 0;
}
@@ -2726,15 +2725,11 @@ static void mtip_softirq_done_fn(struct request *rq)
blk_mq_end_request(rq, cmd->status);
}
-static void mtip_abort_cmd(struct request *req, void *data,
- bool reserved)
+static void mtip_abort_cmd(struct request *req, void *data, bool reserved)
{
struct mtip_cmd *cmd = blk_mq_rq_to_pdu(req);
struct driver_data *dd = data;
- if (!blk_mq_request_started(req))
- return;
-
dbg_printk(MTIP_DRV_NAME " Aborting request, tag = %d\n", req->tag);
clear_bit(req->tag, dd->port->cmds_to_issue);
@@ -2742,14 +2737,10 @@ static void mtip_abort_cmd(struct request *req, void *data,
mtip_softirq_done_fn(req);
}
-static void mtip_queue_cmd(struct request *req, void *data,
- bool reserved)
+static void mtip_queue_cmd(struct request *req, void *data, bool reserved)
{
struct driver_data *dd = data;
- if (!blk_mq_request_started(req))
- return;
-
set_bit(req->tag, dd->port->cmds_to_issue);
blk_abort_request(req);
}
@@ -3720,7 +3711,8 @@ static enum blk_eh_timer_return mtip_cmd_timeout(struct request *req,
struct mtip_cmd *cmd = blk_mq_rq_to_pdu(req);
cmd->status = BLK_STS_TIMEOUT;
- return BLK_EH_HANDLED;
+ blk_mq_complete_request(req);
+ return BLK_EH_DONE;
}
if (test_bit(req->tag, dd->port->cmds_to_issue))
@@ -3862,7 +3854,6 @@ skip_create_disk:
blk_queue_max_hw_sectors(dd->queue, 0xffff);
blk_queue_max_segment_size(dd->queue, 0x400000);
blk_queue_io_min(dd->queue, 4096);
- blk_queue_bounce_limit(dd->queue, dd->pdev->dma_mask);
/* Signal trim support */
if (dd->trim_supp == true) {
@@ -4273,7 +4264,7 @@ static int mtip_pci_probe(struct pci_dev *pdev,
if (!dd->isr_workq) {
dev_warn(&pdev->dev, "Can't create wq %d\n", dd->instance);
rv = -ENOMEM;
- goto block_initialize_err;
+ goto setmask_err;
}
memset(cpu_list, 0, sizeof(cpu_list));
@@ -4614,7 +4605,7 @@ static int __init mtip_init(void)
}
if (dfs_parent) {
dfs_device_status = debugfs_create_file("device_status",
- S_IRUGO, dfs_parent, NULL,
+ 0444, dfs_parent, NULL,
&mtip_device_status_fops);
if (IS_ERR_OR_NULL(dfs_device_status)) {
pr_err("Error creating device_status node\n");
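
The mtip timeout change is part of the removal of BLK_EH_HANDLED: returning it made the block core complete the request behind the driver's back, racing with normal completion. Under the new contract a ->timeout handler either resets the timer or completes the request itself and returns BLK_EH_DONE. A sketch of that contract (example_* names, including the policy stub, are hypothetical):

#include <linux/blk-mq.h>
#include <linux/blkdev.h>

struct example_cmd {
	blk_status_t status;
};

static bool example_should_retry(struct request *rq)
{
	return false;	/* hypothetical policy hook */
}

static enum blk_eh_timer_return example_timeout(struct request *rq,
						bool reserved)
{
	struct example_cmd *cmd = blk_mq_rq_to_pdu(rq);

	if (example_should_retry(rq))
		return BLK_EH_RESET_TIMER;

	/* The driver owns completion now: record the status and
	 * complete explicitly, then tell the core we are done. */
	cmd->status = BLK_STS_TIMEOUT;
	blk_mq_complete_request(rq);
	return BLK_EH_DONE;
}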
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index afbc202ca6fd..3ed1ef8ee528 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -166,16 +166,19 @@ static ssize_t pid_show(struct device *dev,
}
static const struct device_attribute pid_attr = {
- .attr = { .name = "pid", .mode = S_IRUGO},
+ .attr = { .name = "pid", .mode = 0444},
.show = pid_show,
};
static void nbd_dev_remove(struct nbd_device *nbd)
{
struct gendisk *disk = nbd->disk;
+ struct request_queue *q;
+
if (disk) {
+ q = disk->queue;
del_gendisk(disk);
- blk_cleanup_queue(disk->queue);
+ blk_cleanup_queue(q);
blk_mq_free_tag_set(&nbd->tag_set);
disk->private_data = NULL;
put_disk(disk);
@@ -213,7 +216,15 @@ static void nbd_mark_nsock_dead(struct nbd_device *nbd, struct nbd_sock *nsock,
}
if (!nsock->dead) {
kernel_sock_shutdown(nsock->sock, SHUT_RDWR);
- atomic_dec(&nbd->config->live_connections);
+ if (atomic_dec_return(&nbd->config->live_connections) == 0) {
+ if (test_and_clear_bit(NBD_DISCONNECT_REQUESTED,
+ &nbd->config->runtime_flags)) {
+ set_bit(NBD_DISCONNECTED,
+ &nbd->config->runtime_flags);
+ dev_info(nbd_to_dev(nbd),
+ "Disconnected due to user request.\n");
+ }
+ }
}
nsock->dead = true;
nsock->pending = NULL;
@@ -231,9 +242,22 @@ static void nbd_size_clear(struct nbd_device *nbd)
static void nbd_size_update(struct nbd_device *nbd)
{
struct nbd_config *config = nbd->config;
+ struct block_device *bdev = bdget_disk(nbd->disk, 0);
+
+ if (config->flags & NBD_FLAG_SEND_TRIM) {
+ nbd->disk->queue->limits.discard_granularity = config->blksize;
+ blk_queue_max_discard_sectors(nbd->disk->queue, UINT_MAX);
+ }
blk_queue_logical_block_size(nbd->disk->queue, config->blksize);
blk_queue_physical_block_size(nbd->disk->queue, config->blksize);
set_capacity(nbd->disk, config->bytesize >> 9);
+ if (bdev) {
+ if (bdev->bd_disk)
+ bd_set_size(bdev, config->bytesize);
+ else
+ bdev->bd_invalidated = 1;
+ bdput(bdev);
+ }
kobject_uevent(&nbd_to_dev(nbd)->kobj, KOBJ_CHANGE);
}
@@ -243,6 +267,8 @@ static void nbd_size_set(struct nbd_device *nbd, loff_t blocksize,
struct nbd_config *config = nbd->config;
config->blksize = blocksize;
config->bytesize = blocksize * nr_blocks;
+ if (nbd->task_recv != NULL)
+ nbd_size_update(nbd);
}
static void nbd_complete_rq(struct request *req)
@@ -286,13 +312,15 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req,
if (!refcount_inc_not_zero(&nbd->config_refs)) {
cmd->status = BLK_STS_TIMEOUT;
- return BLK_EH_HANDLED;
+ goto done;
}
config = nbd->config;
if (config->num_connections > 1) {
dev_err_ratelimited(nbd_to_dev(nbd),
- "Connection timed out, retrying\n");
+ "Connection timed out, retrying (%d/%d alive)\n",
+ atomic_read(&config->live_connections),
+ config->num_connections);
/*
* Hooray we have more connections, requeue this IO, the submit
* path will put it on a real connection.
@@ -314,7 +342,7 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req,
}
blk_mq_requeue_request(req, true);
nbd_config_put(nbd);
- return BLK_EH_NOT_HANDLED;
+ return BLK_EH_DONE;
}
} else {
dev_err_ratelimited(nbd_to_dev(nbd),
@@ -324,8 +352,9 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req,
cmd->status = BLK_STS_IOERR;
sock_shutdown(nbd);
nbd_config_put(nbd);
-
- return BLK_EH_HANDLED;
+done:
+ blk_mq_complete_request(req);
+ return BLK_EH_DONE;
}
/*
@@ -647,11 +676,8 @@ static void recv_work(struct work_struct *work)
static void nbd_clear_req(struct request *req, void *data, bool reserved)
{
- struct nbd_cmd *cmd;
+ struct nbd_cmd *cmd = blk_mq_rq_to_pdu(req);
- if (!blk_mq_request_started(req))
- return;
- cmd = blk_mq_rq_to_pdu(req);
cmd->status = BLK_STS_IOERR;
blk_mq_complete_request(req);
}
@@ -714,10 +740,9 @@ static int wait_for_reconnect(struct nbd_device *nbd)
return 0;
if (test_bit(NBD_DISCONNECTED, &config->runtime_flags))
return 0;
- wait_event_timeout(config->conn_wait,
- atomic_read(&config->live_connections),
- config->dead_conn_timeout);
- return atomic_read(&config->live_connections);
+ return wait_event_timeout(config->conn_wait,
+ atomic_read(&config->live_connections) > 0,
+ config->dead_conn_timeout) > 0;
}
static int nbd_handle_cmd(struct nbd_cmd *cmd, int index)
@@ -950,10 +975,6 @@ static void nbd_bdev_reset(struct block_device *bdev)
if (bdev->bd_openers > 1)
return;
bd_set_size(bdev, 0);
- if (max_part > 0) {
- blkdev_reread_part(bdev);
- bdev->bd_invalidated = 1;
- }
}
static void nbd_parse_flags(struct nbd_device *nbd)
@@ -1040,6 +1061,8 @@ static void nbd_config_put(struct nbd_device *nbd)
nbd->config = NULL;
nbd->tag_set.timeout = 0;
+ nbd->disk->queue->limits.discard_granularity = 0;
+ blk_queue_max_discard_sectors(nbd->disk->queue, UINT_MAX);
blk_queue_flag_clear(QUEUE_FLAG_DISCARD, nbd->disk->queue);
mutex_unlock(&nbd->config_lock);
@@ -1109,7 +1132,6 @@ static int nbd_start_device_ioctl(struct nbd_device *nbd, struct block_device *b
if (ret)
return ret;
- bd_set_size(bdev, config->bytesize);
if (max_part)
bdev->bd_invalidated = 1;
mutex_unlock(&nbd->config_lock);
@@ -1118,7 +1140,7 @@ static int nbd_start_device_ioctl(struct nbd_device *nbd, struct block_device *b
if (ret)
sock_shutdown(nbd);
mutex_lock(&nbd->config_lock);
- bd_set_size(bdev, 0);
+ nbd_bdev_reset(bdev);
/* user requested, ignore socket errors */
if (test_bit(NBD_DISCONNECT_REQUESTED, &config->runtime_flags))
ret = 0;
@@ -1269,6 +1291,9 @@ static int nbd_open(struct block_device *bdev, fmode_t mode)
refcount_set(&nbd->config_refs, 1);
refcount_inc(&nbd->refs);
mutex_unlock(&nbd->config_lock);
+ bdev->bd_invalidated = 1;
+ } else if (nbd_disconnected(nbd->config)) {
+ bdev->bd_invalidated = 1;
}
out:
mutex_unlock(&nbd_index_mutex);
@@ -1490,8 +1515,8 @@ static int nbd_dev_add(int index)
*/
blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue);
blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, disk->queue);
- disk->queue->limits.discard_granularity = 512;
- blk_queue_max_discard_sectors(disk->queue, UINT_MAX);
+ disk->queue->limits.discard_granularity = 0;
+ blk_queue_max_discard_sectors(disk->queue, 0);
blk_queue_max_segment_size(disk->queue, UINT_MAX);
blk_queue_max_segments(disk->queue, USHRT_MAX);
blk_queue_max_hw_sectors(disk->queue, 65536);
@@ -1755,6 +1780,7 @@ static int nbd_genl_disconnect(struct sk_buff *skb, struct genl_info *info)
}
mutex_lock(&nbd->config_lock);
nbd_disconnect(nbd);
+ nbd_clear_sock(nbd);
mutex_unlock(&nbd->config_lock);
if (test_and_clear_bit(NBD_HAS_CONFIG_REF,
&nbd->config->runtime_flags))
@@ -2093,7 +2119,8 @@ static int __init nbd_init(void)
if (nbds_max > 1UL << (MINORBITS - part_shift))
return -EINVAL;
recv_workqueue = alloc_workqueue("knbd-recv",
- WQ_MEM_RECLAIM | WQ_HIGHPRI, 0);
+ WQ_MEM_RECLAIM | WQ_HIGHPRI |
+ WQ_UNBOUND, 0);
if (!recv_workqueue)
return -ENOMEM;
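
wait_for_reconnect() above leans on wait_event_timeout() semantics: it returns 0 only when the timeout elapsed with the condition still false, and otherwise at least 1. Returning that result directly avoids the old pattern of re-reading the atomic after the wait, which could observe a connection that died in between. As a minimal illustration (not from the patch):

#include <linux/atomic.h>
#include <linux/wait.h>

static bool example_wait_for_live(wait_queue_head_t *wq, atomic_t *live,
				  long timeout)
{
	/* true iff the condition became true before the timeout */
	return wait_event_timeout(*wq, atomic_read(live) > 0, timeout) > 0;
}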
diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c
index a76553293a31..2bdadd7f1454 100644
--- a/drivers/block/null_blk.c
+++ b/drivers/block/null_blk.c
@@ -157,23 +157,23 @@ enum {
};
static int g_no_sched;
-module_param_named(no_sched, g_no_sched, int, S_IRUGO);
+module_param_named(no_sched, g_no_sched, int, 0444);
MODULE_PARM_DESC(no_sched, "No io scheduler");
static int g_submit_queues = 1;
-module_param_named(submit_queues, g_submit_queues, int, S_IRUGO);
+module_param_named(submit_queues, g_submit_queues, int, 0444);
MODULE_PARM_DESC(submit_queues, "Number of submission queues");
static int g_home_node = NUMA_NO_NODE;
-module_param_named(home_node, g_home_node, int, S_IRUGO);
+module_param_named(home_node, g_home_node, int, 0444);
MODULE_PARM_DESC(home_node, "Home node for the device");
#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION
static char g_timeout_str[80];
-module_param_string(timeout, g_timeout_str, sizeof(g_timeout_str), S_IRUGO);
+module_param_string(timeout, g_timeout_str, sizeof(g_timeout_str), 0444);
static char g_requeue_str[80];
-module_param_string(requeue, g_requeue_str, sizeof(g_requeue_str), S_IRUGO);
+module_param_string(requeue, g_requeue_str, sizeof(g_requeue_str), 0444);
#endif
static int g_queue_mode = NULL_Q_MQ;
@@ -203,27 +203,27 @@ static const struct kernel_param_ops null_queue_mode_param_ops = {
.get = param_get_int,
};
-device_param_cb(queue_mode, &null_queue_mode_param_ops, &g_queue_mode, S_IRUGO);
+device_param_cb(queue_mode, &null_queue_mode_param_ops, &g_queue_mode, 0444);
MODULE_PARM_DESC(queue_mode, "Block interface to use (0=bio,1=rq,2=multiqueue)");
static int g_gb = 250;
-module_param_named(gb, g_gb, int, S_IRUGO);
+module_param_named(gb, g_gb, int, 0444);
MODULE_PARM_DESC(gb, "Size in GB");
static int g_bs = 512;
-module_param_named(bs, g_bs, int, S_IRUGO);
+module_param_named(bs, g_bs, int, 0444);
MODULE_PARM_DESC(bs, "Block size (in bytes)");
static int nr_devices = 1;
-module_param(nr_devices, int, S_IRUGO);
+module_param(nr_devices, int, 0444);
MODULE_PARM_DESC(nr_devices, "Number of devices to register");
static bool g_blocking;
-module_param_named(blocking, g_blocking, bool, S_IRUGO);
+module_param_named(blocking, g_blocking, bool, 0444);
MODULE_PARM_DESC(blocking, "Register as a blocking blk-mq driver device");
static bool shared_tags;
-module_param(shared_tags, bool, S_IRUGO);
+module_param(shared_tags, bool, 0444);
MODULE_PARM_DESC(shared_tags, "Share tag set between devices for blk-mq");
static int g_irqmode = NULL_IRQ_SOFTIRQ;
@@ -239,19 +239,19 @@ static const struct kernel_param_ops null_irqmode_param_ops = {
.get = param_get_int,
};
-device_param_cb(irqmode, &null_irqmode_param_ops, &g_irqmode, S_IRUGO);
+device_param_cb(irqmode, &null_irqmode_param_ops, &g_irqmode, 0444);
MODULE_PARM_DESC(irqmode, "IRQ completion handler. 0-none, 1-softirq, 2-timer");
static unsigned long g_completion_nsec = 10000;
-module_param_named(completion_nsec, g_completion_nsec, ulong, S_IRUGO);
+module_param_named(completion_nsec, g_completion_nsec, ulong, 0444);
MODULE_PARM_DESC(completion_nsec, "Time in ns to complete a request in hardware. Default: 10,000ns");
static int g_hw_queue_depth = 64;
-module_param_named(hw_queue_depth, g_hw_queue_depth, int, S_IRUGO);
+module_param_named(hw_queue_depth, g_hw_queue_depth, int, 0444);
MODULE_PARM_DESC(hw_queue_depth, "Queue depth for each hardware queue. Default: 64");
static bool g_use_per_node_hctx;
-module_param_named(use_per_node_hctx, g_use_per_node_hctx, bool, S_IRUGO);
+module_param_named(use_per_node_hctx, g_use_per_node_hctx, bool, 0444);
MODULE_PARM_DESC(use_per_node_hctx, "Use per-node allocation for hardware context queues. Default: false");
static struct nullb_device *null_alloc_dev(void);
@@ -1365,7 +1365,8 @@ static blk_qc_t null_queue_bio(struct request_queue *q, struct bio *bio)
static enum blk_eh_timer_return null_rq_timed_out_fn(struct request *rq)
{
pr_info("null: rq %p timed out\n", rq);
- return BLK_EH_HANDLED;
+ blk_mq_complete_request(rq);
+ return BLK_EH_DONE;
}
static int null_rq_prep_fn(struct request_queue *q, struct request *req)
@@ -1427,7 +1428,8 @@ static void null_request_fn(struct request_queue *q)
static enum blk_eh_timer_return null_timeout_rq(struct request *rq, bool res)
{
pr_info("null: rq %p timed out\n", rq);
- return BLK_EH_HANDLED;
+ blk_mq_complete_request(rq);
+ return BLK_EH_DONE;
}
static blk_status_t null_queue_rq(struct blk_mq_hw_ctx *hctx,
diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c
index 27a44b97393a..8961b190e256 100644
--- a/drivers/block/paride/pd.c
+++ b/drivers/block/paride/pd.c
@@ -740,7 +740,7 @@ static int pd_special_command(struct pd_unit *disk,
{
struct request *rq;
- rq = blk_get_request(disk->gd->queue, REQ_OP_DRV_IN, __GFP_RECLAIM);
+ rq = blk_get_request(disk->gd->queue, REQ_OP_DRV_IN, 0);
if (IS_ERR(rq))
return PTR_ERR(rq);
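
The pd.c change above (repeated in pktcdvd, sx8, virtio_blk and cdrom below) reflects the new blk_get_request() calling convention: the third argument is no longer a gfp_t but a set of BLK_MQ_REQ_* flags, where 0 requests the default sleeping allocation that GFP_KERNEL or __GFP_RECLAIM used to ask for. Illustrative usage (example_* names are not from the patch):

#include <linux/blkdev.h>

static struct request *example_get_rq(struct request_queue *q,
				      bool may_sleep)
{
	/* 0: block until a request is available (the common case);
	 * BLK_MQ_REQ_NOWAIT: return an error instead of sleeping. */
	return blk_get_request(q, REQ_OP_DRV_IN,
			       may_sleep ? 0 : BLK_MQ_REQ_NOWAIT);
}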
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index c61d20c9f3f8..b3f83cd96f33 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -97,8 +97,8 @@ static int pktdev_major;
static int write_congestion_on = PKT_WRITE_CONGESTION_ON;
static int write_congestion_off = PKT_WRITE_CONGESTION_OFF;
static struct mutex ctl_mutex; /* Serialize open/close/setup/teardown */
-static mempool_t *psd_pool;
-static struct bio_set *pkt_bio_set;
+static mempool_t psd_pool;
+static struct bio_set pkt_bio_set;
static struct class *class_pktcdvd = NULL; /* /sys/class/pktcdvd */
static struct dentry *pkt_debugfs_root = NULL; /* /sys/kernel/debug/pktcdvd */
@@ -478,8 +478,8 @@ static void pkt_debugfs_dev_new(struct pktcdvd_device *pd)
if (!pd->dfs_d_root)
return;
- pd->dfs_f_info = debugfs_create_file("info", S_IRUGO,
- pd->dfs_d_root, pd, &debug_fops);
+ pd->dfs_f_info = debugfs_create_file("info", 0444,
+ pd->dfs_d_root, pd, &debug_fops);
}
static void pkt_debugfs_dev_remove(struct pktcdvd_device *pd)
@@ -631,7 +631,7 @@ static inline struct pkt_rb_node *pkt_rbtree_next(struct pkt_rb_node *node)
static void pkt_rbtree_erase(struct pktcdvd_device *pd, struct pkt_rb_node *node)
{
rb_erase(&node->rb_node, &pd->bio_queue);
- mempool_free(node, pd->rb_pool);
+ mempool_free(node, &pd->rb_pool);
pd->bio_queue_size--;
BUG_ON(pd->bio_queue_size < 0);
}
@@ -704,13 +704,13 @@ static int pkt_generic_packet(struct pktcdvd_device *pd, struct packet_command *
int ret = 0;
rq = blk_get_request(q, (cgc->data_direction == CGC_DATA_WRITE) ?
- REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN, __GFP_RECLAIM);
+ REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN, 0);
if (IS_ERR(rq))
return PTR_ERR(rq);
if (cgc->buflen) {
ret = blk_rq_map_kern(q, rq, cgc->buffer, cgc->buflen,
- __GFP_RECLAIM);
+ GFP_NOIO);
if (ret)
goto out;
}
@@ -1285,7 +1285,7 @@ static void pkt_start_write(struct pktcdvd_device *pd, struct packet_data *pkt)
* Fill-in bvec with data from orig_bios.
*/
spin_lock(&pkt->lock);
- bio_copy_data(pkt->w_bio, pkt->orig_bios.head);
+ bio_list_copy_data(pkt->w_bio, pkt->orig_bios.head);
pkt_set_state(pkt, PACKET_WRITE_WAIT_STATE);
spin_unlock(&pkt->lock);
@@ -2303,14 +2303,14 @@ static void pkt_end_io_read_cloned(struct bio *bio)
psd->bio->bi_status = bio->bi_status;
bio_put(bio);
bio_endio(psd->bio);
- mempool_free(psd, psd_pool);
+ mempool_free(psd, &psd_pool);
pkt_bio_finished(pd);
}
static void pkt_make_request_read(struct pktcdvd_device *pd, struct bio *bio)
{
- struct bio *cloned_bio = bio_clone_fast(bio, GFP_NOIO, pkt_bio_set);
- struct packet_stacked_data *psd = mempool_alloc(psd_pool, GFP_NOIO);
+ struct bio *cloned_bio = bio_clone_fast(bio, GFP_NOIO, &pkt_bio_set);
+ struct packet_stacked_data *psd = mempool_alloc(&psd_pool, GFP_NOIO);
psd->pd = pd;
psd->bio = bio;
@@ -2381,7 +2381,7 @@ static void pkt_make_request_write(struct request_queue *q, struct bio *bio)
/*
* No matching packet found. Store the bio in the work queue.
*/
- node = mempool_alloc(pd->rb_pool, GFP_NOIO);
+ node = mempool_alloc(&pd->rb_pool, GFP_NOIO);
node->bio = bio;
spin_lock(&pd->lock);
BUG_ON(pd->bio_queue_size < 0);
@@ -2451,7 +2451,7 @@ static blk_qc_t pkt_make_request(struct request_queue *q, struct bio *bio)
split = bio_split(bio, last_zone -
bio->bi_iter.bi_sector,
- GFP_NOIO, pkt_bio_set);
+ GFP_NOIO, &pkt_bio_set);
bio_chain(split, bio);
} else {
split = bio;
@@ -2538,18 +2538,6 @@ static int pkt_seq_show(struct seq_file *m, void *p)
return 0;
}
-static int pkt_seq_open(struct inode *inode, struct file *file)
-{
- return single_open(file, pkt_seq_show, PDE_DATA(inode));
-}
-
-static const struct file_operations pkt_proc_fops = {
- .open = pkt_seq_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release
-};
-
static int pkt_new_dev(struct pktcdvd_device *pd, dev_t dev)
{
int i;
@@ -2604,7 +2592,7 @@ static int pkt_new_dev(struct pktcdvd_device *pd, dev_t dev)
goto out_mem;
}
- proc_create_data(pd->name, 0, pkt_proc, &pkt_proc_fops, pd);
+ proc_create_single_data(pd->name, 0, pkt_proc, pkt_seq_show, pd);
pkt_dbg(1, pd, "writer mapped to %s\n", bdevname(bdev, b));
return 0;
@@ -2707,9 +2695,9 @@ static int pkt_setup_dev(dev_t dev, dev_t* pkt_dev)
if (!pd)
goto out_mutex;
- pd->rb_pool = mempool_create_kmalloc_pool(PKT_RB_POOL_SIZE,
- sizeof(struct pkt_rb_node));
- if (!pd->rb_pool)
+ ret = mempool_init_kmalloc_pool(&pd->rb_pool, PKT_RB_POOL_SIZE,
+ sizeof(struct pkt_rb_node));
+ if (ret)
goto out_mem;
INIT_LIST_HEAD(&pd->cdrw.pkt_free_list);
@@ -2766,7 +2754,7 @@ static int pkt_setup_dev(dev_t dev, dev_t* pkt_dev)
out_mem2:
put_disk(disk);
out_mem:
- mempool_destroy(pd->rb_pool);
+ mempool_exit(&pd->rb_pool);
kfree(pd);
out_mutex:
mutex_unlock(&ctl_mutex);
@@ -2817,7 +2805,7 @@ static int pkt_remove_dev(dev_t pkt_dev)
blk_cleanup_queue(pd->disk->queue);
put_disk(pd->disk);
- mempool_destroy(pd->rb_pool);
+ mempool_exit(&pd->rb_pool);
kfree(pd);
/* This is safe: open() is still holding a reference. */
@@ -2914,14 +2902,14 @@ static int __init pkt_init(void)
mutex_init(&ctl_mutex);
- psd_pool = mempool_create_kmalloc_pool(PSD_POOL_SIZE,
- sizeof(struct packet_stacked_data));
- if (!psd_pool)
- return -ENOMEM;
- pkt_bio_set = bioset_create(BIO_POOL_SIZE, 0, 0);
- if (!pkt_bio_set) {
- mempool_destroy(psd_pool);
- return -ENOMEM;
+ ret = mempool_init_kmalloc_pool(&psd_pool, PSD_POOL_SIZE,
+ sizeof(struct packet_stacked_data));
+ if (ret)
+ return ret;
+ ret = bioset_init(&pkt_bio_set, BIO_POOL_SIZE, 0, 0);
+ if (ret) {
+ mempool_exit(&psd_pool);
+ return ret;
}
ret = register_blkdev(pktdev_major, DRIVER_NAME);
@@ -2954,8 +2942,8 @@ out_misc:
out:
unregister_blkdev(pktdev_major, DRIVER_NAME);
out2:
- mempool_destroy(psd_pool);
- bioset_free(pkt_bio_set);
+ mempool_exit(&psd_pool);
+ bioset_exit(&pkt_bio_set);
return ret;
}
@@ -2968,8 +2956,8 @@ static void __exit pkt_exit(void)
pkt_sysfs_cleanup();
unregister_blkdev(pktdev_major, DRIVER_NAME);
- mempool_destroy(psd_pool);
- bioset_free(pkt_bio_set);
+ mempool_exit(&psd_pool);
+ bioset_exit(&pkt_bio_set);
}
MODULE_DESCRIPTION("Packet writing layer for CD/DVD drives");
diff --git a/drivers/block/ps3disk.c b/drivers/block/ps3disk.c
index 075662f2cf46..afe1508d82c6 100644
--- a/drivers/block/ps3disk.c
+++ b/drivers/block/ps3disk.c
@@ -465,8 +465,6 @@ static int ps3disk_probe(struct ps3_system_bus_device *_dev)
priv->queue = queue;
queue->queuedata = dev;
- blk_queue_bounce_limit(queue, BLK_BOUNCE_HIGH);
-
blk_queue_max_hw_sectors(queue, dev->bounce_size >> 9);
blk_queue_segment_boundary(queue, -1UL);
blk_queue_dma_alignment(queue, dev->blk_size-1);
diff --git a/drivers/block/ps3vram.c b/drivers/block/ps3vram.c
index 6a55959cbf78..8fa4533a1249 100644
--- a/drivers/block/ps3vram.c
+++ b/drivers/block/ps3vram.c
@@ -521,26 +521,13 @@ static int ps3vram_proc_show(struct seq_file *m, void *v)
return 0;
}
-static int ps3vram_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, ps3vram_proc_show, PDE_DATA(inode));
-}
-
-static const struct file_operations ps3vram_proc_fops = {
- .owner = THIS_MODULE,
- .open = ps3vram_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
static void ps3vram_proc_init(struct ps3_system_bus_device *dev)
{
struct ps3vram_priv *priv = ps3_system_bus_get_drvdata(dev);
struct proc_dir_entry *pde;
- pde = proc_create_data(DEVICE_NAME, 0444, NULL, &ps3vram_proc_fops,
- priv);
+ pde = proc_create_single_data(DEVICE_NAME, 0444, NULL,
+ ps3vram_proc_show, priv);
if (!pde)
dev_warn(&dev->core, "failed to create /proc entry\n");
}
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 8e8b04cc569a..af354047ac4b 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -424,7 +424,7 @@ static struct workqueue_struct *rbd_wq;
* single-major requires >= 0.75 version of userspace rbd utility.
*/
static bool single_major = true;
-module_param(single_major, bool, S_IRUGO);
+module_param(single_major, bool, 0444);
MODULE_PARM_DESC(single_major, "Use a single major number for all rbd devices (default: true)");
static ssize_t rbd_add(struct bus_type *bus, const char *buf,
@@ -468,11 +468,11 @@ static ssize_t rbd_supported_features_show(struct bus_type *bus, char *buf)
return sprintf(buf, "0x%llx\n", RBD_FEATURES_SUPPORTED);
}
-static BUS_ATTR(add, S_IWUSR, NULL, rbd_add);
-static BUS_ATTR(remove, S_IWUSR, NULL, rbd_remove);
-static BUS_ATTR(add_single_major, S_IWUSR, NULL, rbd_add_single_major);
-static BUS_ATTR(remove_single_major, S_IWUSR, NULL, rbd_remove_single_major);
-static BUS_ATTR(supported_features, S_IRUGO, rbd_supported_features_show, NULL);
+static BUS_ATTR(add, 0200, NULL, rbd_add);
+static BUS_ATTR(remove, 0200, NULL, rbd_remove);
+static BUS_ATTR(add_single_major, 0200, NULL, rbd_add_single_major);
+static BUS_ATTR(remove_single_major, 0200, NULL, rbd_remove_single_major);
+static BUS_ATTR(supported_features, 0444, rbd_supported_features_show, NULL);
static struct attribute *rbd_bus_attrs[] = {
&bus_attr_add.attr,
@@ -2366,7 +2366,9 @@ static int rbd_obj_issue_copyup(struct rbd_obj_request *obj_req, u32 bytes)
osd_req_op_cls_init(obj_req->osd_req, 0, CEPH_OSD_OP_CALL, "rbd",
"copyup");
osd_req_op_cls_request_data_bvecs(obj_req->osd_req, 0,
- obj_req->copyup_bvecs, bytes);
+ obj_req->copyup_bvecs,
+ obj_req->copyup_bvec_count,
+ bytes);
switch (obj_req->img_request->op_type) {
case OBJ_OP_WRITE:
@@ -4202,22 +4204,22 @@ static ssize_t rbd_image_refresh(struct device *dev,
return size;
}
-static DEVICE_ATTR(size, S_IRUGO, rbd_size_show, NULL);
-static DEVICE_ATTR(features, S_IRUGO, rbd_features_show, NULL);
-static DEVICE_ATTR(major, S_IRUGO, rbd_major_show, NULL);
-static DEVICE_ATTR(minor, S_IRUGO, rbd_minor_show, NULL);
-static DEVICE_ATTR(client_addr, S_IRUGO, rbd_client_addr_show, NULL);
-static DEVICE_ATTR(client_id, S_IRUGO, rbd_client_id_show, NULL);
-static DEVICE_ATTR(cluster_fsid, S_IRUGO, rbd_cluster_fsid_show, NULL);
-static DEVICE_ATTR(config_info, S_IRUSR, rbd_config_info_show, NULL);
-static DEVICE_ATTR(pool, S_IRUGO, rbd_pool_show, NULL);
-static DEVICE_ATTR(pool_id, S_IRUGO, rbd_pool_id_show, NULL);
-static DEVICE_ATTR(name, S_IRUGO, rbd_name_show, NULL);
-static DEVICE_ATTR(image_id, S_IRUGO, rbd_image_id_show, NULL);
-static DEVICE_ATTR(refresh, S_IWUSR, NULL, rbd_image_refresh);
-static DEVICE_ATTR(current_snap, S_IRUGO, rbd_snap_show, NULL);
-static DEVICE_ATTR(snap_id, S_IRUGO, rbd_snap_id_show, NULL);
-static DEVICE_ATTR(parent, S_IRUGO, rbd_parent_show, NULL);
+static DEVICE_ATTR(size, 0444, rbd_size_show, NULL);
+static DEVICE_ATTR(features, 0444, rbd_features_show, NULL);
+static DEVICE_ATTR(major, 0444, rbd_major_show, NULL);
+static DEVICE_ATTR(minor, 0444, rbd_minor_show, NULL);
+static DEVICE_ATTR(client_addr, 0444, rbd_client_addr_show, NULL);
+static DEVICE_ATTR(client_id, 0444, rbd_client_id_show, NULL);
+static DEVICE_ATTR(cluster_fsid, 0444, rbd_cluster_fsid_show, NULL);
+static DEVICE_ATTR(config_info, 0400, rbd_config_info_show, NULL);
+static DEVICE_ATTR(pool, 0444, rbd_pool_show, NULL);
+static DEVICE_ATTR(pool_id, 0444, rbd_pool_id_show, NULL);
+static DEVICE_ATTR(name, 0444, rbd_name_show, NULL);
+static DEVICE_ATTR(image_id, 0444, rbd_image_id_show, NULL);
+static DEVICE_ATTR(refresh, 0200, NULL, rbd_image_refresh);
+static DEVICE_ATTR(current_snap, 0444, rbd_snap_show, NULL);
+static DEVICE_ATTR(snap_id, 0444, rbd_snap_id_show, NULL);
+static DEVICE_ATTR(parent, 0444, rbd_parent_show, NULL);
static struct attribute *rbd_attrs[] = {
&dev_attr_size.attr,
diff --git a/drivers/block/rsxx/core.c b/drivers/block/rsxx/core.c
index 34997df132e2..09537bee387f 100644
--- a/drivers/block/rsxx/core.c
+++ b/drivers/block/rsxx/core.c
@@ -247,19 +247,19 @@ static void rsxx_debugfs_dev_new(struct rsxx_cardinfo *card)
if (IS_ERR_OR_NULL(card->debugfs_dir))
goto failed_debugfs_dir;
- debugfs_stats = debugfs_create_file("stats", S_IRUGO,
+ debugfs_stats = debugfs_create_file("stats", 0444,
card->debugfs_dir, card,
&debugfs_stats_fops);
if (IS_ERR_OR_NULL(debugfs_stats))
goto failed_debugfs_stats;
- debugfs_pci_regs = debugfs_create_file("pci_regs", S_IRUGO,
+ debugfs_pci_regs = debugfs_create_file("pci_regs", 0444,
card->debugfs_dir, card,
&debugfs_pci_regs_fops);
if (IS_ERR_OR_NULL(debugfs_pci_regs))
goto failed_debugfs_pci_regs;
- debugfs_cram = debugfs_create_file("cram", S_IRUGO | S_IWUSR,
+ debugfs_cram = debugfs_create_file("cram", 0644,
card->debugfs_dir, card,
&debugfs_cram_fops);
if (IS_ERR_OR_NULL(debugfs_cram))
diff --git a/drivers/block/sx8.c b/drivers/block/sx8.c
index 08586dc14e85..4d90e5eba2f5 100644
--- a/drivers/block/sx8.c
+++ b/drivers/block/sx8.c
@@ -567,7 +567,7 @@ static struct carm_request *carm_get_special(struct carm_host *host)
if (!crq)
return NULL;
- rq = blk_get_request(host->oob_q, REQ_OP_DRV_OUT, GFP_KERNEL);
+ rq = blk_get_request(host->oob_q, REQ_OP_DRV_OUT, 0);
if (IS_ERR(rq)) {
spin_lock_irqsave(&host->lock, flags);
carm_put_request(host, crq);
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 4a07593c2efd..23752dc99b00 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -298,7 +298,7 @@ static int virtblk_get_id(struct gendisk *disk, char *id_str)
struct request *req;
int err;
- req = blk_get_request(q, REQ_OP_DRV_IN, GFP_KERNEL);
+ req = blk_get_request(q, REQ_OP_DRV_IN, 0);
if (IS_ERR(req))
return PTR_ERR(req);
@@ -371,7 +371,7 @@ static ssize_t virtblk_serial_show(struct device *dev,
return err;
}
-static DEVICE_ATTR(serial, S_IRUGO, virtblk_serial_show, NULL);
+static DEVICE_ATTR(serial, 0444, virtblk_serial_show, NULL);
/* The queue's logical block size must be set before calling this */
static void virtblk_update_capacity(struct virtio_blk *vblk, bool resize)
@@ -576,10 +576,10 @@ virtblk_cache_type_show(struct device *dev, struct device_attribute *attr,
}
static const struct device_attribute dev_attr_cache_type_ro =
- __ATTR(cache_type, S_IRUGO,
+ __ATTR(cache_type, 0444,
virtblk_cache_type_show, NULL);
static const struct device_attribute dev_attr_cache_type_rw =
- __ATTR(cache_type, S_IRUGO|S_IWUSR,
+ __ATTR(cache_type, 0644,
virtblk_cache_type_show, virtblk_cache_type_store);
static int virtblk_init_request(struct blk_mq_tag_set *set, struct request *rq,
diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
index 987d665e82de..b55b245e8052 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -98,7 +98,7 @@ MODULE_PARM_DESC(max_queues,
* backend, 4KB page granularity is used.
*/
unsigned int xen_blkif_max_ring_order = XENBUS_MAX_RING_GRANT_ORDER;
-module_param_named(max_ring_page_order, xen_blkif_max_ring_order, int, S_IRUGO);
+module_param_named(max_ring_page_order, xen_blkif_max_ring_order, int, 0444);
MODULE_PARM_DESC(max_ring_page_order, "Maximum order of pages to be used for the shared ring");
/*
* The LRU mechanism to clean the lists of persistent grants needs to
diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c
index 21c1be1eb226..66412eededda 100644
--- a/drivers/block/xen-blkback/xenbus.c
+++ b/drivers/block/xen-blkback/xenbus.c
@@ -367,7 +367,7 @@ int __init xen_blkif_interface_init(void)
out: \
return sprintf(buf, format, result); \
} \
- static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL)
+ static DEVICE_ATTR(name, 0444, show_##name, NULL)
VBD_SHOW_ALLRING(oo_req, "%llu\n");
VBD_SHOW_ALLRING(rd_req, "%llu\n");
@@ -403,7 +403,7 @@ static const struct attribute_group xen_vbdstat_group = {
\
return sprintf(buf, format, ##args); \
} \
- static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL)
+ static DEVICE_ATTR(name, 0444, show_##name, NULL)
VBD_SHOW(physical_device, "%x:%x\n", be->major, be->minor);
VBD_SHOW(mode, "%s\n", be->mode);
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 2a8e7813bd1a..ae00a82f350b 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -129,13 +129,12 @@ static const struct block_device_operations xlvbd_block_fops;
*/
static unsigned int xen_blkif_max_segments = 32;
-module_param_named(max_indirect_segments, xen_blkif_max_segments, uint,
- S_IRUGO);
+module_param_named(max_indirect_segments, xen_blkif_max_segments, uint, 0444);
MODULE_PARM_DESC(max_indirect_segments,
"Maximum amount of segments in indirect requests (default is 32)");
static unsigned int xen_blkif_max_queues = 4;
-module_param_named(max_queues, xen_blkif_max_queues, uint, S_IRUGO);
+module_param_named(max_queues, xen_blkif_max_queues, uint, 0444);
MODULE_PARM_DESC(max_queues, "Maximum number of hardware queues/rings used per virtual disk");
/*
@@ -143,7 +142,7 @@ MODULE_PARM_DESC(max_queues, "Maximum number of hardware queues/rings used per v
* backend, 4KB page granularity is used.
*/
static unsigned int xen_blkif_max_ring_order;
-module_param_named(max_ring_page_order, xen_blkif_max_ring_order, int, S_IRUGO);
+module_param_named(max_ring_page_order, xen_blkif_max_ring_order, int, 0444);
MODULE_PARM_DESC(max_ring_page_order, "Maximum order of pages to be used for the shared ring");
#define BLK_RING_SIZE(info) \
diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c
index c8c8b0b8d333..b937cc1e2c07 100644
--- a/drivers/bluetooth/btusb.c
+++ b/drivers/bluetooth/btusb.c
@@ -231,6 +231,7 @@ static const struct usb_device_id blacklist_table[] = {
{ USB_DEVICE(0x0930, 0x0227), .driver_info = BTUSB_ATH3012 },
{ USB_DEVICE(0x0b05, 0x17d0), .driver_info = BTUSB_ATH3012 },
{ USB_DEVICE(0x0cf3, 0x0036), .driver_info = BTUSB_ATH3012 },
+ { USB_DEVICE(0x0cf3, 0x3004), .driver_info = BTUSB_ATH3012 },
{ USB_DEVICE(0x0cf3, 0x3008), .driver_info = BTUSB_ATH3012 },
{ USB_DEVICE(0x0cf3, 0x311d), .driver_info = BTUSB_ATH3012 },
{ USB_DEVICE(0x0cf3, 0x311e), .driver_info = BTUSB_ATH3012 },
@@ -263,7 +264,6 @@ static const struct usb_device_id blacklist_table[] = {
{ USB_DEVICE(0x0489, 0xe03c), .driver_info = BTUSB_ATH3012 },
/* QCA ROME chipset */
- { USB_DEVICE(0x0cf3, 0x3004), .driver_info = BTUSB_QCA_ROME },
{ USB_DEVICE(0x0cf3, 0xe007), .driver_info = BTUSB_QCA_ROME },
{ USB_DEVICE(0x0cf3, 0xe009), .driver_info = BTUSB_QCA_ROME },
{ USB_DEVICE(0x0cf3, 0xe010), .driver_info = BTUSB_QCA_ROME },
@@ -399,6 +399,13 @@ static const struct dmi_system_id btusb_needs_reset_resume_table[] = {
DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 3060"),
},
},
+ {
+ /* Dell XPS 9360 (QCA ROME device 0cf3:e300) */
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "XPS 13 9360"),
+ },
+ },
{}
};
@@ -2852,6 +2859,12 @@ static int btusb_config_oob_wake(struct hci_dev *hdev)
}
#endif
+static void btusb_check_needs_reset_resume(struct usb_interface *intf)
+{
+ if (dmi_check_system(btusb_needs_reset_resume_table))
+ interface_to_usbdev(intf)->quirks |= USB_QUIRK_RESET_RESUME;
+}
+
static int btusb_probe(struct usb_interface *intf,
const struct usb_device_id *id)
{
@@ -2974,9 +2987,6 @@ static int btusb_probe(struct usb_interface *intf,
hdev->send = btusb_send_frame;
hdev->notify = btusb_notify;
- if (dmi_check_system(btusb_needs_reset_resume_table))
- interface_to_usbdev(intf)->quirks |= USB_QUIRK_RESET_RESUME;
-
#ifdef CONFIG_PM
err = btusb_config_oob_wake(hdev);
if (err)
@@ -3064,6 +3074,7 @@ static int btusb_probe(struct usb_interface *intf,
data->setup_on_usb = btusb_setup_qca;
hdev->set_bdaddr = btusb_set_bdaddr_ath3012;
set_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, &hdev->quirks);
+ btusb_check_needs_reset_resume(intf);
}
#ifdef CONFIG_BT_HCIBTUSB_RTL
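The btusb hunks do two things: move device 0cf3:3004 from the QCA ROME list to the ATH3012 list, and apply the DMI-based reset-resume quirk only once a QCA ROME radio is actually probed (via the new btusb_check_needs_reset_resume() helper) rather than for every adapter. A minimal sketch of that quirk pattern, assuming an illustrative DMI match; dmi_check_system(), interface_to_usbdev() and USB_QUIRK_RESET_RESUME are the real APIs:

	#include <linux/dmi.h>
	#include <linux/usb.h>
	#include <linux/usb/quirks.h>

	static const struct dmi_system_id demo_reset_resume_table[] = {
		{
			.matches = {
				DMI_MATCH(DMI_SYS_VENDOR, "Example Inc."),
				DMI_MATCH(DMI_PRODUCT_NAME, "Example 9999"),
			},
		},
		{}	/* terminating entry */
	};

	static void demo_check_needs_reset_resume(struct usb_interface *intf)
	{
		if (dmi_check_system(demo_reset_resume_table))
			interface_to_usbdev(intf)->quirks |= USB_QUIRK_RESET_RESUME;
	}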
diff --git a/drivers/bus/fsl-mc/fsl-mc-msi.c b/drivers/bus/fsl-mc/fsl-mc-msi.c
index ec35e255b496..8b9c66d7c4ff 100644
--- a/drivers/bus/fsl-mc/fsl-mc-msi.c
+++ b/drivers/bus/fsl-mc/fsl-mc-msi.c
@@ -163,6 +163,8 @@ struct irq_domain *fsl_mc_msi_create_irq_domain(struct fwnode_handle *fwnode,
{
struct irq_domain *domain;
+ if (WARN_ON((info->flags & MSI_FLAG_LEVEL_CAPABLE)))
+ info->flags &= ~MSI_FLAG_LEVEL_CAPABLE;
if (info->flags & MSI_FLAG_USE_DEF_DOM_OPS)
fsl_mc_msi_update_dom_ops(info);
if (info->flags & MSI_FLAG_USE_DEF_CHIP_OPS)
diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c
index bfc566d3f31a..9adc8c3eb0fa 100644
--- a/drivers/cdrom/cdrom.c
+++ b/drivers/cdrom/cdrom.c
@@ -2192,7 +2192,7 @@ static int cdrom_read_cdda_bpc(struct cdrom_device_info *cdi, __u8 __user *ubuf,
len = nr * CD_FRAMESIZE_RAW;
- rq = blk_get_request(q, REQ_OP_SCSI_IN, GFP_KERNEL);
+ rq = blk_get_request(q, REQ_OP_SCSI_IN, 0);
if (IS_ERR(rq)) {
ret = PTR_ERR(rq);
break;
diff --git a/drivers/char/agp/uninorth-agp.c b/drivers/char/agp/uninorth-agp.c
index c381c8e396fc..79d8c84693a1 100644
--- a/drivers/char/agp/uninorth-agp.c
+++ b/drivers/char/agp/uninorth-agp.c
@@ -195,7 +195,7 @@ static int uninorth_insert_memory(struct agp_memory *mem, off_t pg_start, int ty
return 0;
}
-int uninorth_remove_memory(struct agp_memory *mem, off_t pg_start, int type)
+static int uninorth_remove_memory(struct agp_memory *mem, off_t pg_start, int type)
{
size_t i;
u32 *gp;
@@ -470,7 +470,7 @@ static int uninorth_free_gatt_table(struct agp_bridge_data *bridge)
return 0;
}
-void null_cache_flush(void)
+static void null_cache_flush(void)
{
mb();
}
diff --git a/drivers/char/apm-emulation.c b/drivers/char/apm-emulation.c
index a5e2f9e557ea..53436c03dbce 100644
--- a/drivers/char/apm-emulation.c
+++ b/drivers/char/apm-emulation.c
@@ -461,19 +461,6 @@ static int proc_apm_show(struct seq_file *m, void *v)
return 0;
}
-
-static int proc_apm_open(struct inode *inode, struct file *file)
-{
- return single_open(file, proc_apm_show, NULL);
-}
-
-static const struct file_operations apm_proc_fops = {
- .owner = THIS_MODULE,
- .open = proc_apm_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
#endif
static int kapmd(void *arg)
@@ -657,7 +644,7 @@ static int __init apm_init(void)
wake_up_process(kapmd_tsk);
#ifdef CONFIG_PROC_FS
- proc_create("apm", 0, NULL, &apm_proc_fops);
+ proc_create_single("apm", 0, NULL, proc_apm_show);
#endif
ret = misc_register(&apm_device);
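This apm-emulation hunk is the first of a long run of identical conversions in this section (ds1620, efirtc, nvram, synclink_cs, rtc, toshiba and connector follow the same shape): proc_create_single() takes the show() callback directly, so the per-driver single_open() wrapper and file_operations table can be deleted. For a table-driven seq_file such as drivers/char/misc.c there is the analogous proc_create_seq(). A minimal sketch of the after-state, with illustrative names:

	#include <linux/proc_fs.h>
	#include <linux/seq_file.h>

	static int demo_proc_show(struct seq_file *m, void *v)
	{
		seq_puts(m, "demo: ok\n");
		return 0;
	}

	static int __init demo_proc_init(void)
	{
		if (!proc_create_single("driver/demo", 0, NULL, demo_proc_show))
			return -ENOMEM;
		return 0;
	}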
diff --git a/drivers/char/ds1620.c b/drivers/char/ds1620.c
index eb53cbadb68f..a5ecf6dae02e 100644
--- a/drivers/char/ds1620.c
+++ b/drivers/char/ds1620.c
@@ -345,18 +345,6 @@ static int ds1620_proc_therm_show(struct seq_file *m, void *v)
fan_state[netwinder_get_fan()]);
return 0;
}
-
-static int ds1620_proc_therm_open(struct inode *inode, struct file *file)
-{
- return single_open(file, ds1620_proc_therm_show, NULL);
-}
-
-static const struct file_operations ds1620_proc_therm_fops = {
- .open = ds1620_proc_therm_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
#endif
static const struct file_operations ds1620_fops = {
@@ -404,7 +392,7 @@ static int __init ds1620_init(void)
return ret;
#ifdef THERM_USE_PROC
- if (!proc_create("therm", 0, NULL, &ds1620_proc_therm_fops))
+ if (!proc_create_single("therm", 0, NULL, ds1620_proc_therm_show))
printk(KERN_ERR "therm: unable to register /proc/therm\n");
#endif
diff --git a/drivers/char/efirtc.c b/drivers/char/efirtc.c
index dc62568b7dde..d9aab643997e 100644
--- a/drivers/char/efirtc.c
+++ b/drivers/char/efirtc.c
@@ -358,19 +358,6 @@ static int efi_rtc_proc_show(struct seq_file *m, void *v)
return 0;
}
-
-static int efi_rtc_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, efi_rtc_proc_show, NULL);
-}
-
-static const struct file_operations efi_rtc_proc_fops = {
- .open = efi_rtc_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
static int __init
efi_rtc_init(void)
{
@@ -386,7 +373,7 @@ efi_rtc_init(void)
return ret;
}
- dir = proc_create("driver/efirtc", 0, NULL, &efi_rtc_proc_fops);
+ dir = proc_create_single("driver/efirtc", 0, NULL, efi_rtc_proc_show);
if (dir == NULL) {
printk(KERN_ERR "efirtc: can't create /proc/driver/efirtc.\n");
misc_deregister(&efi_rtc_dev);
diff --git a/drivers/char/misc.c b/drivers/char/misc.c
index 1bb9e7cc82e3..53cfe574d8d4 100644
--- a/drivers/char/misc.c
+++ b/drivers/char/misc.c
@@ -95,19 +95,6 @@ static const struct seq_operations misc_seq_ops = {
.stop = misc_seq_stop,
.show = misc_seq_show,
};
-
-static int misc_seq_open(struct inode *inode, struct file *file)
-{
- return seq_open(file, &misc_seq_ops);
-}
-
-static const struct file_operations misc_proc_fops = {
- .owner = THIS_MODULE,
- .open = misc_seq_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
#endif
static int misc_open(struct inode *inode, struct file *file)
@@ -282,7 +269,7 @@ static int __init misc_init(void)
int err;
struct proc_dir_entry *ret;
- ret = proc_create("misc", 0, NULL, &misc_proc_fops);
+ ret = proc_create_seq("misc", 0, NULL, &misc_seq_ops);
misc_class = class_create(THIS_MODULE, "misc");
err = PTR_ERR(misc_class);
if (IS_ERR(misc_class))
diff --git a/drivers/char/nvram.c b/drivers/char/nvram.c
index 678fa97e41fb..25264d65e716 100644
--- a/drivers/char/nvram.c
+++ b/drivers/char/nvram.c
@@ -389,22 +389,9 @@ static int nvram_proc_read(struct seq_file *seq, void *offset)
return 0;
}
-static int nvram_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, nvram_proc_read, NULL);
-}
-
-static const struct file_operations nvram_proc_fops = {
- .owner = THIS_MODULE,
- .open = nvram_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
static int nvram_add_proc_fs(void)
{
- if (!proc_create("driver/nvram", 0, NULL, &nvram_proc_fops))
+ if (!proc_create_single("driver/nvram", 0, NULL, nvram_proc_read))
return -ENOMEM;
return 0;
}
diff --git a/drivers/char/pcmcia/synclink_cs.c b/drivers/char/pcmcia/synclink_cs.c
index aa502e9fb7fa..66b04194aa9f 100644
--- a/drivers/char/pcmcia/synclink_cs.c
+++ b/drivers/char/pcmcia/synclink_cs.c
@@ -2616,19 +2616,6 @@ static int mgslpc_proc_show(struct seq_file *m, void *v)
return 0;
}
-static int mgslpc_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, mgslpc_proc_show, NULL);
-}
-
-static const struct file_operations mgslpc_proc_fops = {
- .owner = THIS_MODULE,
- .open = mgslpc_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
static int rx_alloc_buffers(MGSLPC_INFO *info)
{
/* each buffer has header and data */
@@ -2815,7 +2802,7 @@ static const struct tty_operations mgslpc_ops = {
.tiocmget = tiocmget,
.tiocmset = tiocmset,
.get_icount = mgslpc_get_icount,
- .proc_fops = &mgslpc_proc_fops,
+ .proc_show = mgslpc_proc_show,
};
static int __init synclink_cs_init(void)
diff --git a/drivers/char/random.c b/drivers/char/random.c
index cd888d4ee605..a8fb0020ba5c 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -402,8 +402,7 @@ static struct poolinfo {
/*
* Static global variables
*/
-static DECLARE_WAIT_QUEUE_HEAD(random_read_wait);
-static DECLARE_WAIT_QUEUE_HEAD(random_write_wait);
+static DECLARE_WAIT_QUEUE_HEAD(random_wait);
static struct fasync_struct *fasync;
static DEFINE_SPINLOCK(random_ready_list_lock);
@@ -722,8 +721,8 @@ retry:
/* should we wake readers? */
if (entropy_bits >= random_read_wakeup_bits &&
- wq_has_sleeper(&random_read_wait)) {
- wake_up_interruptible(&random_read_wait);
+ wq_has_sleeper(&random_wait)) {
+ wake_up_interruptible_poll(&random_wait, POLLIN);
kill_fasync(&fasync, SIGIO, POLL_IN);
}
/* If the input pool is getting full, send some
@@ -1397,7 +1396,7 @@ retry:
trace_debit_entropy(r->name, 8 * ibytes);
if (ibytes &&
(r->entropy_count >> ENTROPY_SHIFT) < random_write_wakeup_bits) {
- wake_up_interruptible(&random_write_wait);
+ wake_up_interruptible_poll(&random_wait, POLLOUT);
kill_fasync(&fasync, SIGIO, POLL_OUT);
}
@@ -1839,7 +1838,7 @@ _random_read(int nonblock, char __user *buf, size_t nbytes)
if (nonblock)
return -EAGAIN;
- wait_event_interruptible(random_read_wait,
+ wait_event_interruptible(random_wait,
ENTROPY_BITS(&input_pool) >=
random_read_wakeup_bits);
if (signal_pending(current))
@@ -1876,14 +1875,17 @@ urandom_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos)
return ret;
}
+static struct wait_queue_head *
+random_get_poll_head(struct file *file, __poll_t events)
+{
+ return &random_wait;
+}
+
static __poll_t
-random_poll(struct file *file, poll_table * wait)
+random_poll_mask(struct file *file, __poll_t events)
{
- __poll_t mask;
+ __poll_t mask = 0;
- poll_wait(file, &random_read_wait, wait);
- poll_wait(file, &random_write_wait, wait);
- mask = 0;
if (ENTROPY_BITS(&input_pool) >= random_read_wakeup_bits)
mask |= EPOLLIN | EPOLLRDNORM;
if (ENTROPY_BITS(&input_pool) < random_write_wakeup_bits)
@@ -1990,7 +1992,8 @@ static int random_fasync(int fd, struct file *filp, int on)
const struct file_operations random_fops = {
.read = random_read,
.write = random_write,
- .poll = random_poll,
+ .get_poll_head = random_get_poll_head,
+ .poll_mask = random_poll_mask,
.unlocked_ioctl = random_ioctl,
.fasync = random_fasync,
.llseek = noop_llseek,
@@ -2323,7 +2326,7 @@ void add_hwgenerator_randomness(const char *buffer, size_t count,
* We'll be woken up again once below random_write_wakeup_thresh,
* or when the calling thread is about to terminate.
*/
- wait_event_interruptible(random_write_wait, kthread_should_stop() ||
+ wait_event_interruptible(random_wait, kthread_should_stop() ||
ENTROPY_BITS(&input_pool) <= random_write_wakeup_bits);
mix_pool_bytes(poolp, buffer, count);
credit_entropy_bits(poolp, entropy);
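The random.c rework folds the separate read and write wait queues into one so the file can use the (then new) ->get_poll_head()/->poll_mask() pair: the poll core sleeps on the single queue returned by get_poll_head(), and poll_mask() only computes readiness, which is why the wakeups above switch to wake_up_interruptible_poll() with an explicit POLLIN/POLLOUT key. A minimal sketch of the split, with hypothetical demo_* readiness stubs; note this interface proved short-lived upstream and was later replaced by plain ->poll again:

	#include <linux/fs.h>
	#include <linux/poll.h>
	#include <linux/wait.h>

	static DECLARE_WAIT_QUEUE_HEAD(demo_wait);

	static bool demo_read_ready(void)  { return true; }	/* stub */
	static bool demo_write_ready(void) { return false; }	/* stub */

	static struct wait_queue_head *
	demo_get_poll_head(struct file *file, __poll_t events)
	{
		return &demo_wait;	/* one queue for both directions */
	}

	static __poll_t demo_poll_mask(struct file *file, __poll_t events)
	{
		__poll_t mask = 0;

		if (demo_read_ready())
			mask |= EPOLLIN | EPOLLRDNORM;
		if (demo_write_ready())
			mask |= EPOLLOUT | EPOLLWRNORM;
		return mask;
	}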
diff --git a/drivers/char/rtc.c b/drivers/char/rtc.c
index 57dc546628b5..94fedeeec035 100644
--- a/drivers/char/rtc.c
+++ b/drivers/char/rtc.c
@@ -171,7 +171,7 @@ static void mask_rtc_irq_bit(unsigned char bit)
#endif
#ifdef CONFIG_PROC_FS
-static int rtc_proc_open(struct inode *inode, struct file *file);
+static int rtc_proc_show(struct seq_file *seq, void *v);
#endif
/*
@@ -832,16 +832,6 @@ static struct miscdevice rtc_dev = {
.fops = &rtc_fops,
};
-#ifdef CONFIG_PROC_FS
-static const struct file_operations rtc_proc_fops = {
- .owner = THIS_MODULE,
- .open = rtc_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-#endif
-
static resource_size_t rtc_size;
static struct resource * __init rtc_request_region(resource_size_t size)
@@ -982,7 +972,7 @@ no_irq:
}
#ifdef CONFIG_PROC_FS
- ent = proc_create("driver/rtc", 0, NULL, &rtc_proc_fops);
+ ent = proc_create_single("driver/rtc", 0, NULL, rtc_proc_show);
if (!ent)
printk(KERN_WARNING "rtc: Failed to register with procfs.\n");
#endif
@@ -1201,11 +1191,6 @@ static int rtc_proc_show(struct seq_file *seq, void *v)
#undef YN
#undef NY
}
-
-static int rtc_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, rtc_proc_show, NULL);
-}
#endif
static void rtc_get_rtc_time(struct rtc_time *rtc_tm)
diff --git a/drivers/char/toshiba.c b/drivers/char/toshiba.c
index 5488516da8ea..802376fe851a 100644
--- a/drivers/char/toshiba.c
+++ b/drivers/char/toshiba.c
@@ -326,19 +326,6 @@ static int proc_toshiba_show(struct seq_file *m, void *v)
key);
return 0;
}
-
-static int proc_toshiba_open(struct inode *inode, struct file *file)
-{
- return single_open(file, proc_toshiba_show, NULL);
-}
-
-static const struct file_operations proc_toshiba_fops = {
- .owner = THIS_MODULE,
- .open = proc_toshiba_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
#endif
@@ -524,7 +511,7 @@ static int __init toshiba_init(void)
{
struct proc_dir_entry *pde;
- pde = proc_create("toshiba", 0, NULL, &proc_toshiba_fops);
+ pde = proc_create_single("toshiba", 0, NULL, proc_toshiba_show);
if (!pde) {
misc_deregister(&tosh_device);
return -ENOMEM;
diff --git a/drivers/clk/Kconfig b/drivers/clk/Kconfig
index 41492e980ef4..34968a381d0f 100644
--- a/drivers/clk/Kconfig
+++ b/drivers/clk/Kconfig
@@ -266,15 +266,13 @@ config COMMON_CLK_STM32MP157
Support for stm32mp157 SoC family clocks
config COMMON_CLK_STM32F
- bool "Clock driver for stm32f4 and stm32f7 SoC families"
- depends on MACH_STM32F429 || MACH_STM32F469 || MACH_STM32F746
+ def_bool COMMON_CLK && (MACH_STM32F429 || MACH_STM32F469 || MACH_STM32F746)
help
---help---
Support for stm32f4 and stm32f7 SoC families clocks
config COMMON_CLK_STM32H7
- bool "Clock driver for stm32h7 SoC family"
- depends on MACH_STM32H743
+ def_bool COMMON_CLK && MACH_STM32H743
help
---help---
Support for stm32h7 SoC family clocks
diff --git a/drivers/clk/clk-cs2000-cp.c b/drivers/clk/clk-cs2000-cp.c
index c58019750b7e..a2f8c42e527a 100644
--- a/drivers/clk/clk-cs2000-cp.c
+++ b/drivers/clk/clk-cs2000-cp.c
@@ -541,7 +541,7 @@ probe_err:
return ret;
}
-static int cs2000_resume(struct device *dev)
+static int __maybe_unused cs2000_resume(struct device *dev)
{
struct cs2000_priv *priv = dev_get_drvdata(dev);
diff --git a/drivers/clk/clk-mux.c b/drivers/clk/clk-mux.c
index ac4a042f8658..1628b93655ed 100644
--- a/drivers/clk/clk-mux.c
+++ b/drivers/clk/clk-mux.c
@@ -112,10 +112,18 @@ static int clk_mux_set_parent(struct clk_hw *hw, u8 index)
return 0;
}
+static int clk_mux_determine_rate(struct clk_hw *hw,
+ struct clk_rate_request *req)
+{
+ struct clk_mux *mux = to_clk_mux(hw);
+
+ return clk_mux_determine_rate_flags(hw, req, mux->flags);
+}
+
const struct clk_ops clk_mux_ops = {
.get_parent = clk_mux_get_parent,
.set_parent = clk_mux_set_parent,
- .determine_rate = __clk_mux_determine_rate,
+ .determine_rate = clk_mux_determine_rate,
};
EXPORT_SYMBOL_GPL(clk_mux_ops);
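This clk-mux change (and the matching meson clk-regmap one below) exists because __clk_mux_determine_rate() ignores the mux's own CLK_MUX_* flags, so muxes registered with CLK_MUX_ROUND_CLOSEST silently lost that behaviour; the clk_mux_determine_rate_flags() helper re-exported from drivers/clk/clk.c below lets each driver forward its flags. A condensed sketch of the pattern, names illustrative:

	#include <linux/clk-provider.h>

	static int demo_mux_determine_rate(struct clk_hw *hw,
					   struct clk_rate_request *req)
	{
		struct clk_mux *mux = to_clk_mux(hw);

		/* Honour CLK_MUX_* flags such as CLK_MUX_ROUND_CLOSEST. */
		return clk_mux_determine_rate_flags(hw, req, mux->flags);
	}

	static const struct clk_ops demo_mux_ops = {
		.get_parent	= clk_mux_get_parent,
		.set_parent	= clk_mux_set_parent,
		.determine_rate	= demo_mux_determine_rate,
	};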
diff --git a/drivers/clk/clk-stm32mp1.c b/drivers/clk/clk-stm32mp1.c
index f1d5967b4b39..edd3cf451401 100644
--- a/drivers/clk/clk-stm32mp1.c
+++ b/drivers/clk/clk-stm32mp1.c
@@ -216,7 +216,7 @@ static const char * const usart1_src[] = {
"pclk5", "pll3_q", "ck_hsi", "ck_csi", "pll4_q", "ck_hse"
};
-const char * const usart234578_src[] = {
+static const char * const usart234578_src[] = {
"pclk1", "pll4_q", "ck_hsi", "ck_csi", "ck_hse"
};
@@ -224,10 +224,6 @@ static const char * const usart6_src[] = {
"pclk2", "pll4_q", "ck_hsi", "ck_csi", "ck_hse"
};
-static const char * const dfsdm_src[] = {
- "pclk2", "ck_mcu"
-};
-
static const char * const fdcan_src[] = {
"ck_hse", "pll3_q", "pll4_q"
};
@@ -316,10 +312,8 @@ struct stm32_clk_mgate {
struct clock_config {
u32 id;
const char *name;
- union {
- const char *parent_name;
- const char * const *parent_names;
- };
+ const char *parent_name;
+ const char * const *parent_names;
int num_parents;
unsigned long flags;
void *cfg;
@@ -469,7 +463,7 @@ static void mp1_gate_clk_disable(struct clk_hw *hw)
}
}
-const struct clk_ops mp1_gate_clk_ops = {
+static const struct clk_ops mp1_gate_clk_ops = {
.enable = mp1_gate_clk_enable,
.disable = mp1_gate_clk_disable,
.is_enabled = clk_gate_is_enabled,
@@ -698,7 +692,7 @@ static void mp1_mgate_clk_disable(struct clk_hw *hw)
mp1_gate_clk_disable(hw);
}
-const struct clk_ops mp1_mgate_clk_ops = {
+static const struct clk_ops mp1_mgate_clk_ops = {
.enable = mp1_mgate_clk_enable,
.disable = mp1_mgate_clk_disable,
.is_enabled = clk_gate_is_enabled,
@@ -732,7 +726,7 @@ static int clk_mmux_set_parent(struct clk_hw *hw, u8 index)
return 0;
}
-const struct clk_ops clk_mmux_ops = {
+static const struct clk_ops clk_mmux_ops = {
.get_parent = clk_mmux_get_parent,
.set_parent = clk_mmux_set_parent,
.determine_rate = __clk_mux_determine_rate,
@@ -1048,10 +1042,10 @@ struct stm32_pll_cfg {
u32 offset;
};
-struct clk_hw *_clk_register_pll(struct device *dev,
- struct clk_hw_onecell_data *clk_data,
- void __iomem *base, spinlock_t *lock,
- const struct clock_config *cfg)
+static struct clk_hw *_clk_register_pll(struct device *dev,
+ struct clk_hw_onecell_data *clk_data,
+ void __iomem *base, spinlock_t *lock,
+ const struct clock_config *cfg)
{
struct stm32_pll_cfg *stm_pll_cfg = cfg->cfg;
@@ -1405,7 +1399,8 @@ enum {
G_USBH,
G_ETHSTP,
G_RTCAPB,
- G_TZC,
+ G_TZC1,
+ G_TZC2,
G_TZPC,
G_IWDG1,
G_BSEC,
@@ -1417,7 +1412,7 @@ enum {
G_LAST
};
-struct stm32_mgate mp1_mgate[G_LAST];
+static struct stm32_mgate mp1_mgate[G_LAST];
#define _K_GATE(_id, _gate_offset, _gate_bit_idx, _gate_flags,\
_mgate, _ops)\
@@ -1440,7 +1435,7 @@ struct stm32_mgate mp1_mgate[G_LAST];
&mp1_mgate[_id], &mp1_mgate_clk_ops)
/* Peripheral gates */
-struct stm32_gate_cfg per_gate_cfg[G_LAST] = {
+static struct stm32_gate_cfg per_gate_cfg[G_LAST] = {
/* Multi gates */
K_GATE(G_MDIO, RCC_APB1ENSETR, 31, 0),
K_MGATE(G_DAC12, RCC_APB1ENSETR, 29, 0),
@@ -1506,7 +1501,8 @@ struct stm32_gate_cfg per_gate_cfg[G_LAST] = {
K_GATE(G_BSEC, RCC_APB5ENSETR, 16, 0),
K_GATE(G_IWDG1, RCC_APB5ENSETR, 15, 0),
K_GATE(G_TZPC, RCC_APB5ENSETR, 13, 0),
- K_GATE(G_TZC, RCC_APB5ENSETR, 12, 0),
+ K_GATE(G_TZC2, RCC_APB5ENSETR, 12, 0),
+ K_GATE(G_TZC1, RCC_APB5ENSETR, 11, 0),
K_GATE(G_RTCAPB, RCC_APB5ENSETR, 8, 0),
K_MGATE(G_USART1, RCC_APB5ENSETR, 4, 0),
K_MGATE(G_I2C6, RCC_APB5ENSETR, 3, 0),
@@ -1600,7 +1596,7 @@ enum {
M_LAST
};
-struct stm32_mmux ker_mux[M_LAST];
+static struct stm32_mmux ker_mux[M_LAST];
#define _K_MUX(_id, _offset, _shift, _width, _mux_flags, _mmux, _ops)\
[_id] = {\
@@ -1623,7 +1619,7 @@ struct stm32_mmux ker_mux[M_LAST];
_K_MUX(_id, _offset, _shift, _width, _mux_flags,\
&ker_mux[_id], &clk_mmux_ops)
-const struct stm32_mux_cfg ker_mux_cfg[M_LAST] = {
+static const struct stm32_mux_cfg ker_mux_cfg[M_LAST] = {
/* Kernel multi mux */
K_MMUX(M_SDMMC12, RCC_SDMMC12CKSELR, 0, 3, 0),
K_MMUX(M_SPI23, RCC_SPI2S23CKSELR, 0, 3, 0),
@@ -1860,7 +1856,8 @@ static const struct clock_config stm32mp1_clock_cfg[] = {
PCLK(USART1, "usart1", "pclk5", 0, G_USART1),
PCLK(RTCAPB, "rtcapb", "pclk5", CLK_IGNORE_UNUSED |
CLK_IS_CRITICAL, G_RTCAPB),
- PCLK(TZC, "tzc", "pclk5", CLK_IGNORE_UNUSED, G_TZC),
+ PCLK(TZC1, "tzc1", "ck_axi", CLK_IGNORE_UNUSED, G_TZC1),
+ PCLK(TZC2, "tzc2", "ck_axi", CLK_IGNORE_UNUSED, G_TZC2),
PCLK(TZPC, "tzpc", "pclk5", CLK_IGNORE_UNUSED, G_TZPC),
PCLK(IWDG1, "iwdg1", "pclk5", 0, G_IWDG1),
PCLK(BSEC, "bsec", "pclk5", CLK_IGNORE_UNUSED, G_BSEC),
@@ -1916,8 +1913,7 @@ static const struct clock_config stm32mp1_clock_cfg[] = {
KCLK(RNG1_K, "rng1_k", rng_src, 0, G_RNG1, M_RNG1),
KCLK(RNG2_K, "rng2_k", rng_src, 0, G_RNG2, M_RNG2),
KCLK(USBPHY_K, "usbphy_k", usbphy_src, 0, G_USBPHY, M_USBPHY),
- KCLK(STGEN_K, "stgen_k", stgen_src, CLK_IGNORE_UNUSED,
- G_STGEN, M_STGEN),
+ KCLK(STGEN_K, "stgen_k", stgen_src, CLK_IS_CRITICAL, G_STGEN, M_STGEN),
KCLK(SPDIF_K, "spdif_k", spdif_src, 0, G_SPDIF, M_SPDIF),
KCLK(SPI1_K, "spi1_k", spi123_src, 0, G_SPI1, M_SPI1),
KCLK(SPI2_K, "spi2_k", spi123_src, 0, G_SPI2, M_SPI23),
@@ -1948,8 +1944,8 @@ static const struct clock_config stm32mp1_clock_cfg[] = {
KCLK(FDCAN_K, "fdcan_k", fdcan_src, 0, G_FDCAN, M_FDCAN),
KCLK(SAI1_K, "sai1_k", sai_src, 0, G_SAI1, M_SAI1),
KCLK(SAI2_K, "sai2_k", sai2_src, 0, G_SAI2, M_SAI2),
- KCLK(SAI3_K, "sai3_k", sai_src, 0, G_SAI2, M_SAI3),
- KCLK(SAI4_K, "sai4_k", sai_src, 0, G_SAI2, M_SAI4),
+ KCLK(SAI3_K, "sai3_k", sai_src, 0, G_SAI3, M_SAI3),
+ KCLK(SAI4_K, "sai4_k", sai_src, 0, G_SAI4, M_SAI4),
KCLK(ADC12_K, "adc12_k", adc12_src, 0, G_ADC12, M_ADC12),
KCLK(DSI_K, "dsi_k", dsi_src, 0, G_DSI, M_DSI),
KCLK(ADFSDM_K, "adfsdm_k", sai_src, 0, G_ADFSDM, M_SAI1),
@@ -1992,10 +1988,6 @@ static const struct clock_config stm32mp1_clock_cfg[] = {
_DIV(RCC_MCO2CFGR, 4, 4, 0, NULL)),
/* Debug clocks */
- FIXED_FACTOR(NO_ID, "ck_axi_div2", "ck_axi", 0, 1, 2),
-
- GATE(DBG, "ck_apb_dbg", "ck_axi_div2", 0, RCC_DBGCFGR, 8, 0),
-
GATE(CK_DBG, "ck_sys_dbg", "ck_axi", 0, RCC_DBGCFGR, 8, 0),
COMPOSITE(CK_TRACE, "ck_trace", ck_trace_src, CLK_OPS_PARENT_ENABLE,
diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c
index ea67ac81c6f9..7af555f0e60c 100644
--- a/drivers/clk/clk.c
+++ b/drivers/clk/clk.c
@@ -426,9 +426,9 @@ static bool mux_is_better_rate(unsigned long rate, unsigned long now,
return now <= rate && now > best;
}
-static int
-clk_mux_determine_rate_flags(struct clk_hw *hw, struct clk_rate_request *req,
- unsigned long flags)
+int clk_mux_determine_rate_flags(struct clk_hw *hw,
+ struct clk_rate_request *req,
+ unsigned long flags)
{
struct clk_core *core = hw->core, *parent, *best_parent = NULL;
int i, num_parents, ret;
@@ -488,6 +488,7 @@ out:
return 0;
}
+EXPORT_SYMBOL_GPL(clk_mux_determine_rate_flags);
struct clk *__clk_lookup(const char *name)
{
diff --git a/drivers/clk/imx/clk-imx6ul.c b/drivers/clk/imx/clk-imx6ul.c
index 114ecbb94ec5..12320118f8de 100644
--- a/drivers/clk/imx/clk-imx6ul.c
+++ b/drivers/clk/imx/clk-imx6ul.c
@@ -464,7 +464,7 @@ static void __init imx6ul_clocks_init(struct device_node *ccm_node)
clk_set_rate(clks[IMX6UL_CLK_AHB], 99000000);
/* Change periph_pre clock to pll2_bus to adjust AXI rate to 264MHz */
- clk_set_parent(clks[IMX6UL_CLK_PERIPH_CLK2_SEL], clks[IMX6UL_CLK_PLL3_USB_OTG]);
+ clk_set_parent(clks[IMX6UL_CLK_PERIPH_CLK2_SEL], clks[IMX6UL_CLK_OSC]);
clk_set_parent(clks[IMX6UL_CLK_PERIPH], clks[IMX6UL_CLK_PERIPH_CLK2]);
clk_set_parent(clks[IMX6UL_CLK_PERIPH_PRE], clks[IMX6UL_CLK_PLL2_BUS]);
clk_set_parent(clks[IMX6UL_CLK_PERIPH], clks[IMX6UL_CLK_PERIPH_PRE]);
diff --git a/drivers/clk/meson/clk-regmap.c b/drivers/clk/meson/clk-regmap.c
index 3645fdb62343..ab7a3556f5b2 100644
--- a/drivers/clk/meson/clk-regmap.c
+++ b/drivers/clk/meson/clk-regmap.c
@@ -153,10 +153,19 @@ static int clk_regmap_mux_set_parent(struct clk_hw *hw, u8 index)
val << mux->shift);
}
+static int clk_regmap_mux_determine_rate(struct clk_hw *hw,
+ struct clk_rate_request *req)
+{
+ struct clk_regmap *clk = to_clk_regmap(hw);
+ struct clk_regmap_mux_data *mux = clk_get_regmap_mux_data(clk);
+
+ return clk_mux_determine_rate_flags(hw, req, mux->flags);
+}
+
const struct clk_ops clk_regmap_mux_ops = {
.get_parent = clk_regmap_mux_get_parent,
.set_parent = clk_regmap_mux_set_parent,
- .determine_rate = __clk_mux_determine_rate,
+ .determine_rate = clk_regmap_mux_determine_rate,
};
EXPORT_SYMBOL_GPL(clk_regmap_mux_ops);
diff --git a/drivers/clk/meson/gxbb-aoclk.h b/drivers/clk/meson/gxbb-aoclk.h
index 0be78383f257..badc4c22b4ee 100644
--- a/drivers/clk/meson/gxbb-aoclk.h
+++ b/drivers/clk/meson/gxbb-aoclk.h
@@ -17,8 +17,6 @@
#define AO_RTC_ALT_CLK_CNTL0 0x94
#define AO_RTC_ALT_CLK_CNTL1 0x98
-extern const struct clk_ops meson_aoclk_gate_regmap_ops;
-
struct aoclk_cec_32k {
struct clk_hw hw;
struct regmap *regmap;
diff --git a/drivers/clk/meson/meson8b.c b/drivers/clk/meson/meson8b.c
index cc2992493e0b..d0524ec71aad 100644
--- a/drivers/clk/meson/meson8b.c
+++ b/drivers/clk/meson/meson8b.c
@@ -253,7 +253,7 @@ static struct clk_fixed_factor meson8b_fclk_div3_div = {
.mult = 1,
.div = 3,
.hw.init = &(struct clk_init_data){
- .name = "fclk_div_div3",
+ .name = "fclk_div3_div",
.ops = &clk_fixed_factor_ops,
.parent_names = (const char *[]){ "fixed_pll" },
.num_parents = 1,
@@ -632,7 +632,8 @@ static struct clk_regmap meson8b_cpu_clk = {
.hw.init = &(struct clk_init_data){
.name = "cpu_clk",
.ops = &clk_regmap_mux_ro_ops,
- .parent_names = (const char *[]){ "xtal", "cpu_out_sel" },
+ .parent_names = (const char *[]){ "xtal",
+ "cpu_scale_out_sel" },
.num_parents = 2,
.flags = (CLK_SET_RATE_PARENT |
CLK_SET_RATE_NO_REPARENT),
diff --git a/drivers/connector/connector.c b/drivers/connector/connector.c
index 8615594bd065..e718b8c69a56 100644
--- a/drivers/connector/connector.c
+++ b/drivers/connector/connector.c
@@ -260,19 +260,6 @@ static int cn_proc_show(struct seq_file *m, void *v)
return 0;
}
-static int cn_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, cn_proc_show, NULL);
-}
-
-static const struct file_operations cn_file_ops = {
- .owner = THIS_MODULE,
- .open = cn_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release
-};
-
static struct cn_dev cdev = {
.input = cn_rx_skb,
};
@@ -297,7 +284,7 @@ static int cn_init(void)
cn_already_initialized = 1;
- proc_create("connector", S_IRUGO, init_net.proc_net, &cn_file_ops);
+ proc_create_single("connector", S_IRUGO, init_net.proc_net, cn_proc_show);
return 0;
}
diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm
index de55c7d57438..96b35b8b3606 100644
--- a/drivers/cpufreq/Kconfig.arm
+++ b/drivers/cpufreq/Kconfig.arm
@@ -20,7 +20,7 @@ config ACPI_CPPC_CPUFREQ
config ARM_ARMADA_37XX_CPUFREQ
tristate "Armada 37xx CPUFreq support"
- depends on ARCH_MVEBU
+ depends on ARCH_MVEBU && CPUFREQ_DT
help
This adds the CPUFreq driver support for Marvell Armada 37xx SoCs.
The Armada 37xx PMU supports 4 frequency and VDD levels.
diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c
index bc5fc1630876..b15115a48775 100644
--- a/drivers/cpufreq/cppc_cpufreq.c
+++ b/drivers/cpufreq/cppc_cpufreq.c
@@ -126,6 +126,49 @@ static void cppc_cpufreq_stop_cpu(struct cpufreq_policy *policy)
cpu->perf_caps.lowest_perf, cpu_num, ret);
}
+/*
+ * The PCC subspace describes the rate at which the platform can accept
+ * commands on the shared PCC channel (including READs, which do not count
+ * towards frequency transition requests), so ideally we need to use the
+ * PCC values as a fallback if we don't have a platform-specific
+ * transition_delay_us.
+ */
+#ifdef CONFIG_ARM64
+#include <asm/cputype.h>
+
+static unsigned int cppc_cpufreq_get_transition_delay_us(int cpu)
+{
+ unsigned long implementor = read_cpuid_implementor();
+ unsigned long part_num = read_cpuid_part_number();
+ unsigned int delay_us = 0;
+
+ switch (implementor) {
+ case ARM_CPU_IMP_QCOM:
+ switch (part_num) {
+ case QCOM_CPU_PART_FALKOR_V1:
+ case QCOM_CPU_PART_FALKOR:
+ delay_us = 10000;
+ break;
+ default:
+ delay_us = cppc_get_transition_latency(cpu) / NSEC_PER_USEC;
+ break;
+ }
+ break;
+ default:
+ delay_us = cppc_get_transition_latency(cpu) / NSEC_PER_USEC;
+ break;
+ }
+
+ return delay_us;
+}
+
+#else
+
+static unsigned int cppc_cpufreq_get_transition_delay_us(int cpu)
+{
+ return cppc_get_transition_latency(cpu) / NSEC_PER_USEC;
+}
+#endif
+
static int cppc_cpufreq_cpu_init(struct cpufreq_policy *policy)
{
struct cppc_cpudata *cpu;
@@ -162,8 +205,7 @@ static int cppc_cpufreq_cpu_init(struct cpufreq_policy *policy)
cpu->perf_caps.highest_perf;
policy->cpuinfo.max_freq = cppc_dmi_max_khz;
- policy->transition_delay_us = cppc_get_transition_latency(cpu_num) /
- NSEC_PER_USEC;
+ policy->transition_delay_us = cppc_cpufreq_get_transition_delay_us(cpu_num);
policy->shared_type = cpu->shared_type;
if (policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) {
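Per the comment above, the PCC-derived latency is only a fallback; known parts with slow PCC channels get a hard-coded transition delay. A userspace sketch of that dispatch-with-fallback shape, compilable with any C compiler; the constants are illustrative stand-ins for the MIDR values used above:

	#include <stdio.h>

	#define IMP_QCOM	0x51	/* stand-in for ARM_CPU_IMP_QCOM */
	#define PART_FALKOR	0xc00	/* stand-in for QCOM_CPU_PART_FALKOR */

	static unsigned int transition_delay_us(unsigned int imp,
						unsigned int part,
						unsigned int fw_latency_ns)
	{
		if (imp == IMP_QCOM && part == PART_FALKOR)
			return 10000;		/* quirk: 10 ms */
		return fw_latency_ns / 1000;	/* generic ns -> us fallback */
	}

	int main(void)
	{
		printf("%u\n", transition_delay_us(IMP_QCOM, PART_FALKOR, 0));
		printf("%u\n", transition_delay_us(0x41, 0xd03, 500000));
		return 0;
	}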
diff --git a/drivers/crypto/inside-secure/safexcel.c b/drivers/crypto/inside-secure/safexcel.c
index d4a81be0d7d2..b6be62025325 100644
--- a/drivers/crypto/inside-secure/safexcel.c
+++ b/drivers/crypto/inside-secure/safexcel.c
@@ -152,8 +152,8 @@ static int eip197_load_firmwares(struct safexcel_crypto_priv *priv)
EIP197_PE_ICE_SCRATCH_CTRL_CHANGE_ACCESS;
writel(val, EIP197_PE(priv) + EIP197_PE_ICE_SCRATCH_CTRL);
- memset(EIP197_PE(priv) + EIP197_PE_ICE_SCRATCH_RAM, 0,
- EIP197_NUM_OF_SCRATCH_BLOCKS * sizeof(u32));
+ memset_io(EIP197_PE(priv) + EIP197_PE_ICE_SCRATCH_RAM, 0,
+ EIP197_NUM_OF_SCRATCH_BLOCKS * sizeof(u32));
eip197_write_firmware(priv, fw[FW_IFPP], EIP197_PE_ICE_FPP_CTRL,
EIP197_PE_ICE_RAM_CTRL_FPP_PROG_EN);
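The safexcel fix generalizes: the scratch RAM being cleared is an ioremapped device region, and plain memset() may use cacheable CPU store sequences that are not valid on MMIO mappings (sparse also flags the __iomem address-space mismatch); memset_io() issues proper MMIO stores. A minimal sketch, names illustrative:

	#include <linux/io.h>
	#include <linux/types.h>

	static void demo_clear_scratch(void __iomem *scratch, size_t nwords)
	{
		/* MMIO-safe clear; memset() is wrong on __iomem memory. */
		memset_io(scratch, 0, nwords * sizeof(u32));
	}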
diff --git a/drivers/dma/qcom/bam_dma.c b/drivers/dma/qcom/bam_dma.c
index d29275b97e84..4a828c18099a 100644
--- a/drivers/dma/qcom/bam_dma.c
+++ b/drivers/dma/qcom/bam_dma.c
@@ -524,6 +524,14 @@ static int bam_alloc_chan(struct dma_chan *chan)
return 0;
}
+static int bam_pm_runtime_get_sync(struct device *dev)
+{
+ if (pm_runtime_enabled(dev))
+ return pm_runtime_get_sync(dev);
+
+ return 0;
+}
+
/**
* bam_free_chan - Frees dma resources associated with specific channel
* @chan: specified channel
@@ -539,7 +547,7 @@ static void bam_free_chan(struct dma_chan *chan)
unsigned long flags;
int ret;
- ret = pm_runtime_get_sync(bdev->dev);
+ ret = bam_pm_runtime_get_sync(bdev->dev);
if (ret < 0)
return;
@@ -720,7 +728,7 @@ static int bam_pause(struct dma_chan *chan)
unsigned long flag;
int ret;
- ret = pm_runtime_get_sync(bdev->dev);
+ ret = bam_pm_runtime_get_sync(bdev->dev);
if (ret < 0)
return ret;
@@ -746,7 +754,7 @@ static int bam_resume(struct dma_chan *chan)
unsigned long flag;
int ret;
- ret = pm_runtime_get_sync(bdev->dev);
+ ret = bam_pm_runtime_get_sync(bdev->dev);
if (ret < 0)
return ret;
@@ -852,7 +860,7 @@ static irqreturn_t bam_dma_irq(int irq, void *data)
if (srcs & P_IRQ)
tasklet_schedule(&bdev->task);
- ret = pm_runtime_get_sync(bdev->dev);
+ ret = bam_pm_runtime_get_sync(bdev->dev);
if (ret < 0)
return ret;
@@ -969,7 +977,7 @@ static void bam_start_dma(struct bam_chan *bchan)
if (!vd)
return;
- ret = pm_runtime_get_sync(bdev->dev);
+ ret = bam_pm_runtime_get_sync(bdev->dev);
if (ret < 0)
return;
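The bam_dma hunks route every pm_runtime_get_sync() through a helper so controllers whose runtime PM is disabled (for instance when the BAM clocks are controlled remotely) keep working: a disabled device is simply treated as always powered. A sketch of the helper plus a typical caller, names illustrative:

	#include <linux/pm_runtime.h>

	static int demo_pm_get(struct device *dev)
	{
		if (pm_runtime_enabled(dev))
			return pm_runtime_get_sync(dev);
		return 0;	/* runtime PM off: act as always-on */
	}

	static int demo_touch_hw(struct device *dev)
	{
		int ret = demo_pm_get(dev);

		if (ret < 0)
			return ret;
		/* ... program registers ... */
		pm_runtime_mark_last_busy(dev);
		pm_runtime_put_autosuspend(dev);
		return 0;
	}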
diff --git a/drivers/dma/qcom/hidma_mgmt.c b/drivers/dma/qcom/hidma_mgmt.c
index 000c7019ca7d..d64edeb6771a 100644
--- a/drivers/dma/qcom/hidma_mgmt.c
+++ b/drivers/dma/qcom/hidma_mgmt.c
@@ -398,7 +398,7 @@ static int __init hidma_mgmt_of_populate_channels(struct device_node *np)
}
of_node_get(child);
new_pdev->dev.of_node = child;
- of_dma_configure(&new_pdev->dev, child);
+ of_dma_configure(&new_pdev->dev, child, true);
/*
* It is assumed that calling of_msi_configure is safe on
* platforms with or without MSI support.
diff --git a/drivers/firmware/arm_scmi/driver.c b/drivers/firmware/arm_scmi/driver.c
index 14b147135a0c..2455be8cbc4f 100644
--- a/drivers/firmware/arm_scmi/driver.c
+++ b/drivers/firmware/arm_scmi/driver.c
@@ -778,6 +778,7 @@ scmi_create_protocol_device(struct device_node *np, struct scmi_info *info,
if (scmi_mbox_chan_setup(info, &sdev->dev, prot_id)) {
dev_err(&sdev->dev, "failed to setup transport\n");
scmi_device_destroy(sdev);
+ return;
}
/* setup handle now as the transport is ready */
diff --git a/drivers/firmware/efi/Kconfig b/drivers/firmware/efi/Kconfig
index 3098410abad8..781a4a337557 100644
--- a/drivers/firmware/efi/Kconfig
+++ b/drivers/firmware/efi/Kconfig
@@ -174,6 +174,11 @@ config UEFI_CPER_ARM
depends on UEFI_CPER && ( ARM || ARM64 )
default y
+config UEFI_CPER_X86
+ bool
+ depends on UEFI_CPER && X86
+ default y
+
config EFI_DEV_PATH_PARSER
bool
depends on ACPI
diff --git a/drivers/firmware/efi/Makefile b/drivers/firmware/efi/Makefile
index cb805374f4bc..5f9f5039de50 100644
--- a/drivers/firmware/efi/Makefile
+++ b/drivers/firmware/efi/Makefile
@@ -31,3 +31,4 @@ obj-$(CONFIG_ARM) += $(arm-obj-y)
obj-$(CONFIG_ARM64) += $(arm-obj-y)
obj-$(CONFIG_EFI_CAPSULE_LOADER) += capsule-loader.o
obj-$(CONFIG_UEFI_CPER_ARM) += cper-arm.o
+obj-$(CONFIG_UEFI_CPER_X86) += cper-x86.o
diff --git a/drivers/firmware/efi/capsule-loader.c b/drivers/firmware/efi/capsule-loader.c
index e456f4602df1..96688986da56 100644
--- a/drivers/firmware/efi/capsule-loader.c
+++ b/drivers/firmware/efi/capsule-loader.c
@@ -134,10 +134,16 @@ static ssize_t efi_capsule_submit_update(struct capsule_info *cap_info)
/* Indicate capsule binary uploading is done */
cap_info->index = NO_FURTHER_WRITE_ACTION;
- pr_info("Successfully upload capsule file with reboot type '%s'\n",
- !cap_info->reset_type ? "RESET_COLD" :
- cap_info->reset_type == 1 ? "RESET_WARM" :
- "RESET_SHUTDOWN");
+
+ if (cap_info->header.flags & EFI_CAPSULE_PERSIST_ACROSS_RESET) {
+ pr_info("Successfully uploaded capsule file with reboot type '%s'\n",
+ !cap_info->reset_type ? "RESET_COLD" :
+ cap_info->reset_type == 1 ? "RESET_WARM" :
+ "RESET_SHUTDOWN");
+ } else {
+ pr_info("Successfully processed capsule file\n");
+ }
+
return 0;
}
diff --git a/drivers/firmware/efi/cper-arm.c b/drivers/firmware/efi/cper-arm.c
index 698e5c8e0c8d..502811344e81 100644
--- a/drivers/firmware/efi/cper-arm.c
+++ b/drivers/firmware/efi/cper-arm.c
@@ -30,8 +30,6 @@
#include <acpi/ghes.h>
#include <ras/ras_event.h>
-#define INDENT_SP " "
-
static const char * const arm_reg_ctx_strs[] = {
"AArch32 general purpose registers",
"AArch32 EL1 context registers",
@@ -283,7 +281,7 @@ void cper_print_proc_arm(const char *pfx,
pfx, proc->psci_state);
}
- snprintf(newpfx, sizeof(newpfx), "%s%s", pfx, INDENT_SP);
+ snprintf(newpfx, sizeof(newpfx), "%s ", pfx);
err_info = (struct cper_arm_err_info *)(proc + 1);
for (i = 0; i < proc->err_info_num; i++) {
@@ -310,7 +308,7 @@ void cper_print_proc_arm(const char *pfx,
if (err_info->validation_bits & CPER_ARM_INFO_VALID_ERR_INFO) {
printk("%serror_info: 0x%016llx\n", newpfx,
err_info->error_info);
- snprintf(infopfx, sizeof(infopfx), "%s%s", newpfx, INDENT_SP);
+ snprintf(infopfx, sizeof(infopfx), "%s ", newpfx);
cper_print_arm_err_info(infopfx, err_info->type,
err_info->error_info);
}
diff --git a/drivers/firmware/efi/cper-x86.c b/drivers/firmware/efi/cper-x86.c
new file mode 100644
index 000000000000..2531de49f56c
--- /dev/null
+++ b/drivers/firmware/efi/cper-x86.c
@@ -0,0 +1,356 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2018, Advanced Micro Devices, Inc.
+
+#include <linux/cper.h>
+
+/*
+ * We don't need a "CPER_IA" prefix since these are all locally defined.
+ * This will save us a lot of line space.
+ */
+#define VALID_LAPIC_ID BIT_ULL(0)
+#define VALID_CPUID_INFO BIT_ULL(1)
+#define VALID_PROC_ERR_INFO_NUM(bits) (((bits) & GENMASK_ULL(7, 2)) >> 2)
+#define VALID_PROC_CXT_INFO_NUM(bits) (((bits) & GENMASK_ULL(13, 8)) >> 8)
+
+#define INFO_ERR_STRUCT_TYPE_CACHE \
+ GUID_INIT(0xA55701F5, 0xE3EF, 0x43DE, 0xAC, 0x72, 0x24, 0x9B, \
+ 0x57, 0x3F, 0xAD, 0x2C)
+#define INFO_ERR_STRUCT_TYPE_TLB \
+ GUID_INIT(0xFC06B535, 0x5E1F, 0x4562, 0x9F, 0x25, 0x0A, 0x3B, \
+ 0x9A, 0xDB, 0x63, 0xC3)
+#define INFO_ERR_STRUCT_TYPE_BUS \
+ GUID_INIT(0x1CF3F8B3, 0xC5B1, 0x49a2, 0xAA, 0x59, 0x5E, 0xEF, \
+ 0x92, 0xFF, 0xA6, 0x3C)
+#define INFO_ERR_STRUCT_TYPE_MS \
+ GUID_INIT(0x48AB7F57, 0xDC34, 0x4f6c, 0xA7, 0xD3, 0xB0, 0xB5, \
+ 0xB0, 0xA7, 0x43, 0x14)
+
+#define INFO_VALID_CHECK_INFO BIT_ULL(0)
+#define INFO_VALID_TARGET_ID BIT_ULL(1)
+#define INFO_VALID_REQUESTOR_ID BIT_ULL(2)
+#define INFO_VALID_RESPONDER_ID BIT_ULL(3)
+#define INFO_VALID_IP BIT_ULL(4)
+
+#define CHECK_VALID_TRANS_TYPE BIT_ULL(0)
+#define CHECK_VALID_OPERATION BIT_ULL(1)
+#define CHECK_VALID_LEVEL BIT_ULL(2)
+#define CHECK_VALID_PCC BIT_ULL(3)
+#define CHECK_VALID_UNCORRECTED BIT_ULL(4)
+#define CHECK_VALID_PRECISE_IP BIT_ULL(5)
+#define CHECK_VALID_RESTARTABLE_IP BIT_ULL(6)
+#define CHECK_VALID_OVERFLOW BIT_ULL(7)
+
+#define CHECK_VALID_BUS_PART_TYPE BIT_ULL(8)
+#define CHECK_VALID_BUS_TIME_OUT BIT_ULL(9)
+#define CHECK_VALID_BUS_ADDR_SPACE BIT_ULL(10)
+
+#define CHECK_VALID_BITS(check) (((check) & GENMASK_ULL(15, 0)))
+#define CHECK_TRANS_TYPE(check) (((check) & GENMASK_ULL(17, 16)) >> 16)
+#define CHECK_OPERATION(check) (((check) & GENMASK_ULL(21, 18)) >> 18)
+#define CHECK_LEVEL(check) (((check) & GENMASK_ULL(24, 22)) >> 22)
+#define CHECK_PCC BIT_ULL(25)
+#define CHECK_UNCORRECTED BIT_ULL(26)
+#define CHECK_PRECISE_IP BIT_ULL(27)
+#define CHECK_RESTARTABLE_IP BIT_ULL(28)
+#define CHECK_OVERFLOW BIT_ULL(29)
+
+#define CHECK_BUS_PART_TYPE(check) (((check) & GENMASK_ULL(31, 30)) >> 30)
+#define CHECK_BUS_TIME_OUT BIT_ULL(32)
+#define CHECK_BUS_ADDR_SPACE(check) (((check) & GENMASK_ULL(34, 33)) >> 33)
+
+#define CHECK_VALID_MS_ERR_TYPE BIT_ULL(0)
+#define CHECK_VALID_MS_PCC BIT_ULL(1)
+#define CHECK_VALID_MS_UNCORRECTED BIT_ULL(2)
+#define CHECK_VALID_MS_PRECISE_IP BIT_ULL(3)
+#define CHECK_VALID_MS_RESTARTABLE_IP BIT_ULL(4)
+#define CHECK_VALID_MS_OVERFLOW BIT_ULL(5)
+
+#define CHECK_MS_ERR_TYPE(check) (((check) & GENMASK_ULL(18, 16)) >> 16)
+#define CHECK_MS_PCC BIT_ULL(19)
+#define CHECK_MS_UNCORRECTED BIT_ULL(20)
+#define CHECK_MS_PRECISE_IP BIT_ULL(21)
+#define CHECK_MS_RESTARTABLE_IP BIT_ULL(22)
+#define CHECK_MS_OVERFLOW BIT_ULL(23)
+
+#define CTX_TYPE_MSR 1
+#define CTX_TYPE_MMREG 7
+
+enum err_types {
+ ERR_TYPE_CACHE = 0,
+ ERR_TYPE_TLB,
+ ERR_TYPE_BUS,
+ ERR_TYPE_MS,
+ N_ERR_TYPES
+};
+
+static enum err_types cper_get_err_type(const guid_t *err_type)
+{
+ if (guid_equal(err_type, &INFO_ERR_STRUCT_TYPE_CACHE))
+ return ERR_TYPE_CACHE;
+ else if (guid_equal(err_type, &INFO_ERR_STRUCT_TYPE_TLB))
+ return ERR_TYPE_TLB;
+ else if (guid_equal(err_type, &INFO_ERR_STRUCT_TYPE_BUS))
+ return ERR_TYPE_BUS;
+ else if (guid_equal(err_type, &INFO_ERR_STRUCT_TYPE_MS))
+ return ERR_TYPE_MS;
+ else
+ return N_ERR_TYPES;
+}
+
+static const char * const ia_check_trans_type_strs[] = {
+ "Instruction",
+ "Data Access",
+ "Generic",
+};
+
+static const char * const ia_check_op_strs[] = {
+ "generic error",
+ "generic read",
+ "generic write",
+ "data read",
+ "data write",
+ "instruction fetch",
+ "prefetch",
+ "eviction",
+ "snoop",
+};
+
+static const char * const ia_check_bus_part_type_strs[] = {
+ "Local Processor originated request",
+ "Local Processor responded to request",
+ "Local Processor observed",
+ "Generic",
+};
+
+static const char * const ia_check_bus_addr_space_strs[] = {
+ "Memory Access",
+ "Reserved",
+ "I/O",
+ "Other Transaction",
+};
+
+static const char * const ia_check_ms_error_type_strs[] = {
+ "No Error",
+ "Unclassified",
+ "Microcode ROM Parity Error",
+ "External Error",
+ "FRC Error",
+ "Internal Unclassified",
+};
+
+static const char * const ia_reg_ctx_strs[] = {
+ "Unclassified Data",
+ "MSR Registers (Machine Check and other MSRs)",
+ "32-bit Mode Execution Context",
+ "64-bit Mode Execution Context",
+ "FXSAVE Context",
+ "32-bit Mode Debug Registers (DR0-DR7)",
+ "64-bit Mode Debug Registers (DR0-DR7)",
+ "Memory Mapped Registers",
+};
+
+static inline void print_bool(char *str, const char *pfx, u64 check, u64 bit)
+{
+ printk("%s%s: %s\n", pfx, str, (check & bit) ? "true" : "false");
+}
+
+static void print_err_info_ms(const char *pfx, u16 validation_bits, u64 check)
+{
+ if (validation_bits & CHECK_VALID_MS_ERR_TYPE) {
+ u8 err_type = CHECK_MS_ERR_TYPE(check);
+
+ printk("%sError Type: %u, %s\n", pfx, err_type,
+ err_type < ARRAY_SIZE(ia_check_ms_error_type_strs) ?
+ ia_check_ms_error_type_strs[err_type] : "unknown");
+ }
+
+ if (validation_bits & CHECK_VALID_MS_PCC)
+ print_bool("Processor Context Corrupt", pfx, check, CHECK_MS_PCC);
+
+ if (validation_bits & CHECK_VALID_MS_UNCORRECTED)
+ print_bool("Uncorrected", pfx, check, CHECK_MS_UNCORRECTED);
+
+ if (validation_bits & CHECK_VALID_MS_PRECISE_IP)
+ print_bool("Precise IP", pfx, check, CHECK_MS_PRECISE_IP);
+
+ if (validation_bits & CHECK_VALID_MS_RESTARTABLE_IP)
+ print_bool("Restartable IP", pfx, check, CHECK_MS_RESTARTABLE_IP);
+
+ if (validation_bits & CHECK_VALID_MS_OVERFLOW)
+ print_bool("Overflow", pfx, check, CHECK_MS_OVERFLOW);
+}
+
+static void print_err_info(const char *pfx, u8 err_type, u64 check)
+{
+ u16 validation_bits = CHECK_VALID_BITS(check);
+
+ /*
+ * The MS Check structure varies a lot from the others, so use a
+ * separate function for decoding.
+ */
+ if (err_type == ERR_TYPE_MS)
+ return print_err_info_ms(pfx, validation_bits, check);
+
+ if (validation_bits & CHECK_VALID_TRANS_TYPE) {
+ u8 trans_type = CHECK_TRANS_TYPE(check);
+
+ printk("%sTransaction Type: %u, %s\n", pfx, trans_type,
+ trans_type < ARRAY_SIZE(ia_check_trans_type_strs) ?
+ ia_check_trans_type_strs[trans_type] : "unknown");
+ }
+
+ if (validation_bits & CHECK_VALID_OPERATION) {
+ u8 op = CHECK_OPERATION(check);
+
+ /*
+ * CACHE has more operation types than TLB or BUS, though the
+ * name and the order are the same.
+ */
+ u8 max_ops = (err_type == ERR_TYPE_CACHE) ? 9 : 7;
+
+ printk("%sOperation: %u, %s\n", pfx, op,
+ op < max_ops ? ia_check_op_strs[op] : "unknown");
+ }
+
+ if (validation_bits & CHECK_VALID_LEVEL)
+ printk("%sLevel: %llu\n", pfx, CHECK_LEVEL(check));
+
+ if (validation_bits & CHECK_VALID_PCC)
+ print_bool("Processor Context Corrupt", pfx, check, CHECK_PCC);
+
+ if (validation_bits & CHECK_VALID_UNCORRECTED)
+ print_bool("Uncorrected", pfx, check, CHECK_UNCORRECTED);
+
+ if (validation_bits & CHECK_VALID_PRECISE_IP)
+ print_bool("Precise IP", pfx, check, CHECK_PRECISE_IP);
+
+ if (validation_bits & CHECK_VALID_RESTARTABLE_IP)
+ print_bool("Restartable IP", pfx, check, CHECK_RESTARTABLE_IP);
+
+ if (validation_bits & CHECK_VALID_OVERFLOW)
+ print_bool("Overflow", pfx, check, CHECK_OVERFLOW);
+
+ if (err_type != ERR_TYPE_BUS)
+ return;
+
+ if (validation_bits & CHECK_VALID_BUS_PART_TYPE) {
+ u8 part_type = CHECK_BUS_PART_TYPE(check);
+
+ printk("%sParticipation Type: %u, %s\n", pfx, part_type,
+ part_type < ARRAY_SIZE(ia_check_bus_part_type_strs) ?
+ ia_check_bus_part_type_strs[part_type] : "unknown");
+ }
+
+ if (validation_bits & CHECK_VALID_BUS_TIME_OUT)
+ print_bool("Time Out", pfx, check, CHECK_BUS_TIME_OUT);
+
+ if (validation_bits & CHECK_VALID_BUS_ADDR_SPACE) {
+ u8 addr_space = CHECK_BUS_ADDR_SPACE(check);
+
+ printk("%sAddress Space: %u, %s\n", pfx, addr_space,
+ addr_space < ARRAY_SIZE(ia_check_bus_addr_space_strs) ?
+ ia_check_bus_addr_space_strs[addr_space] : "unknown");
+ }
+}
+
+void cper_print_proc_ia(const char *pfx, const struct cper_sec_proc_ia *proc)
+{
+ int i;
+ struct cper_ia_err_info *err_info;
+ struct cper_ia_proc_ctx *ctx_info;
+ char newpfx[64], infopfx[64];
+ u8 err_type;
+
+ if (proc->validation_bits & VALID_LAPIC_ID)
+ printk("%sLocal APIC_ID: 0x%llx\n", pfx, proc->lapic_id);
+
+ if (proc->validation_bits & VALID_CPUID_INFO) {
+ printk("%sCPUID Info:\n", pfx);
+ print_hex_dump(pfx, "", DUMP_PREFIX_OFFSET, 16, 4, proc->cpuid,
+ sizeof(proc->cpuid), 0);
+ }
+
+ snprintf(newpfx, sizeof(newpfx), "%s ", pfx);
+
+ err_info = (struct cper_ia_err_info *)(proc + 1);
+ for (i = 0; i < VALID_PROC_ERR_INFO_NUM(proc->validation_bits); i++) {
+ printk("%sError Information Structure %d:\n", pfx, i);
+
+ err_type = cper_get_err_type(&err_info->err_type);
+ printk("%sError Structure Type: %s\n", newpfx,
+ err_type < ARRAY_SIZE(cper_proc_error_type_strs) ?
+ cper_proc_error_type_strs[err_type] : "unknown");
+
+ if (err_type >= N_ERR_TYPES) {
+ printk("%sError Structure Type: %pUl\n", newpfx,
+ &err_info->err_type);
+ }
+
+ if (err_info->validation_bits & INFO_VALID_CHECK_INFO) {
+ printk("%sCheck Information: 0x%016llx\n", newpfx,
+ err_info->check_info);
+
+ if (err_type < N_ERR_TYPES) {
+ snprintf(infopfx, sizeof(infopfx), "%s ",
+ newpfx);
+
+ print_err_info(infopfx, err_type,
+ err_info->check_info);
+ }
+ }
+
+ if (err_info->validation_bits & INFO_VALID_TARGET_ID) {
+ printk("%sTarget Identifier: 0x%016llx\n",
+ newpfx, err_info->target_id);
+ }
+
+ if (err_info->validation_bits & INFO_VALID_REQUESTOR_ID) {
+ printk("%sRequestor Identifier: 0x%016llx\n",
+ newpfx, err_info->requestor_id);
+ }
+
+ if (err_info->validation_bits & INFO_VALID_RESPONDER_ID) {
+ printk("%sResponder Identifier: 0x%016llx\n",
+ newpfx, err_info->responder_id);
+ }
+
+ if (err_info->validation_bits & INFO_VALID_IP) {
+ printk("%sInstruction Pointer: 0x%016llx\n",
+ newpfx, err_info->ip);
+ }
+
+ err_info++;
+ }
+
+ ctx_info = (struct cper_ia_proc_ctx *)err_info;
+ for (i = 0; i < VALID_PROC_CXT_INFO_NUM(proc->validation_bits); i++) {
+ int size = sizeof(*ctx_info) + ctx_info->reg_arr_size;
+ int groupsize = 4;
+
+ printk("%sContext Information Structure %d:\n", pfx, i);
+
+ printk("%sRegister Context Type: %s\n", newpfx,
+ ctx_info->reg_ctx_type < ARRAY_SIZE(ia_reg_ctx_strs) ?
+ ia_reg_ctx_strs[ctx_info->reg_ctx_type] : "unknown");
+
+ printk("%sRegister Array Size: 0x%04x\n", newpfx,
+ ctx_info->reg_arr_size);
+
+ if (ctx_info->reg_ctx_type == CTX_TYPE_MSR) {
+ groupsize = 8; /* MSRs are 8 bytes wide. */
+ printk("%sMSR Address: 0x%08x\n", newpfx,
+ ctx_info->msr_addr);
+ }
+
+ if (ctx_info->reg_ctx_type == CTX_TYPE_MMREG) {
+ printk("%sMM Register Address: 0x%016llx\n", newpfx,
+ ctx_info->mm_reg_addr);
+ }
+
+ printk("%sRegister Array:\n", newpfx);
+ print_hex_dump(newpfx, "", DUMP_PREFIX_OFFSET, 16, groupsize,
+ (ctx_info + 1), ctx_info->reg_arr_size, 0);
+
+ ctx_info = (struct cper_ia_proc_ctx *)((long)ctx_info + size);
+ }
+}
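All of the CHECK_*() accessors in this new file follow one idiom: mask a bitfield out of the 64-bit check word with GENMASK_ULL() and shift it down to bit 0. A self-contained userspace sketch of that extraction (the GENMASK_ULL definition here is a stand-in for the kernel's, equivalent for 0 <= l <= h <= 63):

	#include <stdint.h>
	#include <stdio.h>

	#define GENMASK_ULL(h, l) \
		((~0ULL << (l)) & (~0ULL >> (63 - (h))))
	/* Same shape as the kernel macro above: bits 24:22 of the check word. */
	#define CHECK_LEVEL(check) (((check) & GENMASK_ULL(24, 22)) >> 22)

	int main(void)
	{
		uint64_t check = 5ULL << 22;	/* cache level field = 5 */

		printf("level = %llu\n",
		       (unsigned long long)CHECK_LEVEL(check));
		return 0;
	}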
diff --git a/drivers/firmware/efi/cper.c b/drivers/firmware/efi/cper.c
index c165933ebf38..3bf0dca378a6 100644
--- a/drivers/firmware/efi/cper.c
+++ b/drivers/firmware/efi/cper.c
@@ -37,8 +37,6 @@
#include <acpi/ghes.h>
#include <ras/ras_event.h>
-#define INDENT_SP " "
-
static char rcd_decode_str[CPER_REC_LEN];
/*
@@ -433,7 +431,7 @@ cper_estatus_print_section(const char *pfx, struct acpi_hest_generic_data *gdata
if (gdata->validation_bits & CPER_SEC_VALID_FRU_TEXT)
printk("%s""fru_text: %.20s\n", pfx, gdata->fru_text);
- snprintf(newpfx, sizeof(newpfx), "%s%s", pfx, INDENT_SP);
+ snprintf(newpfx, sizeof(newpfx), "%s ", pfx);
if (guid_equal(sec_type, &CPER_SEC_PROC_GENERIC)) {
struct cper_sec_proc_generic *proc_err = acpi_hest_get_payload(gdata);
@@ -470,6 +468,16 @@ cper_estatus_print_section(const char *pfx, struct acpi_hest_generic_data *gdata
else
goto err_section_too_small;
#endif
+#if defined(CONFIG_UEFI_CPER_X86)
+ } else if (guid_equal(sec_type, &CPER_SEC_PROC_IA)) {
+ struct cper_sec_proc_ia *ia_err = acpi_hest_get_payload(gdata);
+
+ printk("%ssection_type: IA32/X64 processor error\n", newpfx);
+ if (gdata->error_data_length >= sizeof(*ia_err))
+ cper_print_proc_ia(newpfx, ia_err);
+ else
+ goto err_section_too_small;
+#endif
} else {
const void *err = acpi_hest_get_payload(gdata);
@@ -500,7 +508,7 @@ void cper_estatus_print(const char *pfx,
"It has been corrected by h/w "
"and requires no further action");
printk("%s""event severity: %s\n", pfx, cper_severity_str(severity));
- snprintf(newpfx, sizeof(newpfx), "%s%s", pfx, INDENT_SP);
+ snprintf(newpfx, sizeof(newpfx), "%s ", pfx);
apei_estatus_for_each_section(estatus, gdata) {
cper_estatus_print_section(newpfx, gdata, sec_no);
diff --git a/drivers/firmware/efi/libstub/arm64-stub.c b/drivers/firmware/efi/libstub/arm64-stub.c
index b9bd827caa22..1b4d465cc5d9 100644
--- a/drivers/firmware/efi/libstub/arm64-stub.c
+++ b/drivers/firmware/efi/libstub/arm64-stub.c
@@ -98,6 +98,16 @@ efi_status_t handle_kernel_image(efi_system_table_t *sys_table_arg,
(phys_seed >> 32) & mask : TEXT_OFFSET;
/*
+ * With CONFIG_RANDOMIZE_TEXT_OFFSET=y, TEXT_OFFSET may not
+ * be a multiple of EFI_KIMG_ALIGN, and we must ensure that
+ * we preserve the misalignment of 'offset' relative to
+ * EFI_KIMG_ALIGN so that statically allocated objects whose
+ * alignment exceeds PAGE_SIZE appear correctly aligned in
+ * memory.
+ */
+ offset |= TEXT_OFFSET % EFI_KIMG_ALIGN;
+
+ /*
* If KASLR is enabled, and we have some randomness available,
* locate the kernel at a randomized offset in physical memory.
*/
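The arm64 stub comment deserves a concrete illustration: because the randomized base is EFI_KIMG_ALIGN-aligned, OR-ing in TEXT_OFFSET % EFI_KIMG_ALIGN keeps the load address congruent to TEXT_OFFSET modulo the alignment, so statically allocated objects aligned beyond PAGE_SIZE still land where the linker assumed. A userspace sketch with made-up values (2 MiB alignment, deliberately odd TEXT_OFFSET):

	#include <stdio.h>

	int main(void)
	{
		const unsigned long align = 0x200000;	/* assumed EFI_KIMG_ALIGN */
		const unsigned long text_offset = 0x81000;	/* example value */
		unsigned long offset = 0x40000000;	/* align-ed random base */

		offset |= text_offset % align;	/* OR == add: low bits were 0 */
		printf("offset %% align = 0x%lx, TEXT_OFFSET %% align = 0x%lx\n",
		       offset % align, text_offset % align);
		return 0;
	}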
diff --git a/drivers/firmware/efi/libstub/secureboot.c b/drivers/firmware/efi/libstub/secureboot.c
index 8f07eb414c00..72d9dfbebf08 100644
--- a/drivers/firmware/efi/libstub/secureboot.c
+++ b/drivers/firmware/efi/libstub/secureboot.c
@@ -30,6 +30,9 @@ static const efi_char16_t shim_MokSBState_name[] = L"MokSBState";
/*
* Determine whether we're in secure boot mode.
+ *
+ * Please keep the logic in sync with
+ * arch/x86/xen/efi.c:xen_efi_get_secureboot().
*/
enum efi_secureboot_mode efi_get_secureboot(efi_system_table_t *sys_table_arg)
{
diff --git a/drivers/firmware/efi/libstub/tpm.c b/drivers/firmware/efi/libstub/tpm.c
index 9d08cea3f1b0..caa37a6dd9d4 100644
--- a/drivers/firmware/efi/libstub/tpm.c
+++ b/drivers/firmware/efi/libstub/tpm.c
@@ -59,7 +59,7 @@ void efi_enable_reset_attack_mitigation(efi_system_table_t *sys_table_arg)
#endif
-void efi_retrieve_tpm2_eventlog_1_2(efi_system_table_t *sys_table_arg)
+static void efi_retrieve_tpm2_eventlog_1_2(efi_system_table_t *sys_table_arg)
{
efi_guid_t tcg2_guid = EFI_TCG2_PROTOCOL_GUID;
efi_guid_t linux_eventlog_guid = LINUX_EFI_TPM_EVENT_LOG_GUID;
diff --git a/drivers/firmware/qcom_scm-32.c b/drivers/firmware/qcom_scm-32.c
index dfbd894d5bb7..4e24e591ae74 100644
--- a/drivers/firmware/qcom_scm-32.c
+++ b/drivers/firmware/qcom_scm-32.c
@@ -147,7 +147,7 @@ static u32 smc(u32 cmd_addr)
"smc #0 @ switch to secure world\n"
: "=r" (r0)
: "r" (r0), "r" (r1), "r" (r2)
- : "r3");
+ : "r3", "r12");
} while (r0 == QCOM_SCM_INTERRUPTED);
return r0;
@@ -263,7 +263,7 @@ static s32 qcom_scm_call_atomic1(u32 svc, u32 cmd, u32 arg1)
"smc #0 @ switch to secure world\n"
: "=r" (r0)
: "r" (r0), "r" (r1), "r" (r2)
- : "r3");
+ : "r3", "r12");
return r0;
}
@@ -298,7 +298,7 @@ static s32 qcom_scm_call_atomic2(u32 svc, u32 cmd, u32 arg1, u32 arg2)
"smc #0 @ switch to secure world\n"
: "=r" (r0)
: "r" (r0), "r" (r1), "r" (r2), "r" (r3)
- );
+ : "r12");
return r0;
}
@@ -328,7 +328,7 @@ u32 qcom_scm_get_version(void)
"smc #0 @ switch to secure world\n"
: "=r" (r0), "=r" (r1)
: "r" (r0), "r" (r1)
- : "r2", "r3");
+ : "r2", "r3", "r12");
} while (r0 == QCOM_SCM_INTERRUPTED);
version = r1;
diff --git a/drivers/gpio/gpio-aspeed.c b/drivers/gpio/gpio-aspeed.c
index 77e485557498..6f693b7d5220 100644
--- a/drivers/gpio/gpio-aspeed.c
+++ b/drivers/gpio/gpio-aspeed.c
@@ -384,7 +384,7 @@ static void aspeed_gpio_irq_set_mask(struct irq_data *d, bool set)
if (set)
reg |= bit;
else
- reg &= bit;
+ reg &= ~bit;
iowrite32(reg, addr);
spin_unlock_irqrestore(&gpio->lock, flags);
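The gpio-aspeed fix is a classic one-character mask bug: clearing a bit needs reg &= ~bit, while reg &= bit throws away every other bit in the register and leaves the one you wanted cleared. A two-line userspace demonstration:

	#include <stdio.h>

	int main(void)
	{
		unsigned int reg = 0xf0, bit = 0x10;
		unsigned int wrong = reg & bit;		/* 0x10: all other bits lost */
		unsigned int right = reg & ~bit;	/* 0xe0: only 'bit' cleared */

		printf("wrong=0x%02x right=0x%02x\n", wrong, right);
		return 0;
	}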
diff --git a/drivers/gpio/gpio-pci-idio-16.c b/drivers/gpio/gpio-pci-idio-16.c
index 1948724d8c36..25d16b2af1c3 100644
--- a/drivers/gpio/gpio-pci-idio-16.c
+++ b/drivers/gpio/gpio-pci-idio-16.c
@@ -116,9 +116,9 @@ static int idio_16_gpio_get_multiple(struct gpio_chip *chip,
unsigned long word_mask;
const unsigned long port_mask = GENMASK(gpio_reg_size - 1, 0);
unsigned long port_state;
- u8 __iomem ports[] = {
- idio16gpio->reg->out0_7, idio16gpio->reg->out8_15,
- idio16gpio->reg->in0_7, idio16gpio->reg->in8_15,
+ void __iomem *ports[] = {
+ &idio16gpio->reg->out0_7, &idio16gpio->reg->out8_15,
+ &idio16gpio->reg->in0_7, &idio16gpio->reg->in8_15,
};
/* clear bits array to a clean slate */
@@ -143,7 +143,7 @@ static int idio_16_gpio_get_multiple(struct gpio_chip *chip,
}
/* read bits from current gpio port */
- port_state = ioread8(ports + i);
+ port_state = ioread8(ports[i]);
/* store acquired bits at respective bits array offset */
bits[word_index] |= port_state << word_offset;
diff --git a/drivers/gpio/gpio-pcie-idio-24.c b/drivers/gpio/gpio-pcie-idio-24.c
index 835607ecf658..f953541e7890 100644
--- a/drivers/gpio/gpio-pcie-idio-24.c
+++ b/drivers/gpio/gpio-pcie-idio-24.c
@@ -206,10 +206,10 @@ static int idio_24_gpio_get_multiple(struct gpio_chip *chip,
unsigned long word_mask;
const unsigned long port_mask = GENMASK(gpio_reg_size - 1, 0);
unsigned long port_state;
- u8 __iomem ports[] = {
- idio24gpio->reg->out0_7, idio24gpio->reg->out8_15,
- idio24gpio->reg->out16_23, idio24gpio->reg->in0_7,
- idio24gpio->reg->in8_15, idio24gpio->reg->in16_23,
+ void __iomem *ports[] = {
+ &idio24gpio->reg->out0_7, &idio24gpio->reg->out8_15,
+ &idio24gpio->reg->out16_23, &idio24gpio->reg->in0_7,
+ &idio24gpio->reg->in8_15, &idio24gpio->reg->in16_23,
};
const unsigned long out_mode_mask = BIT(1);
@@ -217,7 +217,7 @@ static int idio_24_gpio_get_multiple(struct gpio_chip *chip,
bitmap_zero(bits, chip->ngpio);
/* get bits are evaluated a gpio port register at a time */
- for (i = 0; i < ARRAY_SIZE(ports); i++) {
+ for (i = 0; i < ARRAY_SIZE(ports) + 1; i++) {
/* gpio offset in bits array */
bits_offset = i * gpio_reg_size;
@@ -236,7 +236,7 @@ static int idio_24_gpio_get_multiple(struct gpio_chip *chip,
/* read bits from current gpio port (port 6 is TTL GPIO) */
if (i < 6)
- port_state = ioread8(ports + i);
+ port_state = ioread8(ports[i]);
else if (ioread8(&idio24gpio->reg->ctl) & out_mode_mask)
port_state = ioread8(&idio24gpio->reg->ttl_out0_7);
else
@@ -301,9 +301,9 @@ static void idio_24_gpio_set_multiple(struct gpio_chip *chip,
const unsigned long port_mask = GENMASK(gpio_reg_size, 0);
unsigned long flags;
unsigned int out_state;
- u8 __iomem ports[] = {
- idio24gpio->reg->out0_7, idio24gpio->reg->out8_15,
- idio24gpio->reg->out16_23
+ void __iomem *ports[] = {
+ &idio24gpio->reg->out0_7, &idio24gpio->reg->out8_15,
+ &idio24gpio->reg->out16_23
};
const unsigned long out_mode_mask = BIT(1);
const unsigned int ttl_offset = 48;
@@ -327,9 +327,9 @@ static void idio_24_gpio_set_multiple(struct gpio_chip *chip,
raw_spin_lock_irqsave(&idio24gpio->lock, flags);
/* process output lines */
- out_state = ioread8(ports + i) & ~gpio_mask;
+ out_state = ioread8(ports[i]) & ~gpio_mask;
out_state |= (*bits >> bits_offset) & gpio_mask;
- iowrite8(out_state, ports + i);
+ iowrite8(out_state, ports[i]);
raw_spin_unlock_irqrestore(&idio24gpio->lock, flags);
}
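Both IDIO fixes above share one root cause: declaring the table as u8 __iomem ports[] copies the registers' current values into a stack array at initialization, so the later ioread8(ports + i) read from the stack copy rather than the device. Keeping an array of __iomem addresses makes each access hit the bus. A condensed sketch with an illustrative register block:

	#include <linux/io.h>
	#include <linux/types.h>

	struct demo_regs {
		u8 out0_7;
		u8 out8_15;
	};

	static u8 demo_read_port(struct demo_regs __iomem *reg, unsigned int i)
	{
		/* Array of MMIO addresses, not of copied register values. */
		void __iomem *ports[] = { &reg->out0_7, &reg->out8_15 };

		return ioread8(ports[i]);
	}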
diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index 43aeb07343ec..d8ccb500872f 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -497,7 +497,7 @@ static int linehandle_create(struct gpio_device *gdev, void __user *ip)
struct gpiohandle_request handlereq;
struct linehandle_state *lh;
struct file *file;
- int fd, i, ret;
+ int fd, i, count = 0, ret;
u32 lflags;
if (copy_from_user(&handlereq, ip, sizeof(handlereq)))
@@ -558,6 +558,7 @@ static int linehandle_create(struct gpio_device *gdev, void __user *ip)
if (ret)
goto out_free_descs;
lh->descs[i] = desc;
+ count = i;
if (lflags & GPIOHANDLE_REQUEST_ACTIVE_LOW)
set_bit(FLAG_ACTIVE_LOW, &desc->flags);
@@ -628,7 +629,7 @@ static int linehandle_create(struct gpio_device *gdev, void __user *ip)
out_put_unused_fd:
put_unused_fd(fd);
out_free_descs:
- for (; i >= 0; i--)
+ for (i = 0; i < count; i++)
gpiod_free(lh->descs[i]);
kfree(lh->label);
out_free_lh:
@@ -902,7 +903,7 @@ static int lineevent_create(struct gpio_device *gdev, void __user *ip)
desc = &gdev->descs[offset];
ret = gpiod_request(desc, le->label);
if (ret)
- goto out_free_desc;
+ goto out_free_label;
le->desc = desc;
le->eflags = eflags;
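The linehandle_create() fix addresses an unwind bug: on a gpiod_request() failure at index i, the old loop for (; i >= 0; i--) also freed descs[i], a slot that was never assigned. The cure is the usual acquire-count pattern: record how far you got and release only that. A generic userspace sketch of the shape (counting completed acquisitions, not a line-for-line copy of the patch):

	#include <stdio.h>

	#define N 4

	static int acquire(int i) { return (i < 2) ? 0 : -1; }	/* fails at i == 2 */
	static void release(int i) { printf("release %d\n", i); }

	int main(void)
	{
		int i, count = 0;

		for (i = 0; i < N; i++) {
			if (acquire(i) < 0)
				goto unwind;
			count = i + 1;	/* acquisitions completed so far */
		}
		return 0;

	unwind:
		for (i = 0; i < count; i++)	/* touches only 0..count-1 */
			release(i);
		return 1;
	}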
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index 09d35051fdd6..3fabf9f97022 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -419,9 +419,11 @@ int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx, unsigned ring_id)
if (other) {
signed long r;
- r = dma_fence_wait_timeout(other, false, MAX_SCHEDULE_TIMEOUT);
+ r = dma_fence_wait(other, true);
if (r < 0) {
- DRM_ERROR("Error (%ld) waiting for fence!\n", r);
+ if (r != -ERESTARTSYS)
+ DRM_ERROR("Error (%ld) waiting for fence!\n", r);
+
return r;
}
}
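The amdgpu_ctx change swaps a non-interruptible timeout wait for an interruptible dma_fence_wait() so a hung ring can be escaped with a signal, and it deliberately stays quiet about -ERESTARTSYS, which only means the syscall will be restarted. A minimal sketch of that logging etiquette, names illustrative:

	#include <linux/dma-fence.h>
	#include <linux/errno.h>
	#include <linux/printk.h>

	static long demo_wait_fence(struct dma_fence *fence)
	{
		long r = dma_fence_wait(fence, true);	/* interruptible */

		/* -ERESTARTSYS is a signal-driven restart, not an error. */
		if (r < 0 && r != -ERESTARTSYS)
			pr_err("fence wait failed: %ld\n", r);
		return r;
	}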
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 1dd1142246c2..27579443cdc5 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -4555,8 +4555,8 @@ static int dm_update_crtcs_state(struct dc *dc,
for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) {
struct amdgpu_crtc *acrtc = NULL;
struct amdgpu_dm_connector *aconnector = NULL;
- struct drm_connector_state *new_con_state = NULL;
- struct dm_connector_state *dm_conn_state = NULL;
+ struct drm_connector_state *drm_new_conn_state = NULL, *drm_old_conn_state = NULL;
+ struct dm_connector_state *dm_new_conn_state = NULL, *dm_old_conn_state = NULL;
struct drm_plane_state *new_plane_state = NULL;
new_stream = NULL;
@@ -4577,19 +4577,23 @@ static int dm_update_crtcs_state(struct dc *dc,
/* TODO This hack should go away */
if (aconnector && enable) {
// Make sure fake sink is created in plug-in scenario
- new_con_state = drm_atomic_get_connector_state(state,
+ drm_new_conn_state = drm_atomic_get_new_connector_state(state,
&aconnector->base);
+ drm_old_conn_state = drm_atomic_get_old_connector_state(state,
+ &aconnector->base);
- if (IS_ERR(new_con_state)) {
- ret = PTR_ERR_OR_ZERO(new_con_state);
+
+ if (IS_ERR(drm_new_conn_state)) {
+ ret = PTR_ERR_OR_ZERO(drm_new_conn_state);
break;
}
- dm_conn_state = to_dm_connector_state(new_con_state);
+ dm_new_conn_state = to_dm_connector_state(drm_new_conn_state);
+ dm_old_conn_state = to_dm_connector_state(drm_old_conn_state);
new_stream = create_stream_for_sink(aconnector,
&new_crtc_state->mode,
- dm_conn_state);
+ dm_new_conn_state);
/*
* we can have no stream on ACTION_SET if a display
@@ -4695,20 +4699,30 @@ next_crtc:
* We want to do dc stream updates that do not require a
* full modeset below.
*/
- if (!enable || !aconnector || modereset_required(new_crtc_state))
+ if (!(enable && aconnector && new_crtc_state->enable &&
+ new_crtc_state->active))
continue;
/*
* Given above conditions, the dc state cannot be NULL because:
- * 1. We're attempting to enable a CRTC. Which has a...
- * 2. Valid connector attached, and
- * 3. User does not want to reset it (disable or mark inactive,
- * which can happen on a CRTC that's already disabled).
- * => It currently exists.
+ * 1. We're in the process of enabling CRTCs (just been added
+ * to the dc context, or already is on the context)
+ * 2. Has a valid connector attached, and
+ * 3. Is currently active and enabled.
+ * => The dc stream state currently exists.
*/
BUG_ON(dm_new_crtc_state->stream == NULL);
- /* Color managment settings */
- if (dm_new_crtc_state->base.color_mgmt_changed) {
+ /* Scaling or underscan settings */
+ if (is_scaling_state_different(dm_old_conn_state, dm_new_conn_state))
+ update_stream_scaling_settings(
+ &new_crtc_state->mode, dm_new_conn_state, dm_new_crtc_state->stream);
+
+ /*
+ * Color management settings. We also update color properties
+ * when a modeset is needed, to ensure it gets reprogrammed.
+ */
+ if (dm_new_crtc_state->base.color_mgmt_changed ||
+ drm_atomic_crtc_needs_modeset(new_crtc_state)) {
ret = amdgpu_dm_set_regamma_lut(dm_new_crtc_state);
if (ret)
goto fail;
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
index ace9ad578ca0..4304d9e408b8 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
@@ -83,21 +83,22 @@ static ssize_t dm_dp_aux_transfer(struct drm_dp_aux *aux,
enum i2c_mot_mode mot = (msg->request & DP_AUX_I2C_MOT) ?
I2C_MOT_TRUE : I2C_MOT_FALSE;
enum ddc_result res;
- ssize_t read_bytes;
+ uint32_t read_bytes = msg->size;
if (WARN_ON(msg->size > 16))
return -E2BIG;
switch (msg->request & ~DP_AUX_I2C_MOT) {
case DP_AUX_NATIVE_READ:
- read_bytes = dal_ddc_service_read_dpcd_data(
+ res = dal_ddc_service_read_dpcd_data(
TO_DM_AUX(aux)->ddc_service,
false,
I2C_MOT_UNDEF,
msg->address,
msg->buffer,
- msg->size);
- return read_bytes;
+ msg->size,
+ &read_bytes);
+ break;
case DP_AUX_NATIVE_WRITE:
res = dal_ddc_service_write_dpcd_data(
TO_DM_AUX(aux)->ddc_service,
@@ -108,14 +109,15 @@ static ssize_t dm_dp_aux_transfer(struct drm_dp_aux *aux,
msg->size);
break;
case DP_AUX_I2C_READ:
- read_bytes = dal_ddc_service_read_dpcd_data(
+ res = dal_ddc_service_read_dpcd_data(
TO_DM_AUX(aux)->ddc_service,
true,
mot,
msg->address,
msg->buffer,
- msg->size);
- return read_bytes;
+ msg->size,
+ &read_bytes);
+ break;
case DP_AUX_I2C_WRITE:
res = dal_ddc_service_write_dpcd_data(
TO_DM_AUX(aux)->ddc_service,
@@ -137,7 +139,9 @@ static ssize_t dm_dp_aux_transfer(struct drm_dp_aux *aux,
r == DDC_RESULT_SUCESSFULL);
#endif
- return msg->size;
+ if (res != DDC_RESULT_SUCESSFULL)
+ return -EIO;
+ return read_bytes;
}
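Net effect of the dm_dp_aux_transfer() rework above: dal_ddc_service_read_dpcd_data() now reports status through enum ddc_result and the transferred length through an out-parameter, so a short AUX read can no longer be mistaken for a status code. Sketch of the resulting call shape, with locals assumed from the surrounding function:

uint32_t read_bytes = msg->size;	/* default: the requested length */
enum ddc_result res;

res = dal_ddc_service_read_dpcd_data(ddc, false, I2C_MOT_UNDEF,
				     msg->address, msg->buffer,
				     msg->size, &read_bytes);
if (res != DDC_RESULT_SUCESSFULL)	/* spelling is the kernel's own */
	return -EIO;
return read_bytes;			/* actual bytes, possibly short */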
static enum drm_connector_status
diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c
index 985fe8c22875..10a5807a7e8b 100644
--- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c
+++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c
@@ -70,6 +70,10 @@ static enum bp_result get_firmware_info_v3_1(
struct bios_parser *bp,
struct dc_firmware_info *info);
+static enum bp_result get_firmware_info_v3_2(
+ struct bios_parser *bp,
+ struct dc_firmware_info *info);
+
static struct atom_hpd_int_record *get_hpd_record(struct bios_parser *bp,
struct atom_display_object_path_v2 *object);
@@ -1321,9 +1325,11 @@ static enum bp_result bios_parser_get_firmware_info(
case 3:
switch (revision.minor) {
case 1:
- case 2:
result = get_firmware_info_v3_1(bp, info);
break;
+ case 2:
+ result = get_firmware_info_v3_2(bp, info);
+ break;
default:
break;
}
@@ -1383,6 +1389,84 @@ static enum bp_result get_firmware_info_v3_1(
return BP_RESULT_OK;
}
+static enum bp_result get_firmware_info_v3_2(
+ struct bios_parser *bp,
+ struct dc_firmware_info *info)
+{
+ struct atom_firmware_info_v3_2 *firmware_info;
+ struct atom_display_controller_info_v4_1 *dce_info = NULL;
+ struct atom_common_table_header *header;
+ struct atom_data_revision revision;
+ struct atom_smu_info_v3_2 *smu_info_v3_2 = NULL;
+ struct atom_smu_info_v3_3 *smu_info_v3_3 = NULL;
+
+ if (!info)
+ return BP_RESULT_BADINPUT;
+
+ firmware_info = GET_IMAGE(struct atom_firmware_info_v3_2,
+ DATA_TABLES(firmwareinfo));
+
+ dce_info = GET_IMAGE(struct atom_display_controller_info_v4_1,
+ DATA_TABLES(dce_info));
+
+ if (!firmware_info || !dce_info)
+ return BP_RESULT_BADBIOSTABLE;
+
+ memset(info, 0, sizeof(*info));
+
+ header = GET_IMAGE(struct atom_common_table_header,
+ DATA_TABLES(smu_info));
+ get_atom_data_table_revision(header, &revision);
+
+ if (revision.minor == 2) {
+ /* Vega12 */
+ smu_info_v3_2 = GET_IMAGE(struct atom_smu_info_v3_2,
+ DATA_TABLES(smu_info));
+
+ if (!smu_info_v3_2)
+ return BP_RESULT_BADBIOSTABLE;
+
+ info->default_engine_clk = smu_info_v3_2->bootup_dcefclk_10khz * 10;
+ } else if (revision.minor == 3) {
+ /* Vega20 */
+ smu_info_v3_3 = GET_IMAGE(struct atom_smu_info_v3_3,
+ DATA_TABLES(smu_info));
+
+ if (!smu_info_v3_3)
+ return BP_RESULT_BADBIOSTABLE;
+
+ info->default_engine_clk = smu_info_v3_3->bootup_dcefclk_10khz * 10;
+ }
+
+ // We need to convert from 10 kHz units into kHz units.
+ info->default_memory_clk = firmware_info->bootup_mclk_in10khz * 10;
+
+ /* 27MHz for Vega10 & Vega12; 100MHz for Vega20 */
+ info->pll_info.crystal_frequency = dce_info->dce_refclk_10khz * 10;
+ /* Hardcode frequency if BIOS gives no DCE Ref Clk */
+ if (info->pll_info.crystal_frequency == 0) {
+ if (revision.minor == 2)
+ info->pll_info.crystal_frequency = 27000;
+ else if (revision.minor == 3)
+ info->pll_info.crystal_frequency = 100000;
+ }
+ /* dp_phy_ref_clk is not correct for atom_display_controller_info_v4_2, but we don't use it */
+ info->dp_phy_ref_clk = dce_info->dpphy_refclk_10khz * 10;
+ info->i2c_engine_ref_clk = dce_info->i2c_engine_refclk_10khz * 10;
+
+ /* Get GPU PLL VCO Clock */
+ if (bp->cmd_tbl.get_smu_clock_info != NULL) {
+ if (revision.minor == 2)
+ info->smu_gpu_pll_output_freq =
+ bp->cmd_tbl.get_smu_clock_info(bp, SMU9_SYSPLL0_ID) * 10;
+ else if (revision.minor == 3)
+ info->smu_gpu_pll_output_freq =
+ bp->cmd_tbl.get_smu_clock_info(bp, SMU11_SYSPLL3_0_ID) * 10;
+ }
+
+ return BP_RESULT_OK;
+}
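The unit handling in get_firmware_info_v3_2() is uniform: VBIOS tables store clocks in 10 kHz units and the driver wants kHz, hence the `* 10` on every field, with hard-coded crystal fallbacks when the table reports zero. A tiny self-contained check of the arithmetic, using a made-up register value:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint32_t dce_refclk_10khz = 2700;		/* hypothetical VBIOS field */
	uint32_t crystal_khz = dce_refclk_10khz * 10;	/* 27000 kHz */

	if (crystal_khz == 0)
		crystal_khz = 27000;	/* Vega10/Vega12 fallback, per the patch */

	printf("%u kHz = %.0f MHz\n", crystal_khz, crystal_khz / 1000.0);
	return 0;	/* prints: 27000 kHz = 27 MHz */
}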
+
static enum bp_result bios_parser_get_encoder_cap_info(
struct dc_bios *dcb,
struct graphics_object_id object_id,
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_ddc.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_ddc.c
index 49c2face1e7a..ae48d603ebd6 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link_ddc.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_ddc.c
@@ -629,13 +629,14 @@ bool dal_ddc_service_query_ddc_data(
return ret;
}
-ssize_t dal_ddc_service_read_dpcd_data(
+enum ddc_result dal_ddc_service_read_dpcd_data(
struct ddc_service *ddc,
bool i2c,
enum i2c_mot_mode mot,
uint32_t address,
uint8_t *data,
- uint32_t len)
+ uint32_t len,
+ uint32_t *read)
{
struct aux_payload read_payload = {
.i2c_over_aux = i2c,
@@ -652,6 +653,8 @@ ssize_t dal_ddc_service_read_dpcd_data(
.mot = mot
};
+ *read = 0;
+
if (len > DEFAULT_AUX_MAX_DATA_SIZE) {
BREAK_TO_DEBUGGER();
return DDC_RESULT_FAILED_INVALID_OPERATION;
@@ -661,7 +664,8 @@ ssize_t dal_ddc_service_read_dpcd_data(
ddc->ctx->i2caux,
ddc->ddc_pin,
&command)) {
- return (ssize_t)command.payloads->length;
+ *read = command.payloads->length;
+ return DDC_RESULT_SUCESSFULL;
}
return DDC_RESULT_FAILED_OPERATION;
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_surface.c b/drivers/gpu/drm/amd/display/dc/core/dc_surface.c
index ade5b8ee9c3c..132eef3826e2 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_surface.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_surface.c
@@ -66,8 +66,8 @@ struct dc_plane_state *dc_create_plane_state(struct dc *dc)
{
struct dc *core_dc = dc;
- struct dc_plane_state *plane_state = kzalloc(sizeof(*plane_state),
- GFP_KERNEL);
+ struct dc_plane_state *plane_state = kvzalloc(sizeof(*plane_state),
+ GFP_KERNEL);
if (NULL == plane_state)
return NULL;
@@ -120,7 +120,7 @@ static void dc_plane_state_free(struct kref *kref)
{
struct dc_plane_state *plane_state = container_of(kref, struct dc_plane_state, refcount);
destruct(plane_state);
- kfree(plane_state);
+ kvfree(plane_state);
}
void dc_plane_state_release(struct dc_plane_state *plane_state)
@@ -136,7 +136,7 @@ void dc_gamma_retain(struct dc_gamma *gamma)
static void dc_gamma_free(struct kref *kref)
{
struct dc_gamma *gamma = container_of(kref, struct dc_gamma, refcount);
- kfree(gamma);
+ kvfree(gamma);
}
void dc_gamma_release(struct dc_gamma **gamma)
@@ -147,7 +147,7 @@ void dc_gamma_release(struct dc_gamma **gamma)
struct dc_gamma *dc_create_gamma(void)
{
- struct dc_gamma *gamma = kzalloc(sizeof(*gamma), GFP_KERNEL);
+ struct dc_gamma *gamma = kvzalloc(sizeof(*gamma), GFP_KERNEL);
if (gamma == NULL)
goto alloc_fail;
@@ -167,7 +167,7 @@ void dc_transfer_func_retain(struct dc_transfer_func *tf)
static void dc_transfer_func_free(struct kref *kref)
{
struct dc_transfer_func *tf = container_of(kref, struct dc_transfer_func, refcount);
- kfree(tf);
+ kvfree(tf);
}
void dc_transfer_func_release(struct dc_transfer_func *tf)
@@ -177,7 +177,7 @@ void dc_transfer_func_release(struct dc_transfer_func *tf)
struct dc_transfer_func *dc_create_transfer_func(void)
{
- struct dc_transfer_func *tf = kzalloc(sizeof(*tf), GFP_KERNEL);
+ struct dc_transfer_func *tf = kvzalloc(sizeof(*tf), GFP_KERNEL);
if (tf == NULL)
goto alloc_fail;
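On the kzalloc to kvzalloc conversions in dc_surface.c (and color_gamma.c below): kvzalloc() attempts a kmalloc() first and falls back to vmalloc() when the slab allocator cannot satisfy the request, and kvfree() releases either kind. That suits allocations like dc_plane_state that can grow large, provided the buffer never needs physical contiguity for DMA. Minimal usage sketch:

struct dc_plane_state *ps = kvzalloc(sizeof(*ps), GFP_KERNEL);

if (!ps)
	return NULL;
/* ... */
kvfree(ps);	/* correct for kmalloc- and vmalloc-backed memory alike */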
diff --git a/drivers/gpu/drm/amd/display/dc/inc/dc_link_ddc.h b/drivers/gpu/drm/amd/display/dc/inc/dc_link_ddc.h
index 090b7a8dd67b..30b3a08b91be 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/dc_link_ddc.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/dc_link_ddc.h
@@ -102,13 +102,14 @@ bool dal_ddc_service_query_ddc_data(
uint8_t *read_buf,
uint32_t read_size);
-ssize_t dal_ddc_service_read_dpcd_data(
+enum ddc_result dal_ddc_service_read_dpcd_data(
struct ddc_service *ddc,
bool i2c,
enum i2c_mot_mode mot,
uint32_t address,
uint8_t *data,
- uint32_t len);
+ uint32_t len,
+ uint32_t *read);
enum ddc_result dal_ddc_service_write_dpcd_data(
struct ddc_service *ddc,
diff --git a/drivers/gpu/drm/amd/display/include/dal_asic_id.h b/drivers/gpu/drm/amd/display/include/dal_asic_id.h
index 9831cb5eaa7c..9b0a04f99ac8 100644
--- a/drivers/gpu/drm/amd/display/include/dal_asic_id.h
+++ b/drivers/gpu/drm/amd/display/include/dal_asic_id.h
@@ -113,9 +113,14 @@
#define AI_GREENLAND_P_A0 1
#define AI_GREENLAND_P_A1 2
+#define AI_UNKNOWN 0xFF
-#define ASICREV_IS_GREENLAND_M(eChipRev) (eChipRev < AI_UNKNOWN)
-#define ASICREV_IS_GREENLAND_P(eChipRev) (eChipRev < AI_UNKNOWN)
+#define AI_VEGA12_P_A0 20
+#define ASICREV_IS_GREENLAND_M(eChipRev) (eChipRev < AI_VEGA12_P_A0)
+#define ASICREV_IS_GREENLAND_P(eChipRev) (eChipRev < AI_VEGA12_P_A0)
+
+#define ASICREV_IS_VEGA12_P(eChipRev) ((eChipRev >= AI_VEGA12_P_A0) && (eChipRev < AI_UNKNOWN))
+#define ASICREV_IS_VEGA12_p(eChipRev) ((eChipRev >= AI_VEGA12_P_A0) && (eChipRev < AI_UNKNOWN))
/* DCN1_0 */
#define INTERNAL_REV_RAVEN_A0 0x00 /* First spin of Raven */
diff --git a/drivers/gpu/drm/amd/display/modules/color/color_gamma.c b/drivers/gpu/drm/amd/display/modules/color/color_gamma.c
index e7e374f56864..b3747a019deb 100644
--- a/drivers/gpu/drm/amd/display/modules/color/color_gamma.c
+++ b/drivers/gpu/drm/amd/display/modules/color/color_gamma.c
@@ -1093,19 +1093,19 @@ bool mod_color_calculate_regamma_params(struct dc_transfer_func *output_tf,
output_tf->type = TF_TYPE_DISTRIBUTED_POINTS;
- rgb_user = kzalloc(sizeof(*rgb_user) * (ramp->num_entries + _EXTRA_POINTS),
- GFP_KERNEL);
+ rgb_user = kvzalloc(sizeof(*rgb_user) * (ramp->num_entries + _EXTRA_POINTS),
+ GFP_KERNEL);
if (!rgb_user)
goto rgb_user_alloc_fail;
- rgb_regamma = kzalloc(sizeof(*rgb_regamma) * (MAX_HW_POINTS + _EXTRA_POINTS),
- GFP_KERNEL);
+ rgb_regamma = kvzalloc(sizeof(*rgb_regamma) * (MAX_HW_POINTS + _EXTRA_POINTS),
+ GFP_KERNEL);
if (!rgb_regamma)
goto rgb_regamma_alloc_fail;
- axix_x = kzalloc(sizeof(*axix_x) * (ramp->num_entries + 3),
- GFP_KERNEL);
+ axix_x = kvzalloc(sizeof(*axix_x) * (ramp->num_entries + 3),
+ GFP_KERNEL);
if (!axix_x)
goto axix_x_alloc_fail;
- coeff = kzalloc(sizeof(*coeff) * (MAX_HW_POINTS + _EXTRA_POINTS), GFP_KERNEL);
+ coeff = kvzalloc(sizeof(*coeff) * (MAX_HW_POINTS + _EXTRA_POINTS), GFP_KERNEL);
if (!coeff)
goto coeff_alloc_fail;
@@ -1157,13 +1157,13 @@ bool mod_color_calculate_regamma_params(struct dc_transfer_func *output_tf,
ret = true;
- kfree(coeff);
+ kvfree(coeff);
coeff_alloc_fail:
- kfree(axix_x);
+ kvfree(axix_x);
axix_x_alloc_fail:
- kfree(rgb_regamma);
+ kvfree(rgb_regamma);
rgb_regamma_alloc_fail:
- kfree(rgb_user);
+ kvfree(rgb_user);
rgb_user_alloc_fail:
return ret;
}
@@ -1192,19 +1192,19 @@ bool mod_color_calculate_degamma_params(struct dc_transfer_func *input_tf,
input_tf->type = TF_TYPE_DISTRIBUTED_POINTS;
- rgb_user = kzalloc(sizeof(*rgb_user) * (ramp->num_entries + _EXTRA_POINTS),
- GFP_KERNEL);
+ rgb_user = kvzalloc(sizeof(*rgb_user) * (ramp->num_entries + _EXTRA_POINTS),
+ GFP_KERNEL);
if (!rgb_user)
goto rgb_user_alloc_fail;
- curve = kzalloc(sizeof(*curve) * (MAX_HW_POINTS + _EXTRA_POINTS),
- GFP_KERNEL);
+ curve = kvzalloc(sizeof(*curve) * (MAX_HW_POINTS + _EXTRA_POINTS),
+ GFP_KERNEL);
if (!curve)
goto curve_alloc_fail;
- axix_x = kzalloc(sizeof(*axix_x) * (ramp->num_entries + _EXTRA_POINTS),
- GFP_KERNEL);
+ axix_x = kvzalloc(sizeof(*axix_x) * (ramp->num_entries + _EXTRA_POINTS),
+ GFP_KERNEL);
if (!axix_x)
goto axix_x_alloc_fail;
- coeff = kzalloc(sizeof(*coeff) * (MAX_HW_POINTS + _EXTRA_POINTS), GFP_KERNEL);
+ coeff = kvzalloc(sizeof(*coeff) * (MAX_HW_POINTS + _EXTRA_POINTS), GFP_KERNEL);
if (!coeff)
goto coeff_alloc_fail;
@@ -1246,13 +1246,13 @@ bool mod_color_calculate_degamma_params(struct dc_transfer_func *input_tf,
ret = true;
- kfree(coeff);
+ kvfree(coeff);
coeff_alloc_fail:
- kfree(axix_x);
+ kvfree(axix_x);
axix_x_alloc_fail:
- kfree(curve);
+ kvfree(curve);
curve_alloc_fail:
- kfree(rgb_user);
+ kvfree(rgb_user);
rgb_user_alloc_fail:
return ret;
@@ -1281,8 +1281,9 @@ bool mod_color_calculate_curve(enum dc_transfer_func_predefined trans,
}
ret = true;
} else if (trans == TRANSFER_FUNCTION_PQ) {
- rgb_regamma = kzalloc(sizeof(*rgb_regamma) * (MAX_HW_POINTS +
- _EXTRA_POINTS), GFP_KERNEL);
+ rgb_regamma = kvzalloc(sizeof(*rgb_regamma) *
+ (MAX_HW_POINTS + _EXTRA_POINTS),
+ GFP_KERNEL);
if (!rgb_regamma)
goto rgb_regamma_alloc_fail;
points->end_exponent = 7;
@@ -1302,11 +1303,12 @@ bool mod_color_calculate_curve(enum dc_transfer_func_predefined trans,
}
ret = true;
- kfree(rgb_regamma);
+ kvfree(rgb_regamma);
} else if (trans == TRANSFER_FUNCTION_SRGB ||
trans == TRANSFER_FUNCTION_BT709) {
- rgb_regamma = kzalloc(sizeof(*rgb_regamma) * (MAX_HW_POINTS +
- _EXTRA_POINTS), GFP_KERNEL);
+ rgb_regamma = kvzalloc(sizeof(*rgb_regamma) *
+ (MAX_HW_POINTS + _EXTRA_POINTS),
+ GFP_KERNEL);
if (!rgb_regamma)
goto rgb_regamma_alloc_fail;
points->end_exponent = 0;
@@ -1324,7 +1326,7 @@ bool mod_color_calculate_curve(enum dc_transfer_func_predefined trans,
}
ret = true;
- kfree(rgb_regamma);
+ kvfree(rgb_regamma);
}
rgb_regamma_alloc_fail:
return ret;
@@ -1348,8 +1350,9 @@ bool mod_color_calculate_degamma_curve(enum dc_transfer_func_predefined trans,
}
ret = true;
} else if (trans == TRANSFER_FUNCTION_PQ) {
- rgb_degamma = kzalloc(sizeof(*rgb_degamma) * (MAX_HW_POINTS +
- _EXTRA_POINTS), GFP_KERNEL);
+ rgb_degamma = kvzalloc(sizeof(*rgb_degamma) *
+ (MAX_HW_POINTS + _EXTRA_POINTS),
+ GFP_KERNEL);
if (!rgb_degamma)
goto rgb_degamma_alloc_fail;
@@ -1364,11 +1367,12 @@ bool mod_color_calculate_degamma_curve(enum dc_transfer_func_predefined trans,
}
ret = true;
- kfree(rgb_degamma);
+ kvfree(rgb_degamma);
} else if (trans == TRANSFER_FUNCTION_SRGB ||
trans == TRANSFER_FUNCTION_BT709) {
- rgb_degamma = kzalloc(sizeof(*rgb_degamma) * (MAX_HW_POINTS +
- _EXTRA_POINTS), GFP_KERNEL);
+ rgb_degamma = kvzalloc(sizeof(*rgb_degamma) *
+ (MAX_HW_POINTS + _EXTRA_POINTS),
+ GFP_KERNEL);
if (!rgb_degamma)
goto rgb_degamma_alloc_fail;
@@ -1382,7 +1386,7 @@ bool mod_color_calculate_degamma_curve(enum dc_transfer_func_predefined trans,
}
ret = true;
- kfree(rgb_degamma);
+ kvfree(rgb_degamma);
}
points->end_exponent = 0;
points->x_point_at_y1_red = 1;
diff --git a/drivers/gpu/drm/amd/include/atomfirmware.h b/drivers/gpu/drm/amd/include/atomfirmware.h
index 0f5ad54d3fd3..de177ce8ca80 100644
--- a/drivers/gpu/drm/amd/include/atomfirmware.h
+++ b/drivers/gpu/drm/amd/include/atomfirmware.h
@@ -501,6 +501,32 @@ enum atom_cooling_solution_id{
LIQUID_COOLING = 0x01
};
+struct atom_firmware_info_v3_2 {
+ struct atom_common_table_header table_header;
+ uint32_t firmware_revision;
+ uint32_t bootup_sclk_in10khz;
+ uint32_t bootup_mclk_in10khz;
+ uint32_t firmware_capability; // enum atombios_firmware_capability
+ uint32_t main_call_parser_entry; /* direct address of main parser call in VBIOS binary. */
+ uint32_t bios_scratch_reg_startaddr; // 1st bios scratch register dword address
+ uint16_t bootup_vddc_mv;
+ uint16_t bootup_vddci_mv;
+ uint16_t bootup_mvddc_mv;
+ uint16_t bootup_vddgfx_mv;
+ uint8_t mem_module_id;
+ uint8_t coolingsolution_id; /*0: Air cooling; 1: Liquid cooling ... */
+ uint8_t reserved1[2];
+ uint32_t mc_baseaddr_high;
+ uint32_t mc_baseaddr_low;
+ uint8_t board_i2c_feature_id; // enum of atom_board_i2c_feature_id_def
+ uint8_t board_i2c_feature_gpio_id; // i2c id find in gpio_lut data table gpio_id
+ uint8_t board_i2c_feature_slave_addr;
+ uint8_t reserved3;
+ uint16_t bootup_mvddq_mv;
+ uint16_t bootup_mvpp_mv;
+ uint32_t zfbstartaddrin16mb;
+ uint32_t reserved2[3];
+};
/*
***************************************************************************
@@ -1169,7 +1195,29 @@ struct atom_gfx_info_v2_2
uint32_t rlc_gpu_timer_refclk;
};
-
+struct atom_gfx_info_v2_3 {
+ struct atom_common_table_header table_header;
+ uint8_t gfxip_min_ver;
+ uint8_t gfxip_max_ver;
+ uint8_t max_shader_engines;
+ uint8_t max_tile_pipes;
+ uint8_t max_cu_per_sh;
+ uint8_t max_sh_per_se;
+ uint8_t max_backends_per_se;
+ uint8_t max_texture_channel_caches;
+ uint32_t regaddr_cp_dma_src_addr;
+ uint32_t regaddr_cp_dma_src_addr_hi;
+ uint32_t regaddr_cp_dma_dst_addr;
+ uint32_t regaddr_cp_dma_dst_addr_hi;
+ uint32_t regaddr_cp_dma_command;
+ uint32_t regaddr_cp_status;
+ uint32_t regaddr_rlc_gpu_clock_32;
+ uint32_t rlc_gpu_timer_refclk;
+ uint8_t active_cu_per_sh;
+ uint8_t active_rb_per_se;
+ uint16_t gcgoldenoffset;
+ uint32_t rm21_sram_vmin_value;
+};
/*
***************************************************************************
@@ -1198,6 +1246,76 @@ struct atom_smu_info_v3_1
uint8_t fw_ctf_polarity; // GPIO polarity for CTF
};
+struct atom_smu_info_v3_2 {
+ struct atom_common_table_header table_header;
+ uint8_t smuip_min_ver;
+ uint8_t smuip_max_ver;
+ uint8_t smu_rsd1;
+ uint8_t gpuclk_ss_mode;
+ uint16_t sclk_ss_percentage;
+ uint16_t sclk_ss_rate_10hz;
+ uint16_t gpuclk_ss_percentage; // in unit of 0.001%
+ uint16_t gpuclk_ss_rate_10hz;
+ uint32_t core_refclk_10khz;
+ uint8_t ac_dc_gpio_bit; // GPIO bit shift in SMU_GPIOPAD_A configured for AC/DC switching, =0xff means invalid
+ uint8_t ac_dc_polarity; // GPIO polarity for AC/DC switching
+ uint8_t vr0hot_gpio_bit; // GPIO bit shift in SMU_GPIOPAD_A configured for VR0 HOT event, =0xff means invalid
+ uint8_t vr0hot_polarity; // GPIO polarity for VR0 HOT event
+ uint8_t vr1hot_gpio_bit; // GPIO bit shift in SMU_GPIOPAD_A configured for VR1 HOT event, =0xff means invalid
+ uint8_t vr1hot_polarity; // GPIO polarity for VR1 HOT event
+ uint8_t fw_ctf_gpio_bit; // GPIO bit shift in SMU_GPIOPAD_A configured for CTF, =0xff means invalid
+ uint8_t fw_ctf_polarity; // GPIO polarity for CTF
+ uint8_t pcc_gpio_bit; // GPIO bit shift in SMU_GPIOPAD_A configured for PCC, =0xff means invalid
+ uint8_t pcc_gpio_polarity; // GPIO polarity for PCC
+ uint16_t smugoldenoffset;
+ uint32_t gpupll_vco_freq_10khz;
+ uint32_t bootup_smnclk_10khz;
+ uint32_t bootup_socclk_10khz;
+ uint32_t bootup_mp0clk_10khz;
+ uint32_t bootup_mp1clk_10khz;
+ uint32_t bootup_lclk_10khz;
+ uint32_t bootup_dcefclk_10khz;
+ uint32_t ctf_threshold_override_value;
+ uint32_t reserved[5];
+};
+
+struct atom_smu_info_v3_3 {
+ struct atom_common_table_header table_header;
+ uint8_t smuip_min_ver;
+ uint8_t smuip_max_ver;
+ uint8_t smu_rsd1;
+ uint8_t gpuclk_ss_mode;
+ uint16_t sclk_ss_percentage;
+ uint16_t sclk_ss_rate_10hz;
+ uint16_t gpuclk_ss_percentage; // in unit of 0.001%
+ uint16_t gpuclk_ss_rate_10hz;
+ uint32_t core_refclk_10khz;
+ uint8_t ac_dc_gpio_bit; // GPIO bit shift in SMU_GPIOPAD_A configured for AC/DC switching, =0xff means invalid
+ uint8_t ac_dc_polarity; // GPIO polarity for AC/DC switching
+ uint8_t vr0hot_gpio_bit; // GPIO bit shift in SMU_GPIOPAD_A configured for VR0 HOT event, =0xff means invalid
+ uint8_t vr0hot_polarity; // GPIO polarity for VR0 HOT event
+ uint8_t vr1hot_gpio_bit; // GPIO bit shift in SMU_GPIOPAD_A configured for VR1 HOT event, =0xff means invalid
+ uint8_t vr1hot_polarity; // GPIO polarity for VR1 HOT event
+ uint8_t fw_ctf_gpio_bit; // GPIO bit shift in SMU_GPIOPAD_A configured for CTF, =0xff means invalid
+ uint8_t fw_ctf_polarity; // GPIO polarity for CTF
+ uint8_t pcc_gpio_bit; // GPIO bit shift in SMU_GPIOPAD_A configured for PCC, =0xff means invalid
+ uint8_t pcc_gpio_polarity; // GPIO polarity for PCC
+ uint16_t smugoldenoffset;
+ uint32_t gpupll_vco_freq_10khz;
+ uint32_t bootup_smnclk_10khz;
+ uint32_t bootup_socclk_10khz;
+ uint32_t bootup_mp0clk_10khz;
+ uint32_t bootup_mp1clk_10khz;
+ uint32_t bootup_lclk_10khz;
+ uint32_t bootup_dcefclk_10khz;
+ uint32_t ctf_threshold_override_value;
+ uint32_t syspll3_0_vco_freq_10khz;
+ uint32_t syspll3_1_vco_freq_10khz;
+ uint32_t bootup_fclk_10khz;
+ uint32_t bootup_waflclk_10khz;
+ uint32_t reserved[3];
+};
+
/*
***************************************************************************
Data Table smc_dpm_info structure
@@ -1283,7 +1401,6 @@ struct atom_smc_dpm_info_v4_1
uint32_t boardreserved[10];
};
-
/*
***************************************************************************
Data Table asic_profiling_info structure
@@ -1864,6 +1981,55 @@ enum atom_smu9_syspll0_clock_id
SMU9_SYSPLL0_DISPCLK_ID = 11, // DISPCLK
};
+enum atom_smu11_syspll_id {
+ SMU11_SYSPLL0_ID = 0,
+ SMU11_SYSPLL1_0_ID = 1,
+ SMU11_SYSPLL1_1_ID = 2,
+ SMU11_SYSPLL1_2_ID = 3,
+ SMU11_SYSPLL2_ID = 4,
+ SMU11_SYSPLL3_0_ID = 5,
+ SMU11_SYSPLL3_1_ID = 6,
+};
+
+
+enum atom_smu11_syspll0_clock_id {
+ SMU11_SYSPLL0_SOCCLK_ID = 0, // SOCCLK
+ SMU11_SYSPLL0_MP0CLK_ID = 1, // MP0CLK
+ SMU11_SYSPLL0_DCLK_ID = 2, // DCLK
+ SMU11_SYSPLL0_VCLK_ID = 3, // VCLK
+ SMU11_SYSPLL0_ECLK_ID = 4, // ECLK
+ SMU11_SYSPLL0_DCEFCLK_ID = 5, // DCEFCLK
+};
+
+
+enum atom_smu11_syspll1_0_clock_id {
+ SMU11_SYSPLL1_0_UCLKA_ID = 0, // UCLK_a
+};
+
+enum atom_smu11_syspll1_1_clock_id {
+ SMU11_SYSPLL1_0_UCLKB_ID = 0, // UCLK_b
+};
+
+enum atom_smu11_syspll1_2_clock_id {
+ SMU11_SYSPLL1_0_FCLK_ID = 0, // FCLK
+};
+
+enum atom_smu11_syspll2_clock_id {
+ SMU11_SYSPLL2_GFXCLK_ID = 0, // GFXCLK
+};
+
+enum atom_smu11_syspll3_0_clock_id {
+ SMU11_SYSPLL3_0_WAFCLK_ID = 0, // WAFCLK
+ SMU11_SYSPLL3_0_DISPCLK_ID = 1, // DISPCLK
+ SMU11_SYSPLL3_0_DPREFCLK_ID = 2, // DPREFCLK
+};
+
+enum atom_smu11_syspll3_1_clock_id {
+ SMU11_SYSPLL3_1_MP1CLK_ID = 0, // MP1CLK
+ SMU11_SYSPLL3_1_SMNCLK_ID = 1, // SMNCLK
+ SMU11_SYSPLL3_1_LCLK_ID = 2, // LCLK
+};
+
struct atom_get_smu_clock_info_output_parameters_v3_1
{
union {
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
index 26fbeafc3c96..18b5b2ff47fe 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
@@ -79,12 +79,13 @@
#define PCIE_BUS_CLK 10000
#define TCLK (PCIE_BUS_CLK / 10)
-static const struct profile_mode_setting smu7_profiling[5] =
+static const struct profile_mode_setting smu7_profiling[6] =
{{1, 0, 100, 30, 1, 0, 100, 10},
{1, 10, 0, 30, 0, 0, 0, 0},
{0, 0, 0, 0, 1, 10, 16, 31},
{1, 0, 11, 50, 1, 0, 100, 10},
{1, 0, 5, 30, 0, 0, 0, 0},
+ {0, 0, 0, 0, 0, 0, 0, 0},
};
/** Values for the CG_THERMAL_CTRL::DPM_EVENT_SRC field. */
@@ -4864,6 +4865,17 @@ static int smu7_get_power_profile_mode(struct pp_hwmgr *hwmgr, char *buf)
len = sizeof(smu7_profiling) / sizeof(struct profile_mode_setting);
for (i = 0; i < len; i++) {
+ if (i == hwmgr->power_profile_mode) {
+ size += sprintf(buf + size, "%3d %14s %s: %8d %16d %16d %16d %16d %16d\n",
+ i, profile_name[i], "*",
+ data->current_profile_setting.sclk_up_hyst,
+ data->current_profile_setting.sclk_down_hyst,
+ data->current_profile_setting.sclk_activity,
+ data->current_profile_setting.mclk_up_hyst,
+ data->current_profile_setting.mclk_down_hyst,
+ data->current_profile_setting.mclk_activity);
+ continue;
+ }
if (smu7_profiling[i].bupdate_sclk)
size += sprintf(buf + size, "%3d %16s: %8d %16d %16d ",
i, profile_name[i], smu7_profiling[i].sclk_up_hyst,
@@ -4883,24 +4895,6 @@ static int smu7_get_power_profile_mode(struct pp_hwmgr *hwmgr, char *buf)
"-", "-", "-");
}
- size += sprintf(buf + size, "%3d %16s: %8d %16d %16d %16d %16d %16d\n",
- i, profile_name[i],
- data->custom_profile_setting.sclk_up_hyst,
- data->custom_profile_setting.sclk_down_hyst,
- data->custom_profile_setting.sclk_activity,
- data->custom_profile_setting.mclk_up_hyst,
- data->custom_profile_setting.mclk_down_hyst,
- data->custom_profile_setting.mclk_activity);
-
- size += sprintf(buf + size, "%3s %16s: %8d %16d %16d %16d %16d %16d\n",
- "*", "CURRENT",
- data->current_profile_setting.sclk_up_hyst,
- data->current_profile_setting.sclk_down_hyst,
- data->current_profile_setting.sclk_activity,
- data->current_profile_setting.mclk_up_hyst,
- data->current_profile_setting.mclk_down_hyst,
- data->current_profile_setting.mclk_activity);
-
return size;
}
@@ -4939,16 +4933,16 @@ static int smu7_set_power_profile_mode(struct pp_hwmgr *hwmgr, long *input, uint
if (size < 8)
return -EINVAL;
- data->custom_profile_setting.bupdate_sclk = input[0];
- data->custom_profile_setting.sclk_up_hyst = input[1];
- data->custom_profile_setting.sclk_down_hyst = input[2];
- data->custom_profile_setting.sclk_activity = input[3];
- data->custom_profile_setting.bupdate_mclk = input[4];
- data->custom_profile_setting.mclk_up_hyst = input[5];
- data->custom_profile_setting.mclk_down_hyst = input[6];
- data->custom_profile_setting.mclk_activity = input[7];
- if (!smum_update_dpm_settings(hwmgr, &data->custom_profile_setting)) {
- memcpy(&data->current_profile_setting, &data->custom_profile_setting, sizeof(struct profile_mode_setting));
+ tmp.bupdate_sclk = input[0];
+ tmp.sclk_up_hyst = input[1];
+ tmp.sclk_down_hyst = input[2];
+ tmp.sclk_activity = input[3];
+ tmp.bupdate_mclk = input[4];
+ tmp.mclk_up_hyst = input[5];
+ tmp.mclk_down_hyst = input[6];
+ tmp.mclk_activity = input[7];
+ if (!smum_update_dpm_settings(hwmgr, &tmp)) {
+ memcpy(&data->current_profile_setting, &tmp, sizeof(struct profile_mode_setting));
hwmgr->power_profile_mode = mode;
}
break;
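The smu7 rework above also removes a stale-state hazard: the user-supplied custom profile is now staged in a stack temporary and committed to current_profile_setting only after smum_update_dpm_settings() accepts it, so a rejected update leaves nothing half-applied. Condensed sketch of the commit step:

struct profile_mode_setting tmp;

tmp.bupdate_sclk = input[0];
/* ... the remaining seven fields from input[1..7] ... */
if (!smum_update_dpm_settings(hwmgr, &tmp)) {	/* 0 means the SMU took it */
	memcpy(&data->current_profile_setting, &tmp, sizeof(tmp));
	hwmgr->power_profile_mode = mode;
}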
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.h b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.h
index f40179c9ca97..b8d0bb378595 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.h
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.h
@@ -325,7 +325,6 @@ struct smu7_hwmgr {
uint16_t mem_latency_high;
uint16_t mem_latency_low;
uint32_t vr_config;
- struct profile_mode_setting custom_profile_setting;
struct profile_mode_setting current_profile_setting;
};
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_powertune.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_powertune.c
index 03bc7453f3b1..d9e92e306535 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_powertune.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_powertune.c
@@ -852,12 +852,10 @@ int smu7_set_power_limit(struct pp_hwmgr *hwmgr, uint32_t n)
{
struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend);
- n = (n & 0xff) << 8;
-
if (data->power_containment_features &
POWERCONTAINMENT_FEATURE_PkgPwrLimit)
return smum_send_msg_to_smc_with_parameter(hwmgr,
- PPSMC_MSG_PkgPwrSetLimit, n);
+ PPSMC_MSG_PkgPwrSetLimit, n<<8);
return 0;
}
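The smu7_set_power_limit() change is easy to misread: the old code masked the limit to 8 bits before shifting, silently truncating any value above 255 on its way to the SMC, while the new code shifts the full value (the exact fixed-point encoding is the SMC's). A short demonstration with a hypothetical limit of 300:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint32_t n = 300;	/* hypothetical power limit */

	printf("old: 0x%x\n", (n & 0xff) << 8);	/* 0x2c00,  i.e. 44 seen */
	printf("new: 0x%x\n", n << 8);		/* 0x12c00, i.e. 300 seen */
	return 0;
}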
diff --git a/drivers/gpu/drm/bridge/Kconfig b/drivers/gpu/drm/bridge/Kconfig
index 3aa65bdecb0e..684ac626ac53 100644
--- a/drivers/gpu/drm/bridge/Kconfig
+++ b/drivers/gpu/drm/bridge/Kconfig
@@ -74,6 +74,7 @@ config DRM_SIL_SII8620
tristate "Silicon Image SII8620 HDMI/MHL bridge"
depends on OF && RC_CORE
select DRM_KMS_HELPER
+ imply EXTCON
help
Silicon Image SII8620 HDMI/MHL bridge chip driver.
diff --git a/drivers/gpu/drm/bridge/dumb-vga-dac.c b/drivers/gpu/drm/bridge/dumb-vga-dac.c
index 498d5948d1a8..9837c8d69e69 100644
--- a/drivers/gpu/drm/bridge/dumb-vga-dac.c
+++ b/drivers/gpu/drm/bridge/dumb-vga-dac.c
@@ -56,7 +56,9 @@ static int dumb_vga_get_modes(struct drm_connector *connector)
}
drm_mode_connector_update_edid_property(connector, edid);
- return drm_add_edid_modes(connector, edid);
+ ret = drm_add_edid_modes(connector, edid);
+ kfree(edid);
+ return ret;
fallback:
/*
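The dumb-vga-dac hunk is a textbook ownership fix: the EDID helper (ultimately drm_get_edid()) returns a kmalloc'd buffer the caller owns, drm_mode_connector_update_edid_property() copies what it needs, and drm_add_edid_modes() only reads it, so the caller must kfree() the buffer or leak it on every probe. Sketch of the corrected flow, with the adapter name assumed:

edid = drm_get_edid(connector, adapter);	/* caller owns the buffer */
if (!edid)
	goto fallback;

drm_mode_connector_update_edid_property(connector, edid);
ret = drm_add_edid_modes(connector, edid);	/* does not take ownership */
kfree(edid);
return ret;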
diff --git a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c
index ec8d0006ef7c..3c136f2b954f 100644
--- a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c
+++ b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c
@@ -2077,7 +2077,7 @@ static irqreturn_t dw_hdmi_hardirq(int irq, void *dev_id)
return ret;
}
-void __dw_hdmi_setup_rx_sense(struct dw_hdmi *hdmi, bool hpd, bool rx_sense)
+void dw_hdmi_setup_rx_sense(struct dw_hdmi *hdmi, bool hpd, bool rx_sense)
{
mutex_lock(&hdmi->mutex);
@@ -2103,13 +2103,6 @@ void __dw_hdmi_setup_rx_sense(struct dw_hdmi *hdmi, bool hpd, bool rx_sense)
}
mutex_unlock(&hdmi->mutex);
}
-
-void dw_hdmi_setup_rx_sense(struct device *dev, bool hpd, bool rx_sense)
-{
- struct dw_hdmi *hdmi = dev_get_drvdata(dev);
-
- __dw_hdmi_setup_rx_sense(hdmi, hpd, rx_sense);
-}
EXPORT_SYMBOL_GPL(dw_hdmi_setup_rx_sense);
static irqreturn_t dw_hdmi_irq(int irq, void *dev_id)
@@ -2145,9 +2138,9 @@ static irqreturn_t dw_hdmi_irq(int irq, void *dev_id)
*/
if (intr_stat &
(HDMI_IH_PHY_STAT0_RX_SENSE | HDMI_IH_PHY_STAT0_HPD)) {
- __dw_hdmi_setup_rx_sense(hdmi,
- phy_stat & HDMI_PHY_HPD,
- phy_stat & HDMI_PHY_RX_SENSE);
+ dw_hdmi_setup_rx_sense(hdmi,
+ phy_stat & HDMI_PHY_HPD,
+ phy_stat & HDMI_PHY_RX_SENSE);
if ((phy_stat & (HDMI_PHY_RX_SENSE | HDMI_PHY_HPD)) == 0)
cec_notifier_set_phys_addr(hdmi->cec_notifier,
diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c
index 7d25c42f22db..c825c76edc1d 100644
--- a/drivers/gpu/drm/drm_atomic.c
+++ b/drivers/gpu/drm/drm_atomic.c
@@ -155,6 +155,8 @@ void drm_atomic_state_default_clear(struct drm_atomic_state *state)
state->connectors[i].state);
state->connectors[i].ptr = NULL;
state->connectors[i].state = NULL;
+ state->connectors[i].old_state = NULL;
+ state->connectors[i].new_state = NULL;
drm_connector_put(connector);
}
@@ -169,6 +171,8 @@ void drm_atomic_state_default_clear(struct drm_atomic_state *state)
state->crtcs[i].ptr = NULL;
state->crtcs[i].state = NULL;
+ state->crtcs[i].old_state = NULL;
+ state->crtcs[i].new_state = NULL;
}
for (i = 0; i < config->num_total_plane; i++) {
@@ -181,6 +185,8 @@ void drm_atomic_state_default_clear(struct drm_atomic_state *state)
state->planes[i].state);
state->planes[i].ptr = NULL;
state->planes[i].state = NULL;
+ state->planes[i].old_state = NULL;
+ state->planes[i].new_state = NULL;
}
for (i = 0; i < state->num_private_objs; i++) {
@@ -190,6 +196,8 @@ void drm_atomic_state_default_clear(struct drm_atomic_state *state)
state->private_objs[i].state);
state->private_objs[i].ptr = NULL;
state->private_objs[i].state = NULL;
+ state->private_objs[i].old_state = NULL;
+ state->private_objs[i].new_state = NULL;
}
state->num_private_objs = 0;
diff --git a/drivers/gpu/drm/drm_dp_helper.c b/drivers/gpu/drm/drm_dp_helper.c
index ffe14ec3e7f2..70ae1f232331 100644
--- a/drivers/gpu/drm/drm_dp_helper.c
+++ b/drivers/gpu/drm/drm_dp_helper.c
@@ -1145,6 +1145,7 @@ int drm_dp_psr_setup_time(const u8 psr_cap[EDP_PSR_RECEIVER_CAP_SIZE])
static const u16 psr_setup_time_us[] = {
PSR_SETUP_TIME(330),
PSR_SETUP_TIME(275),
+ PSR_SETUP_TIME(220),
PSR_SETUP_TIME(165),
PSR_SETUP_TIME(110),
PSR_SETUP_TIME(55),
diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c
index a1b9338736e3..c2c21d839727 100644
--- a/drivers/gpu/drm/drm_drv.c
+++ b/drivers/gpu/drm/drm_drv.c
@@ -716,7 +716,7 @@ static void remove_compat_control_link(struct drm_device *dev)
if (!minor)
return;
- name = kasprintf(GFP_KERNEL, "controlD%d", minor->index);
+ name = kasprintf(GFP_KERNEL, "controlD%d", minor->index + 64);
if (!name)
return;
diff --git a/drivers/gpu/drm/drm_dumb_buffers.c b/drivers/gpu/drm/drm_dumb_buffers.c
index 39ac15ce4702..9e2ae02f31e0 100644
--- a/drivers/gpu/drm/drm_dumb_buffers.c
+++ b/drivers/gpu/drm/drm_dumb_buffers.c
@@ -65,12 +65,13 @@ int drm_mode_create_dumb_ioctl(struct drm_device *dev,
return -EINVAL;
/* overflow checks for 32bit size calculations */
- /* NOTE: DIV_ROUND_UP() can overflow */
+ if (args->bpp > U32_MAX - 8)
+ return -EINVAL;
cpp = DIV_ROUND_UP(args->bpp, 8);
- if (!cpp || cpp > 0xffffffffU / args->width)
+ if (cpp > U32_MAX / args->width)
return -EINVAL;
stride = cpp * args->width;
- if (args->height > 0xffffffffU / stride)
+ if (args->height > U32_MAX / stride)
return -EINVAL;
/* test for wrap-around */
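The drm_mode_create_dumb_ioctl() hunk replaces an overflow-prone size computation with explicit pre-checks: DIV_ROUND_UP(bpp, 8) computes (bpp + 7) / 8 and wraps when bpp > U32_MAX - 7 (the patch's U32_MAX - 8 bound is one value stricter, which is harmless), and each multiplication is guarded by dividing U32_MAX first. Self-contained sketch of the discipline:

#include <stdint.h>
#include <errno.h>

#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

/* Writes *size, or returns -EINVAL if any 32-bit step would wrap. */
static int dumb_size(uint32_t bpp, uint32_t width, uint32_t height,
		     uint32_t *size)
{
	uint32_t cpp, stride;

	if (!bpp || !width || !height)
		return -EINVAL;
	if (bpp > UINT32_MAX - 8)		/* DIV_ROUND_UP adds 7 */
		return -EINVAL;
	cpp = DIV_ROUND_UP(bpp, 8);
	if (cpp > UINT32_MAX / width)		/* cpp * width must fit */
		return -EINVAL;
	stride = cpp * width;
	if (height > UINT32_MAX / stride)	/* stride * height must fit */
		return -EINVAL;
	*size = stride * height;
	return 0;
}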
diff --git a/drivers/gpu/drm/drm_file.c b/drivers/gpu/drm/drm_file.c
index e394799979a6..6d9b9453707c 100644
--- a/drivers/gpu/drm/drm_file.c
+++ b/drivers/gpu/drm/drm_file.c
@@ -212,6 +212,7 @@ static int drm_open_helper(struct file *filp, struct drm_minor *minor)
return -ENOMEM;
filp->private_data = priv;
+ filp->f_mode |= FMODE_UNSIGNED_OFFSET;
priv->filp = filp;
priv->pid = get_pid(task_pid(current));
priv->minor = minor;
diff --git a/drivers/gpu/drm/exynos/exynos_hdmi.c b/drivers/gpu/drm/exynos/exynos_hdmi.c
index abd84cbcf1c2..09c4bc0b1859 100644
--- a/drivers/gpu/drm/exynos/exynos_hdmi.c
+++ b/drivers/gpu/drm/exynos/exynos_hdmi.c
@@ -954,8 +954,6 @@ static int hdmi_create_connector(struct drm_encoder *encoder)
drm_mode_connector_attach_encoder(connector, encoder);
if (hdata->bridge) {
- encoder->bridge = hdata->bridge;
- hdata->bridge->encoder = encoder;
ret = drm_bridge_attach(encoder, hdata->bridge, NULL);
if (ret)
DRM_ERROR("Failed to attach bridge\n");
diff --git a/drivers/gpu/drm/exynos/exynos_mixer.c b/drivers/gpu/drm/exynos/exynos_mixer.c
index 257299ec95c4..272c79f5f5bf 100644
--- a/drivers/gpu/drm/exynos/exynos_mixer.c
+++ b/drivers/gpu/drm/exynos/exynos_mixer.c
@@ -473,7 +473,7 @@ static void vp_video_buffer(struct mixer_context *ctx,
chroma_addr[1] = chroma_addr[0] + 0x40;
} else {
luma_addr[1] = luma_addr[0] + fb->pitches[0];
- chroma_addr[1] = chroma_addr[0] + fb->pitches[0];
+ chroma_addr[1] = chroma_addr[0] + fb->pitches[1];
}
} else {
luma_addr[1] = 0;
@@ -482,6 +482,7 @@ static void vp_video_buffer(struct mixer_context *ctx,
spin_lock_irqsave(&ctx->reg_slock, flags);
+ vp_reg_write(ctx, VP_SHADOW_UPDATE, 1);
/* interlace or progressive scan mode */
val = (test_bit(MXR_BIT_INTERLACE, &ctx->flags) ? ~0 : 0);
vp_reg_writemask(ctx, VP_MODE, val, VP_MODE_LINE_SKIP);
@@ -495,21 +496,23 @@ static void vp_video_buffer(struct mixer_context *ctx,
vp_reg_write(ctx, VP_IMG_SIZE_Y, VP_IMG_HSIZE(fb->pitches[0]) |
VP_IMG_VSIZE(fb->height));
/* chroma plane for NV12/NV21 is half the height of the luma plane */
- vp_reg_write(ctx, VP_IMG_SIZE_C, VP_IMG_HSIZE(fb->pitches[0]) |
+ vp_reg_write(ctx, VP_IMG_SIZE_C, VP_IMG_HSIZE(fb->pitches[1]) |
VP_IMG_VSIZE(fb->height / 2));
vp_reg_write(ctx, VP_SRC_WIDTH, state->src.w);
- vp_reg_write(ctx, VP_SRC_HEIGHT, state->src.h);
vp_reg_write(ctx, VP_SRC_H_POSITION,
VP_SRC_H_POSITION_VAL(state->src.x));
- vp_reg_write(ctx, VP_SRC_V_POSITION, state->src.y);
-
vp_reg_write(ctx, VP_DST_WIDTH, state->crtc.w);
vp_reg_write(ctx, VP_DST_H_POSITION, state->crtc.x);
+
if (test_bit(MXR_BIT_INTERLACE, &ctx->flags)) {
+ vp_reg_write(ctx, VP_SRC_HEIGHT, state->src.h / 2);
+ vp_reg_write(ctx, VP_SRC_V_POSITION, state->src.y / 2);
vp_reg_write(ctx, VP_DST_HEIGHT, state->crtc.h / 2);
vp_reg_write(ctx, VP_DST_V_POSITION, state->crtc.y / 2);
} else {
+ vp_reg_write(ctx, VP_SRC_HEIGHT, state->src.h);
+ vp_reg_write(ctx, VP_SRC_V_POSITION, state->src.y);
vp_reg_write(ctx, VP_DST_HEIGHT, state->crtc.h);
vp_reg_write(ctx, VP_DST_V_POSITION, state->crtc.y);
}
@@ -699,6 +702,15 @@ static irqreturn_t mixer_irq_handler(int irq, void *arg)
/* interlace scan need to check shadow register */
if (test_bit(MXR_BIT_INTERLACE, &ctx->flags)) {
+ if (test_bit(MXR_BIT_VP_ENABLED, &ctx->flags) &&
+ vp_reg_read(ctx, VP_SHADOW_UPDATE))
+ goto out;
+
+ base = mixer_reg_read(ctx, MXR_CFG);
+ shadow = mixer_reg_read(ctx, MXR_CFG_S);
+ if (base != shadow)
+ goto out;
+
base = mixer_reg_read(ctx, MXR_GRAPHIC_BASE(0));
shadow = mixer_reg_read(ctx, MXR_GRAPHIC_BASE_S(0));
if (base != shadow)
diff --git a/drivers/gpu/drm/exynos/regs-mixer.h b/drivers/gpu/drm/exynos/regs-mixer.h
index c311f571bdf9..189cfa2470a8 100644
--- a/drivers/gpu/drm/exynos/regs-mixer.h
+++ b/drivers/gpu/drm/exynos/regs-mixer.h
@@ -47,6 +47,7 @@
#define MXR_MO 0x0304
#define MXR_RESOLUTION 0x0310
+#define MXR_CFG_S 0x2004
#define MXR_GRAPHIC0_BASE_S 0x2024
#define MXR_GRAPHIC1_BASE_S 0x2044
diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
index d596a8302ca3..854bd51b9478 100644
--- a/drivers/gpu/drm/i915/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
@@ -778,6 +778,9 @@ i915_gem_userptr_ioctl(struct drm_device *dev,
I915_USERPTR_UNSYNCHRONIZED))
return -EINVAL;
+ if (!args->user_size)
+ return -EINVAL;
+
if (offset_in_page(args->user_ptr | args->user_size))
return -EINVAL;
diff --git a/drivers/gpu/drm/i915/i915_query.c b/drivers/gpu/drm/i915/i915_query.c
index 3ace929dd90f..3f502eef2431 100644
--- a/drivers/gpu/drm/i915/i915_query.c
+++ b/drivers/gpu/drm/i915/i915_query.c
@@ -4,6 +4,8 @@
* Copyright © 2018 Intel Corporation
*/
+#include <linux/nospec.h>
+
#include "i915_drv.h"
#include "i915_query.h"
#include <uapi/drm/i915_drm.h>
@@ -100,7 +102,7 @@ int i915_query_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
for (i = 0; i < args->num_items; i++, user_item_ptr++) {
struct drm_i915_query_item item;
- u64 func_idx;
+ unsigned long func_idx;
int ret;
if (copy_from_user(&item, user_item_ptr, sizeof(item)))
@@ -109,12 +111,17 @@ int i915_query_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
if (item.query_id == 0)
return -EINVAL;
+ if (overflows_type(item.query_id - 1, unsigned long))
+ return -EINVAL;
+
func_idx = item.query_id - 1;
- if (func_idx < ARRAY_SIZE(i915_query_funcs))
+ ret = -EINVAL;
+ if (func_idx < ARRAY_SIZE(i915_query_funcs)) {
+ func_idx = array_index_nospec(func_idx,
+ ARRAY_SIZE(i915_query_funcs));
ret = i915_query_funcs[func_idx](dev_priv, &item);
- else
- ret = -EINVAL;
+ }
/* Only write the length back to userspace if they differ. */
if (ret != item.length && put_user(ret, &user_item_ptr->length))
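The i915_query hunk is a standard Spectre-v1 hardening: after the bounds check, array_index_nospec() clamps the index on the speculative path too, so a mis-trained branch cannot drive an out-of-bounds table load whose result leaks through the cache. Generic sketch of the idiom; the table and handler type are illustrative:

#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/nospec.h>

typedef int (*query_fn)(void *arg);
static const query_fn handlers[4];	/* assume populated elsewhere */

static int dispatch(unsigned long idx, void *arg)
{
	int ret = -EINVAL;

	if (idx < ARRAY_SIZE(handlers)) {
		/* clamp idx for the speculative path as well */
		idx = array_index_nospec(idx, ARRAY_SIZE(handlers));
		ret = handlers[idx](arg);
	}
	return ret;
}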
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index e6a8c0ee7df1..8a69a9275e28 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -7326,6 +7326,9 @@ enum {
#define SLICE_ECO_CHICKEN0 _MMIO(0x7308)
#define PIXEL_MASK_CAMMING_DISABLE (1 << 14)
+#define GEN9_WM_CHICKEN3 _MMIO(0x5588)
+#define GEN9_FACTOR_IN_CLR_VAL_HIZ (1 << 9)
+
/* WaCatErrorRejectionIssue */
#define GEN7_SQ_CHICKEN_MBCUNIT_CONFIG _MMIO(0x9030)
#define GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB (1<<11)
diff --git a/drivers/gpu/drm/i915/intel_cdclk.c b/drivers/gpu/drm/i915/intel_cdclk.c
index 32d24c69da3c..704ddb4d3ca7 100644
--- a/drivers/gpu/drm/i915/intel_cdclk.c
+++ b/drivers/gpu/drm/i915/intel_cdclk.c
@@ -2302,9 +2302,44 @@ static int bdw_modeset_calc_cdclk(struct drm_atomic_state *state)
return 0;
}
+static int skl_dpll0_vco(struct intel_atomic_state *intel_state)
+{
+ struct drm_i915_private *dev_priv = to_i915(intel_state->base.dev);
+ struct intel_crtc *crtc;
+ struct intel_crtc_state *crtc_state;
+ int vco, i;
+
+ vco = intel_state->cdclk.logical.vco;
+ if (!vco)
+ vco = dev_priv->skl_preferred_vco_freq;
+
+ for_each_new_intel_crtc_in_state(intel_state, crtc, crtc_state, i) {
+ if (!crtc_state->base.enable)
+ continue;
+
+ if (!intel_crtc_has_type(crtc_state, INTEL_OUTPUT_EDP))
+ continue;
+
+ /*
+ * DPLL0 VCO may need to be adjusted to get the correct
+ * clock for eDP. This will affect cdclk as well.
+ */
+ switch (crtc_state->port_clock / 2) {
+ case 108000:
+ case 216000:
+ vco = 8640000;
+ break;
+ default:
+ vco = 8100000;
+ break;
+ }
+ }
+
+ return vco;
+}
+
static int skl_modeset_calc_cdclk(struct drm_atomic_state *state)
{
- struct drm_i915_private *dev_priv = to_i915(state->dev);
struct intel_atomic_state *intel_state = to_intel_atomic_state(state);
int min_cdclk, cdclk, vco;
@@ -2312,9 +2347,7 @@ static int skl_modeset_calc_cdclk(struct drm_atomic_state *state)
if (min_cdclk < 0)
return min_cdclk;
- vco = intel_state->cdclk.logical.vco;
- if (!vco)
- vco = dev_priv->skl_preferred_vco_freq;
+ vco = skl_dpll0_vco(intel_state);
/*
* FIXME should also account for plane ratio
diff --git a/drivers/gpu/drm/i915/intel_csr.c b/drivers/gpu/drm/i915/intel_csr.c
index 41e6c75a7f3c..f9550ea46c26 100644
--- a/drivers/gpu/drm/i915/intel_csr.c
+++ b/drivers/gpu/drm/i915/intel_csr.c
@@ -35,6 +35,7 @@
*/
#define I915_CSR_GLK "i915/glk_dmc_ver1_04.bin"
+MODULE_FIRMWARE(I915_CSR_GLK);
#define GLK_CSR_VERSION_REQUIRED CSR_VERSION(1, 4)
#define I915_CSR_CNL "i915/cnl_dmc_ver1_07.bin"
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 3b48fd2561fe..56004ffbd8bb 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -15178,6 +15178,8 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev)
memset(&crtc->base.mode, 0, sizeof(crtc->base.mode));
if (crtc_state->base.active) {
intel_mode_from_pipe_config(&crtc->base.mode, crtc_state);
+ crtc->base.mode.hdisplay = crtc_state->pipe_src_w;
+ crtc->base.mode.vdisplay = crtc_state->pipe_src_h;
intel_mode_from_pipe_config(&crtc_state->base.adjusted_mode, crtc_state);
WARN_ON(drm_atomic_set_mode_for_crtc(crtc->base.state, &crtc->base.mode));
diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
index 9a4a51e79fa1..b7b4cfdeb974 100644
--- a/drivers/gpu/drm/i915/intel_dp.c
+++ b/drivers/gpu/drm/i915/intel_dp.c
@@ -1881,26 +1881,6 @@ found:
reduce_m_n);
}
- /*
- * DPLL0 VCO may need to be adjusted to get the correct
- * clock for eDP. This will affect cdclk as well.
- */
- if (intel_dp_is_edp(intel_dp) && IS_GEN9_BC(dev_priv)) {
- int vco;
-
- switch (pipe_config->port_clock / 2) {
- case 108000:
- case 216000:
- vco = 8640000;
- break;
- default:
- vco = 8100000;
- break;
- }
-
- to_intel_atomic_state(pipe_config->base.state)->cdclk.logical.vco = vco;
- }
-
if (!HAS_DDI(dev_priv))
intel_dp_set_clock(encoder, pipe_config);
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index 4ba139c27fba..f7c25828d3bb 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -1149,6 +1149,10 @@ static int gen9_init_workarounds(struct intel_engine_cs *engine)
WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_GPGPU_LEVEL_MASK,
GEN9_PREEMPT_GPGPU_COMMAND_LEVEL);
+ /* WaClearHIZ_WM_CHICKEN3:bxt,glk */
+ if (IS_GEN9_LP(dev_priv))
+ WA_SET_BIT_MASKED(GEN9_WM_CHICKEN3, GEN9_FACTOR_IN_CLR_VAL_HIZ);
+
/* WaVFEStateAfterPipeControlwithMediaStateClear:skl,bxt,glk,cfl */
ret = wa_ring_whitelist_reg(engine, GEN9_CTX_PREEMPT_REG);
if (ret)
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index e3a5f673ff67..8704f7f8d072 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -884,6 +884,7 @@ static void execlists_submission_tasklet(unsigned long data)
head = execlists->csb_head;
tail = READ_ONCE(buf[write_idx]);
+ rmb(); /* Hopefully paired with a wmb() in HW */
}
GEM_TRACE("%s cs-irq head=%d [%d%s], tail=%d [%d%s]\n",
engine->name,
diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c
index d35d2d50f595..e125d16a1aa7 100644
--- a/drivers/gpu/drm/i915/intel_lvds.c
+++ b/drivers/gpu/drm/i915/intel_lvds.c
@@ -326,7 +326,8 @@ static void intel_enable_lvds(struct intel_encoder *encoder,
I915_WRITE(PP_CONTROL(0), I915_READ(PP_CONTROL(0)) | PANEL_POWER_ON);
POSTING_READ(lvds_encoder->reg);
- if (intel_wait_for_register(dev_priv, PP_STATUS(0), PP_ON, PP_ON, 1000))
+
+ if (intel_wait_for_register(dev_priv, PP_STATUS(0), PP_ON, PP_ON, 5000))
DRM_ERROR("timed out waiting for panel to power on\n");
intel_panel_enable_backlight(pipe_config, conn_state);
@@ -573,6 +574,36 @@ exit:
return NOTIFY_OK;
}
+static int
+intel_lvds_connector_register(struct drm_connector *connector)
+{
+ struct intel_lvds_connector *lvds = to_lvds_connector(connector);
+ int ret;
+
+ ret = intel_connector_register(connector);
+ if (ret)
+ return ret;
+
+ lvds->lid_notifier.notifier_call = intel_lid_notify;
+ if (acpi_lid_notifier_register(&lvds->lid_notifier)) {
+ DRM_DEBUG_KMS("lid notifier registration failed\n");
+ lvds->lid_notifier.notifier_call = NULL;
+ }
+
+ return 0;
+}
+
+static void
+intel_lvds_connector_unregister(struct drm_connector *connector)
+{
+ struct intel_lvds_connector *lvds = to_lvds_connector(connector);
+
+ if (lvds->lid_notifier.notifier_call)
+ acpi_lid_notifier_unregister(&lvds->lid_notifier);
+
+ intel_connector_unregister(connector);
+}
+
/**
* intel_lvds_destroy - unregister and free LVDS structures
* @connector: connector to free
@@ -585,9 +616,6 @@ static void intel_lvds_destroy(struct drm_connector *connector)
struct intel_lvds_connector *lvds_connector =
to_lvds_connector(connector);
- if (lvds_connector->lid_notifier.notifier_call)
- acpi_lid_notifier_unregister(&lvds_connector->lid_notifier);
-
if (!IS_ERR_OR_NULL(lvds_connector->base.edid))
kfree(lvds_connector->base.edid);
@@ -608,8 +636,8 @@ static const struct drm_connector_funcs intel_lvds_connector_funcs = {
.fill_modes = drm_helper_probe_single_connector_modes,
.atomic_get_property = intel_digital_connector_atomic_get_property,
.atomic_set_property = intel_digital_connector_atomic_set_property,
- .late_register = intel_connector_register,
- .early_unregister = intel_connector_unregister,
+ .late_register = intel_lvds_connector_register,
+ .early_unregister = intel_lvds_connector_unregister,
.destroy = intel_lvds_destroy,
.atomic_destroy_state = drm_atomic_helper_connector_destroy_state,
.atomic_duplicate_state = intel_digital_connector_duplicate_state,
@@ -826,6 +854,14 @@ static const struct dmi_system_id intel_no_lvds[] = {
DMI_EXACT_MATCH(DMI_BOARD_NAME, "D525MW"),
},
},
+ {
+ .callback = intel_no_lvds_dmi_callback,
+ .ident = "Radiant P845",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Radiant Systems Inc"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "P845"),
+ },
+ },
{ } /* terminating entry */
};
@@ -1149,12 +1185,6 @@ out:
lvds_encoder->a3_power = lvds & LVDS_A3_POWER_MASK;
- lvds_connector->lid_notifier.notifier_call = intel_lid_notify;
- if (acpi_lid_notifier_register(&lvds_connector->lid_notifier)) {
- DRM_DEBUG_KMS("lid notifier registration failed\n");
- lvds_connector->lid_notifier.notifier_call = NULL;
- }
-
return;
failed:
diff --git a/drivers/gpu/drm/meson/meson_dw_hdmi.c b/drivers/gpu/drm/meson/meson_dw_hdmi.c
index a393095aac1a..c9ad45686e7a 100644
--- a/drivers/gpu/drm/meson/meson_dw_hdmi.c
+++ b/drivers/gpu/drm/meson/meson_dw_hdmi.c
@@ -529,7 +529,7 @@ static irqreturn_t dw_hdmi_top_thread_irq(int irq, void *dev_id)
if (stat & HDMITX_TOP_INTR_HPD_RISE)
hpd_connected = true;
- dw_hdmi_setup_rx_sense(dw_hdmi->dev, hpd_connected,
+ dw_hdmi_setup_rx_sense(dw_hdmi->hdmi, hpd_connected,
hpd_connected);
drm_helper_hpd_irq_event(dw_hdmi->encoder.dev);
diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
index 6f402c4f2bdd..ab61c038f42c 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -214,7 +214,6 @@ nouveau_bo_new(struct nouveau_cli *cli, u64 size, int align,
INIT_LIST_HEAD(&nvbo->entry);
INIT_LIST_HEAD(&nvbo->vma_list);
nvbo->bo.bdev = &drm->ttm.bdev;
- nvbo->cli = cli;
/* This is confusing, and doesn't actually mean we want an uncached
* mapping, but is what NOUVEAU_GEM_DOMAIN_COHERENT gets translated
diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.h b/drivers/gpu/drm/nouveau/nouveau_bo.h
index be8e00b49cde..73c48440d4d7 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.h
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.h
@@ -26,8 +26,6 @@ struct nouveau_bo {
struct list_head vma_list;
- struct nouveau_cli *cli;
-
unsigned contig:1;
unsigned page:5;
unsigned kind:8;
diff --git a/drivers/gpu/drm/nouveau/nouveau_ttm.c b/drivers/gpu/drm/nouveau/nouveau_ttm.c
index dff51a0ee028..8c093ca4222e 100644
--- a/drivers/gpu/drm/nouveau/nouveau_ttm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_ttm.c
@@ -63,7 +63,7 @@ nouveau_vram_manager_new(struct ttm_mem_type_manager *man,
struct ttm_mem_reg *reg)
{
struct nouveau_bo *nvbo = nouveau_bo(bo);
- struct nouveau_drm *drm = nvbo->cli->drm;
+ struct nouveau_drm *drm = nouveau_bdev(bo->bdev);
struct nouveau_mem *mem;
int ret;
@@ -103,7 +103,7 @@ nouveau_gart_manager_new(struct ttm_mem_type_manager *man,
struct ttm_mem_reg *reg)
{
struct nouveau_bo *nvbo = nouveau_bo(bo);
- struct nouveau_drm *drm = nvbo->cli->drm;
+ struct nouveau_drm *drm = nouveau_bdev(bo->bdev);
struct nouveau_mem *mem;
int ret;
@@ -131,7 +131,7 @@ nv04_gart_manager_new(struct ttm_mem_type_manager *man,
struct ttm_mem_reg *reg)
{
struct nouveau_bo *nvbo = nouveau_bo(bo);
- struct nouveau_drm *drm = nvbo->cli->drm;
+ struct nouveau_drm *drm = nouveau_bdev(bo->bdev);
struct nouveau_mem *mem;
int ret;
diff --git a/drivers/gpu/drm/nouveau/nv50_display.c b/drivers/gpu/drm/nouveau/nv50_display.c
index 8bd739cfd00d..2b3ccd850750 100644
--- a/drivers/gpu/drm/nouveau/nv50_display.c
+++ b/drivers/gpu/drm/nouveau/nv50_display.c
@@ -3264,10 +3264,11 @@ nv50_mstm_destroy_connector(struct drm_dp_mst_topology_mgr *mgr,
drm_connector_unregister(&mstc->connector);
- drm_modeset_lock_all(drm->dev);
drm_fb_helper_remove_one_connector(&drm->fbcon->helper, &mstc->connector);
+
+ drm_modeset_lock(&drm->dev->mode_config.connection_mutex, NULL);
mstc->port = NULL;
- drm_modeset_unlock_all(drm->dev);
+ drm_modeset_unlock(&drm->dev->mode_config.connection_mutex);
drm_connector_unreference(&mstc->connector);
}
@@ -3277,9 +3278,7 @@ nv50_mstm_register_connector(struct drm_connector *connector)
{
struct nouveau_drm *drm = nouveau_drm(connector->dev);
- drm_modeset_lock_all(drm->dev);
drm_fb_helper_add_one_connector(&drm->fbcon->helper, connector);
- drm_modeset_unlock_all(drm->dev);
drm_connector_register(connector);
}
diff --git a/drivers/gpu/drm/omapdrm/dss/dispc.c b/drivers/gpu/drm/omapdrm/dss/dispc.c
index 5e2e65e88847..7f3ac6b13b56 100644
--- a/drivers/gpu/drm/omapdrm/dss/dispc.c
+++ b/drivers/gpu/drm/omapdrm/dss/dispc.c
@@ -828,6 +828,12 @@ static void dispc_ovl_set_scale_coef(struct dispc_device *dispc,
h_coef = dispc_ovl_get_scale_coef(fir_hinc, true);
v_coef = dispc_ovl_get_scale_coef(fir_vinc, five_taps);
+ if (!h_coef || !v_coef) {
+ dev_err(&dispc->pdev->dev, "%s: failed to find scale coefs\n",
+ __func__);
+ return;
+ }
+
for (i = 0; i < 8; i++) {
u32 h, hv;
@@ -2342,7 +2348,7 @@ static int dispc_ovl_calc_scaling_24xx(struct dispc_device *dispc,
}
if (in_width > maxsinglelinewidth) {
- DSSERR("Cannot scale max input width exceeded");
+ DSSERR("Cannot scale max input width exceeded\n");
return -EINVAL;
}
return 0;
@@ -2424,13 +2430,13 @@ again:
}
if (in_width > (maxsinglelinewidth * 2)) {
- DSSERR("Cannot setup scaling");
- DSSERR("width exceeds maximum width possible");
+ DSSERR("Cannot setup scaling\n");
+ DSSERR("width exceeds maximum width possible\n");
return -EINVAL;
}
if (in_width > maxsinglelinewidth && *five_taps) {
- DSSERR("cannot setup scaling with five taps");
+ DSSERR("cannot setup scaling with five taps\n");
return -EINVAL;
}
return 0;
@@ -2472,7 +2478,7 @@ static int dispc_ovl_calc_scaling_44xx(struct dispc_device *dispc,
in_width > maxsinglelinewidth && ++*decim_x);
if (in_width > maxsinglelinewidth) {
- DSSERR("Cannot scale width exceeds max line width");
+ DSSERR("Cannot scale width exceeds max line width\n");
return -EINVAL;
}
@@ -2490,7 +2496,7 @@ static int dispc_ovl_calc_scaling_44xx(struct dispc_device *dispc,
* bandwidth. Despite what theory says this appears to
* be true also for 16-bit color formats.
*/
- DSSERR("Not enough bandwidth, too much downscaling (x-decimation factor %d > 4)", *decim_x);
+ DSSERR("Not enough bandwidth, too much downscaling (x-decimation factor %d > 4)\n", *decim_x);
return -EINVAL;
}
@@ -4633,7 +4639,7 @@ static int dispc_errata_i734_wa_init(struct dispc_device *dispc)
i734_buf.size, &i734_buf.paddr,
GFP_KERNEL);
if (!i734_buf.vaddr) {
- dev_err(&dispc->pdev->dev, "%s: dma_alloc_writecombine failed",
+ dev_err(&dispc->pdev->dev, "%s: dma_alloc_writecombine failed\n",
__func__);
return -ENOMEM;
}
diff --git a/drivers/gpu/drm/omapdrm/dss/hdmi4.c b/drivers/gpu/drm/omapdrm/dss/hdmi4.c
index 97c88861d67a..5879f45f6fc9 100644
--- a/drivers/gpu/drm/omapdrm/dss/hdmi4.c
+++ b/drivers/gpu/drm/omapdrm/dss/hdmi4.c
@@ -679,7 +679,7 @@ static int hdmi_audio_config(struct device *dev,
struct omap_dss_audio *dss_audio)
{
struct omap_hdmi *hd = dev_get_drvdata(dev);
- int ret;
+ int ret = 0;
mutex_lock(&hd->lock);
diff --git a/drivers/gpu/drm/omapdrm/dss/hdmi4_core.c b/drivers/gpu/drm/omapdrm/dss/hdmi4_core.c
index 35ed2add6189..813ba42f2753 100644
--- a/drivers/gpu/drm/omapdrm/dss/hdmi4_core.c
+++ b/drivers/gpu/drm/omapdrm/dss/hdmi4_core.c
@@ -922,8 +922,13 @@ int hdmi4_core_init(struct platform_device *pdev, struct hdmi_core_data *core)
{
const struct hdmi4_features *features;
struct resource *res;
+ const struct soc_device_attribute *soc;
- features = soc_device_match(hdmi4_soc_devices)->data;
+ soc = soc_device_match(hdmi4_soc_devices);
+ if (!soc)
+ return -ENODEV;
+
+ features = soc->data;
core->cts_swmode = features->cts_swmode;
core->audio_use_mclk = features->audio_use_mclk;
diff --git a/drivers/gpu/drm/omapdrm/dss/hdmi5.c b/drivers/gpu/drm/omapdrm/dss/hdmi5.c
index d28da9ac3e90..ae1a001d1b83 100644
--- a/drivers/gpu/drm/omapdrm/dss/hdmi5.c
+++ b/drivers/gpu/drm/omapdrm/dss/hdmi5.c
@@ -671,7 +671,7 @@ static int hdmi_audio_config(struct device *dev,
struct omap_dss_audio *dss_audio)
{
struct omap_hdmi *hd = dev_get_drvdata(dev);
- int ret;
+ int ret = 0;
mutex_lock(&hd->lock);
diff --git a/drivers/gpu/drm/omapdrm/dss/sdi.c b/drivers/gpu/drm/omapdrm/dss/sdi.c
index 68a40ae26f5b..1e2c931f6acf 100644
--- a/drivers/gpu/drm/omapdrm/dss/sdi.c
+++ b/drivers/gpu/drm/omapdrm/dss/sdi.c
@@ -82,7 +82,7 @@ static int sdi_calc_clock_div(struct sdi_device *sdi, unsigned long pclk,
struct dispc_clock_info *dispc_cinfo)
{
int i;
- struct sdi_clk_calc_ctx ctx = { .sdi = sdi };
+ struct sdi_clk_calc_ctx ctx;
/*
* DSS fclk gives us very few possibilities, so finding a good pixel
@@ -95,6 +95,9 @@ static int sdi_calc_clock_div(struct sdi_device *sdi, unsigned long pclk,
bool ok;
memset(&ctx, 0, sizeof(ctx));
+
+ ctx.sdi = sdi;
+
if (pclk > 1000 * i * i * i)
ctx.pck_min = max(pclk - 1000 * i * i * i, 0lu);
else
diff --git a/drivers/gpu/drm/omapdrm/omap_connector.c b/drivers/gpu/drm/omapdrm/omap_connector.c
index a0d7b1d905e8..5cde26ac937b 100644
--- a/drivers/gpu/drm/omapdrm/omap_connector.c
+++ b/drivers/gpu/drm/omapdrm/omap_connector.c
@@ -121,6 +121,9 @@ static int omap_connector_get_modes(struct drm_connector *connector)
if (dssdrv->read_edid) {
void *edid = kzalloc(MAX_EDID, GFP_KERNEL);
+ if (!edid)
+ return 0;
+
if ((dssdrv->read_edid(dssdev, edid, MAX_EDID) > 0) &&
drm_edid_is_valid(edid)) {
drm_mode_connector_update_edid_property(
@@ -139,6 +142,9 @@ static int omap_connector_get_modes(struct drm_connector *connector)
struct drm_display_mode *mode = drm_mode_create(dev);
struct videomode vm = {0};
+ if (!mode)
+ return 0;
+
dssdrv->get_timings(dssdev, &vm);
drm_display_mode_from_videomode(&vm, mode);
@@ -200,6 +206,10 @@ static int omap_connector_mode_valid(struct drm_connector *connector,
if (!r) {
/* check if vrefresh is still valid */
new_mode = drm_mode_duplicate(dev, mode);
+
+ if (!new_mode)
+ return MODE_BAD;
+
new_mode->clock = vm.pixelclock / 1000;
new_mode->vrefresh = 0;
if (mode->vrefresh == drm_mode_vrefresh(new_mode))
diff --git a/drivers/gpu/drm/omapdrm/omap_dmm_tiler.c b/drivers/gpu/drm/omapdrm/omap_dmm_tiler.c
index f9fa1c90b35c..401c02e9e6b2 100644
--- a/drivers/gpu/drm/omapdrm/omap_dmm_tiler.c
+++ b/drivers/gpu/drm/omapdrm/omap_dmm_tiler.c
@@ -401,12 +401,16 @@ int tiler_unpin(struct tiler_block *block)
struct tiler_block *tiler_reserve_2d(enum tiler_fmt fmt, u16 w,
u16 h, u16 align)
{
- struct tiler_block *block = kzalloc(sizeof(*block), GFP_KERNEL);
+ struct tiler_block *block;
u32 min_align = 128;
int ret;
unsigned long flags;
u32 slot_bytes;
+ block = kzalloc(sizeof(*block), GFP_KERNEL);
+ if (!block)
+ return ERR_PTR(-ENOMEM);
+
BUG_ON(!validfmt(fmt));
/* convert width/height to slots */
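
[Editor's note: tiler_reserve_2d() returns a pointer, so the new allocation-failure path reports -ENOMEM through ERR_PTR() instead of crashing later in BUG_ON() or list handling. A caller-side sketch of the convention, assuming the function as patched above:

	#include <linux/err.h>

	static int example_reserve(enum tiler_fmt fmt, u16 w, u16 h, u16 align)
	{
		struct tiler_block *block;

		block = tiler_reserve_2d(fmt, w, h, align);
		if (IS_ERR(block))
			return PTR_ERR(block);	/* -ENOMEM propagates cleanly */

		/* ... use block ... */
		return 0;
	}
]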
diff --git a/drivers/gpu/drm/omapdrm/tcm-sita.c b/drivers/gpu/drm/omapdrm/tcm-sita.c
index d7f7bc9f061a..817be3c41863 100644
--- a/drivers/gpu/drm/omapdrm/tcm-sita.c
+++ b/drivers/gpu/drm/omapdrm/tcm-sita.c
@@ -90,7 +90,7 @@ static int l2r_t2b(u16 w, u16 h, u16 a, s16 offset,
{
int i;
unsigned long index;
- bool area_free;
+ bool area_free = false;
unsigned long slots_per_band = PAGE_SIZE / slot_bytes;
unsigned long bit_offset = (offset > 0) ? offset / slot_bytes : 0;
unsigned long curr_bit = bit_offset;
diff --git a/drivers/gpu/drm/rcar-du/rcar_lvds.c b/drivers/gpu/drm/rcar-du/rcar_lvds.c
index 3d2d3bbd1342..155ad840f3c5 100644
--- a/drivers/gpu/drm/rcar-du/rcar_lvds.c
+++ b/drivers/gpu/drm/rcar-du/rcar_lvds.c
@@ -88,6 +88,9 @@ static int rcar_lvds_connector_atomic_check(struct drm_connector *connector,
const struct drm_display_mode *panel_mode;
struct drm_crtc_state *crtc_state;
+ if (!state->crtc)
+ return 0;
+
if (list_empty(&connector->modes)) {
dev_dbg(lvds->dev, "connector: empty modes list\n");
return -EINVAL;
diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc.c b/drivers/gpu/drm/ttm/ttm_page_alloc.c
index f0481b7b60c5..06c94e3a5f15 100644
--- a/drivers/gpu/drm/ttm/ttm_page_alloc.c
+++ b/drivers/gpu/drm/ttm/ttm_page_alloc.c
@@ -910,7 +910,8 @@ static int ttm_get_pages(struct page **pages, unsigned npages, int flags,
while (npages >= HPAGE_PMD_NR) {
gfp_t huge_flags = gfp_flags;
- huge_flags |= GFP_TRANSHUGE;
+ huge_flags |= GFP_TRANSHUGE_LIGHT | __GFP_NORETRY |
+ __GFP_KSWAPD_RECLAIM;
huge_flags &= ~__GFP_MOVABLE;
huge_flags &= ~__GFP_COMP;
p = alloc_pages(huge_flags, HPAGE_PMD_ORDER);
@@ -1027,11 +1028,15 @@ int ttm_page_alloc_init(struct ttm_mem_global *glob, unsigned max_pages)
GFP_USER | GFP_DMA32, "uc dma", 0);
ttm_page_pool_init_locked(&_manager->wc_pool_huge,
- GFP_TRANSHUGE & ~(__GFP_MOVABLE | __GFP_COMP),
+ (GFP_TRANSHUGE_LIGHT | __GFP_NORETRY |
+ __GFP_KSWAPD_RECLAIM) &
+ ~(__GFP_MOVABLE | __GFP_COMP),
"wc huge", order);
ttm_page_pool_init_locked(&_manager->uc_pool_huge,
- GFP_TRANSHUGE & ~(__GFP_MOVABLE | __GFP_COMP)
+ (GFP_TRANSHUGE_LIGHT | __GFP_NORETRY |
+ __GFP_KSWAPD_RECLAIM) &
+ ~(__GFP_MOVABLE | __GFP_COMP)
, "uc huge", order);
_manager->options.max_size = max_pages;
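
[Editor's note: both TTM hunks replace GFP_TRANSHUGE with GFP_TRANSHUGE_LIGHT plus __GFP_NORETRY and __GFP_KSWAPD_RECLAIM, so a failed huge-page allocation wakes kswapd and falls back to order-0 pages instead of stalling in direct reclaim and compaction. The fallback shape, condensed from the first hunk above with comments added:

	gfp_t huge_flags = gfp_flags | GFP_TRANSHUGE_LIGHT |
			   __GFP_NORETRY | __GFP_KSWAPD_RECLAIM;

	huge_flags &= ~__GFP_MOVABLE;	/* pool pages must not migrate */
	huge_flags &= ~__GFP_COMP;	/* plain order-9 run, not a compound page */

	p = alloc_pages(huge_flags, HPAGE_PMD_ORDER);
	if (!p) {
		/* cheap failure: continue with single pages using gfp_flags */
	}
]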
diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c b/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c
index 8a25d1974385..f63d99c302e4 100644
--- a/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c
+++ b/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c
@@ -910,7 +910,8 @@ static gfp_t ttm_dma_pool_gfp_flags(struct ttm_dma_tt *ttm_dma, bool huge)
gfp_flags |= __GFP_ZERO;
if (huge) {
- gfp_flags |= GFP_TRANSHUGE;
+ gfp_flags |= GFP_TRANSHUGE_LIGHT | __GFP_NORETRY |
+ __GFP_KSWAPD_RECLAIM;
gfp_flags &= ~__GFP_MOVABLE;
gfp_flags &= ~__GFP_COMP;
}
diff --git a/drivers/gpu/drm/vc4/vc4_crtc.c b/drivers/gpu/drm/vc4/vc4_crtc.c
index bf4667481935..c61dff594195 100644
--- a/drivers/gpu/drm/vc4/vc4_crtc.c
+++ b/drivers/gpu/drm/vc4/vc4_crtc.c
@@ -760,6 +760,7 @@ static irqreturn_t vc4_crtc_irq_handler(int irq, void *data)
struct vc4_async_flip_state {
struct drm_crtc *crtc;
struct drm_framebuffer *fb;
+ struct drm_framebuffer *old_fb;
struct drm_pending_vblank_event *event;
struct vc4_seqno_cb cb;
@@ -789,6 +790,23 @@ vc4_async_page_flip_complete(struct vc4_seqno_cb *cb)
drm_crtc_vblank_put(crtc);
drm_framebuffer_put(flip_state->fb);
+
+ /* Decrement the BO usecnt in order to keep the inc/dec calls balanced
+ * when the planes are updated through the async update path.
+ * FIXME: we should move to generic async-page-flip when it's
+ * available, so that we can get rid of this hand-made cleanup_fb()
+ * logic.
+ */
+ if (flip_state->old_fb) {
+ struct drm_gem_cma_object *cma_bo;
+ struct vc4_bo *bo;
+
+ cma_bo = drm_fb_cma_get_gem_obj(flip_state->old_fb, 0);
+ bo = to_vc4_bo(&cma_bo->base);
+ vc4_bo_dec_usecnt(bo);
+ drm_framebuffer_put(flip_state->old_fb);
+ }
+
kfree(flip_state);
up(&vc4->async_modeset);
@@ -813,9 +831,22 @@ static int vc4_async_page_flip(struct drm_crtc *crtc,
struct drm_gem_cma_object *cma_bo = drm_fb_cma_get_gem_obj(fb, 0);
struct vc4_bo *bo = to_vc4_bo(&cma_bo->base);
+ /* Increment the BO usecnt here, so that we never end up with an
+ * unbalanced number of vc4_bo_{dec,inc}_usecnt() calls when the
+ * plane is later updated through the non-async path.
+ * FIXME: we should move to generic async-page-flip when it's
+ * available, so that we can get rid of this hand-made prepare_fb()
+ * logic.
+ */
+ ret = vc4_bo_inc_usecnt(bo);
+ if (ret)
+ return ret;
+
flip_state = kzalloc(sizeof(*flip_state), GFP_KERNEL);
- if (!flip_state)
+ if (!flip_state) {
+ vc4_bo_dec_usecnt(bo);
return -ENOMEM;
+ }
drm_framebuffer_get(fb);
flip_state->fb = fb;
@@ -826,10 +857,23 @@ static int vc4_async_page_flip(struct drm_crtc *crtc,
ret = down_interruptible(&vc4->async_modeset);
if (ret) {
drm_framebuffer_put(fb);
+ vc4_bo_dec_usecnt(bo);
kfree(flip_state);
return ret;
}
+ /* Save the current FB before it's replaced by the new one in
+ * drm_atomic_set_fb_for_plane(). We'll need the old FB in
+ * vc4_async_page_flip_complete() to decrement the BO usecnt and keep
+ * it consistent.
+ * FIXME: we should move to generic async-page-flip when it's
+ * available, so that we can get rid of this hand-made cleanup_fb()
+ * logic.
+ */
+ flip_state->old_fb = plane->state->fb;
+ if (flip_state->old_fb)
+ drm_framebuffer_get(flip_state->old_fb);
+
WARN_ON(drm_crtc_vblank_get(crtc) != 0);
/* Immediately update the plane's legacy fb pointer, so that later
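
[Editor's note: the vc4 async-flip hunks hand-roll what prepare_fb()/cleanup_fb() would normally do: the new BO's usecnt is taken before anything that can fail, every error path drops it again, and the outgoing framebuffer is stashed in flip_state->old_fb so the completion callback can release its count. Condensed from the hunks above, with the pairing made explicit:

	ret = vc4_bo_inc_usecnt(bo);	/* paired with the dec in the
					 * flip-complete callback */
	if (ret)
		return ret;

	flip_state = kzalloc(sizeof(*flip_state), GFP_KERNEL);
	if (!flip_state) {
		vc4_bo_dec_usecnt(bo);	/* undo on every failure path */
		return -ENOMEM;
	}
]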
diff --git a/drivers/gpu/drm/vc4/vc4_dpi.c b/drivers/gpu/drm/vc4/vc4_dpi.c
index 72c9dbd81d7f..f185812970da 100644
--- a/drivers/gpu/drm/vc4/vc4_dpi.c
+++ b/drivers/gpu/drm/vc4/vc4_dpi.c
@@ -96,7 +96,6 @@ struct vc4_dpi {
struct platform_device *pdev;
struct drm_encoder *encoder;
- struct drm_connector *connector;
void __iomem *regs;
@@ -164,14 +163,31 @@ static void vc4_dpi_encoder_disable(struct drm_encoder *encoder)
static void vc4_dpi_encoder_enable(struct drm_encoder *encoder)
{
+ struct drm_device *dev = encoder->dev;
struct drm_display_mode *mode = &encoder->crtc->mode;
struct vc4_dpi_encoder *vc4_encoder = to_vc4_dpi_encoder(encoder);
struct vc4_dpi *dpi = vc4_encoder->dpi;
+ struct drm_connector_list_iter conn_iter;
+ struct drm_connector *connector = NULL, *connector_scan;
u32 dpi_c = DPI_ENABLE | DPI_OUTPUT_ENABLE_MODE;
int ret;
- if (dpi->connector->display_info.num_bus_formats) {
- u32 bus_format = dpi->connector->display_info.bus_formats[0];
+ /* Look up the connector attached to DPI so we can get the
+ * bus_format. Ideally the bridge would tell us the
+ * bus_format we want, but it doesn't yet, so assume that it's
+ * uniform throughout the bridge chain.
+ */
+ drm_connector_list_iter_begin(dev, &conn_iter);
+ drm_for_each_connector_iter(connector_scan, &conn_iter) {
+ if (connector_scan->encoder == encoder) {
+ connector = connector_scan;
+ break;
+ }
+ }
+ drm_connector_list_iter_end(&conn_iter);
+
+ if (connector && connector->display_info.num_bus_formats) {
+ u32 bus_format = connector->display_info.bus_formats[0];
switch (bus_format) {
case MEDIA_BUS_FMT_RGB888_1X24:
@@ -199,6 +215,9 @@ static void vc4_dpi_encoder_enable(struct drm_encoder *encoder)
DRM_ERROR("Unknown media bus format %d\n", bus_format);
break;
}
+ } else {
+ /* Default to 24bit if no connector found. */
+ dpi_c |= VC4_SET_FIELD(DPI_FORMAT_24BIT_888_RGB, DPI_FORMAT);
}
if (mode->flags & DRM_MODE_FLAG_NHSYNC)
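
[Editor's note: with the cached dpi->connector pointer gone, the enable path discovers the connector at runtime by walking the device's connector list; the iterator begin/end pair takes and releases the connector-list lock, so it must always be balanced. A condensed sketch of the lookup:

	struct drm_connector_list_iter conn_iter;
	struct drm_connector *connector = NULL, *scan;

	drm_connector_list_iter_begin(dev, &conn_iter);
	drm_for_each_connector_iter(scan, &conn_iter) {
		if (scan->encoder == encoder) {
			connector = scan;
			break;
		}
	}
	drm_connector_list_iter_end(&conn_iter);	/* always pairs with _begin() */

	if (!connector) {
		/* fall back to a safe default, as the hunk above does */
	}
]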
diff --git a/drivers/gpu/drm/vc4/vc4_drv.c b/drivers/gpu/drm/vc4/vc4_drv.c
index 94b99c90425a..7c95ed5c5cac 100644
--- a/drivers/gpu/drm/vc4/vc4_drv.c
+++ b/drivers/gpu/drm/vc4/vc4_drv.c
@@ -130,6 +130,7 @@ static void vc4_close(struct drm_device *dev, struct drm_file *file)
struct vc4_file *vc4file = file->driver_priv;
vc4_perfmon_close_file(vc4file);
+ kfree(vc4file);
}
static const struct vm_operations_struct vc4_vm_ops = {
diff --git a/drivers/gpu/drm/vc4/vc4_plane.c b/drivers/gpu/drm/vc4/vc4_plane.c
index ce39390be389..13dcaad06798 100644
--- a/drivers/gpu/drm/vc4/vc4_plane.c
+++ b/drivers/gpu/drm/vc4/vc4_plane.c
@@ -503,7 +503,7 @@ static int vc4_plane_mode_set(struct drm_plane *plane,
* the scl fields here.
*/
if (num_planes == 1) {
- scl0 = vc4_get_scl_field(state, 1);
+ scl0 = vc4_get_scl_field(state, 0);
scl1 = scl0;
} else {
scl0 = vc4_get_scl_field(state, 1);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
index 70e1a8820a7c..8b770a8e02cd 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
@@ -1278,8 +1278,6 @@ static void vmw_master_drop(struct drm_device *dev,
dev_priv->active_master = &dev_priv->fbdev_master;
ttm_lock_set_kill(&dev_priv->fbdev_master.lock, false, SIGTERM);
ttm_vt_unlock(&dev_priv->fbdev_master.lock);
-
- vmw_fb_refresh(dev_priv);
}
/**
@@ -1483,7 +1481,6 @@ static int vmw_pm_freeze(struct device *kdev)
vmw_kms_resume(dev);
if (dev_priv->enable_fb)
vmw_fb_on(dev_priv);
- vmw_fb_refresh(dev_priv);
return -EBUSY;
}
@@ -1523,8 +1520,6 @@ static int vmw_pm_restore(struct device *kdev)
if (dev_priv->enable_fb)
vmw_fb_on(dev_priv);
- vmw_fb_refresh(dev_priv);
-
return 0;
}
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
index f34f368c1a2e..5fcbe1620d50 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
@@ -910,7 +910,6 @@ int vmw_fb_init(struct vmw_private *vmw_priv);
int vmw_fb_close(struct vmw_private *dev_priv);
int vmw_fb_off(struct vmw_private *vmw_priv);
int vmw_fb_on(struct vmw_private *vmw_priv);
-void vmw_fb_refresh(struct vmw_private *vmw_priv);
/**
* Kernel modesetting - vmwgfx_kms.c
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c
index 2582ffd36bb5..54e300365a5c 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c
@@ -441,11 +441,11 @@ static int vmwgfx_set_config_internal(struct drm_mode_set *set)
struct drm_crtc *crtc = set->crtc;
struct drm_framebuffer *fb;
struct drm_crtc *tmp;
- struct drm_modeset_acquire_ctx *ctx;
struct drm_device *dev = set->crtc->dev;
+ struct drm_modeset_acquire_ctx ctx;
int ret;
- ctx = dev->mode_config.acquire_ctx;
+ drm_modeset_acquire_init(&ctx, 0);
restart:
/*
@@ -458,7 +458,7 @@ restart:
fb = set->fb;
- ret = crtc->funcs->set_config(set, ctx);
+ ret = crtc->funcs->set_config(set, &ctx);
if (ret == 0) {
crtc->primary->crtc = crtc;
crtc->primary->fb = fb;
@@ -473,20 +473,13 @@ restart:
}
if (ret == -EDEADLK) {
- dev->mode_config.acquire_ctx = NULL;
-
-retry_locking:
- drm_modeset_backoff(ctx);
-
- ret = drm_modeset_lock_all_ctx(dev, ctx);
- if (ret)
- goto retry_locking;
-
- dev->mode_config.acquire_ctx = ctx;
-
+ drm_modeset_backoff(&ctx);
goto restart;
}
+ drm_modeset_drop_locks(&ctx);
+ drm_modeset_acquire_fini(&ctx);
+
return ret;
}
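
[Editor's note: the rewritten helper owns a stack-local acquire context instead of borrowing dev->mode_config.acquire_ctx, removing the fragile hand-over of global locking state. The canonical deadlock-backoff loop, sketched with a hypothetical worker function:

	struct drm_modeset_acquire_ctx ctx;
	int ret;

	drm_modeset_acquire_init(&ctx, 0);
retry:
	ret = do_locked_update(&ctx);	/* hypothetical: takes modeset locks */
	if (ret == -EDEADLK) {
		drm_modeset_backoff(&ctx);	/* drop locks, wait out contention */
		goto retry;
	}
	drm_modeset_drop_locks(&ctx);
	drm_modeset_acquire_fini(&ctx);
]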
@@ -624,7 +617,6 @@ static int vmw_fb_set_par(struct fb_info *info)
}
mutex_lock(&par->bo_mutex);
- drm_modeset_lock_all(vmw_priv->dev);
ret = vmw_fb_kms_framebuffer(info);
if (ret)
goto out_unlock;
@@ -657,7 +649,6 @@ out_unlock:
drm_mode_destroy(vmw_priv->dev, old_mode);
par->set_mode = mode;
- drm_modeset_unlock_all(vmw_priv->dev);
mutex_unlock(&par->bo_mutex);
return ret;
@@ -713,18 +704,14 @@ int vmw_fb_init(struct vmw_private *vmw_priv)
par->max_width = fb_width;
par->max_height = fb_height;
- drm_modeset_lock_all(vmw_priv->dev);
ret = vmw_kms_fbdev_init_data(vmw_priv, 0, par->max_width,
par->max_height, &par->con,
&par->crtc, &init_mode);
- if (ret) {
- drm_modeset_unlock_all(vmw_priv->dev);
+ if (ret)
goto err_kms;
- }
info->var.xres = init_mode->hdisplay;
info->var.yres = init_mode->vdisplay;
- drm_modeset_unlock_all(vmw_priv->dev);
/*
* Create buffers and alloc memory
@@ -832,7 +819,9 @@ int vmw_fb_close(struct vmw_private *vmw_priv)
cancel_delayed_work_sync(&par->local_work);
unregister_framebuffer(info);
+ mutex_lock(&par->bo_mutex);
(void) vmw_fb_kms_detach(par, true, true);
+ mutex_unlock(&par->bo_mutex);
vfree(par->vmalloc);
framebuffer_release(info);
@@ -877,21 +866,13 @@ int vmw_fb_on(struct vmw_private *vmw_priv)
spin_lock_irqsave(&par->dirty.lock, flags);
par->dirty.active = true;
spin_unlock_irqrestore(&par->dirty.lock, flags);
-
- return 0;
-}
-/**
- * vmw_fb_refresh - Refresh fb display
- *
- * @vmw_priv: Pointer to device private
- *
- * Call into kms to show the fbdev display(s).
- */
-void vmw_fb_refresh(struct vmw_private *vmw_priv)
-{
- if (!vmw_priv->fb_info)
- return;
+ /*
+ * Need to reschedule a dirty update, because otherwise that's
+ * only done in dirty_mark() if the previous coalesced
+ * dirty region was empty.
+ */
+ schedule_delayed_work(&par->local_work, 0);
- vmw_fb_set_par(vmw_priv->fb_info);
+ return 0;
}
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
index f11601b6fd74..96fd7a03d2f8 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
@@ -2595,6 +2595,7 @@ void vmw_kms_helper_resource_finish(struct vmw_validation_ctx *ctx,
vmw_kms_helper_buffer_finish(res->dev_priv, NULL, ctx->buf,
out_fence, NULL);
+ vmw_dmabuf_unreference(&ctx->buf);
vmw_resource_unreserve(res, false, NULL, 0);
mutex_unlock(&res->dev_priv->cmdbuf_mutex);
}
@@ -2680,7 +2681,9 @@ int vmw_kms_fbdev_init_data(struct vmw_private *dev_priv,
struct vmw_display_unit *du;
struct drm_display_mode *mode;
int i = 0;
+ int ret = 0;
+ mutex_lock(&dev_priv->dev->mode_config.mutex);
list_for_each_entry(con, &dev_priv->dev->mode_config.connector_list,
head) {
if (i == unit)
@@ -2691,7 +2694,8 @@ int vmw_kms_fbdev_init_data(struct vmw_private *dev_priv,
if (i != unit) {
DRM_ERROR("Could not find initial display unit.\n");
- return -EINVAL;
+ ret = -EINVAL;
+ goto out_unlock;
}
if (list_empty(&con->modes))
@@ -2699,7 +2703,8 @@ int vmw_kms_fbdev_init_data(struct vmw_private *dev_priv,
if (list_empty(&con->modes)) {
DRM_ERROR("Could not find initial display mode.\n");
- return -EINVAL;
+ ret = -EINVAL;
+ goto out_unlock;
}
du = vmw_connector_to_du(con);
@@ -2720,7 +2725,10 @@ int vmw_kms_fbdev_init_data(struct vmw_private *dev_priv,
head);
}
- return 0;
+ out_unlock:
+ mutex_unlock(&dev_priv->dev->mode_config.mutex);
+
+ return ret;
}
/**
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c b/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c
index cdff99211602..21d746bdc922 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c
@@ -329,8 +329,6 @@ int vmw_host_get_guestinfo(const char *guest_info_param,
struct rpc_channel channel;
char *msg, *reply = NULL;
size_t reply_len = 0;
- int ret = 0;
-
if (!vmw_msg_enabled)
return -ENODEV;
@@ -344,15 +342,14 @@ int vmw_host_get_guestinfo(const char *guest_info_param,
return -ENOMEM;
}
- if (vmw_open_channel(&channel, RPCI_PROTOCOL_NUM) ||
- vmw_send_msg(&channel, msg) ||
- vmw_recv_msg(&channel, (void *) &reply, &reply_len) ||
- vmw_close_channel(&channel)) {
- DRM_ERROR("Failed to get %s", guest_info_param);
+ if (vmw_open_channel(&channel, RPCI_PROTOCOL_NUM))
+ goto out_open;
- ret = -EINVAL;
- }
+ if (vmw_send_msg(&channel, msg) ||
+ vmw_recv_msg(&channel, (void *) &reply, &reply_len))
+ goto out_msg;
+ vmw_close_channel(&channel);
if (buffer && reply && reply_len > 0) {
		/* Remove reply code, which is the first 2 characters of
* the reply
@@ -369,7 +366,17 @@ int vmw_host_get_guestinfo(const char *guest_info_param,
kfree(reply);
kfree(msg);
- return ret;
+ return 0;
+
+out_msg:
+ vmw_close_channel(&channel);
+ kfree(reply);
+out_open:
+ *length = 0;
+ kfree(msg);
+ DRM_ERROR("Failed to get %s", guest_info_param);
+
+ return -EINVAL;
}
@@ -400,15 +407,22 @@ int vmw_host_log(const char *log)
return -ENOMEM;
}
- if (vmw_open_channel(&channel, RPCI_PROTOCOL_NUM) ||
- vmw_send_msg(&channel, msg) ||
- vmw_close_channel(&channel)) {
- DRM_ERROR("Failed to send log\n");
+ if (vmw_open_channel(&channel, RPCI_PROTOCOL_NUM))
+ goto out_open;
- ret = -EINVAL;
- }
+ if (vmw_send_msg(&channel, msg))
+ goto out_msg;
+ vmw_close_channel(&channel);
kfree(msg);
- return ret;
+ return 0;
+
+out_msg:
+ vmw_close_channel(&channel);
+out_open:
+ kfree(msg);
+ DRM_ERROR("Failed to send log\n");
+
+ return -EINVAL;
}
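
[Editor's note: both message helpers now unwind with a goto ladder so each failure releases exactly what was acquired before it: out_msg closes the channel that out_open never opened, and both paths free the message buffer once. The general shape, with hypothetical helper names:

	if (open_channel(&channel))
		goto out_open;		/* nothing to close yet */
	if (send_msg(&channel, msg))
		goto out_msg;		/* channel is open: close it */
	close_channel(&channel);
	kfree(msg);
	return 0;

out_msg:
	close_channel(&channel);
out_open:
	kfree(msg);
	return -EINVAL;
]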
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_msg.h b/drivers/gpu/drm/vmwgfx/vmwgfx_msg.h
index 557a033fb610..8545488aa0cf 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_msg.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_msg.h
@@ -135,17 +135,24 @@
#else
-/* In the 32-bit version of this macro, we use "m" because there is no
- * more register left for bp
+/*
+ * In the 32-bit version of this macro, we store bp in a memory location
+ * because we've run out of registers.
+ * Since we can't reference that memory location once %esp or %ebp has
+ * been modified, we push it onto the stack just before pushing %ebp,
+ * and read it back from the stack when we need it.
*/
#define VMW_PORT_HB_OUT(cmd, in_ecx, in_si, in_di, \
port_num, magic, bp, \
eax, ebx, ecx, edx, si, di) \
({ \
- asm volatile ("push %%ebp;" \
- "mov %12, %%ebp;" \
+ asm volatile ("push %12;" \
+ "push %%ebp;" \
+ "mov 0x04(%%esp), %%ebp;" \
"rep outsb;" \
- "pop %%ebp;" : \
+ "pop %%ebp;" \
+ "add $0x04, %%esp;" : \
"=a"(eax), \
"=b"(ebx), \
"=c"(ecx), \
@@ -167,10 +174,12 @@
port_num, magic, bp, \
eax, ebx, ecx, edx, si, di) \
({ \
- asm volatile ("push %%ebp;" \
- "mov %12, %%ebp;" \
+ asm volatile ("push %12;" \
+ "push %%ebp;" \
+ "mov 0x04(%%esp), %%ebp;" \
"rep insb;" \
- "pop %%ebp" : \
+ "pop %%ebp;" \
+ "add $0x04, %%esp;" : \
"=a"(eax), \
"=b"(ebx), \
"=c"(ecx), \
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c b/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c
index 648f8127f65a..3d667e903beb 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c
@@ -482,6 +482,8 @@ vmw_sou_primary_plane_prepare_fb(struct drm_plane *plane,
return ret;
}
+ vps->dmabuf_size = size;
+
/*
* TTM already thinks the buffer is pinned, but make sure the
* pin_count is upped.
diff --git a/drivers/gpu/host1x/bus.c b/drivers/gpu/host1x/bus.c
index 88a3558b7916..815bdb42e3f0 100644
--- a/drivers/gpu/host1x/bus.c
+++ b/drivers/gpu/host1x/bus.c
@@ -314,6 +314,11 @@ static int host1x_device_match(struct device *dev, struct device_driver *drv)
return strcmp(dev_name(dev), drv->name) == 0;
}
+static int host1x_dma_configure(struct device *dev)
+{
+ return of_dma_configure(dev, dev->of_node, true);
+}
+
static const struct dev_pm_ops host1x_device_pm_ops = {
.suspend = pm_generic_suspend,
.resume = pm_generic_resume,
@@ -326,8 +331,8 @@ static const struct dev_pm_ops host1x_device_pm_ops = {
struct bus_type host1x_bus_type = {
.name = "host1x",
.match = host1x_device_match,
+ .dma_configure = host1x_dma_configure,
.pm = &host1x_device_pm_ops,
- .force_dma = true,
};
static void __host1x_device_del(struct host1x_device *device)
@@ -416,7 +421,7 @@ static int host1x_device_add(struct host1x *host1x,
device->dev.bus = &host1x_bus_type;
device->dev.parent = host1x->dev;
- of_dma_configure(&device->dev, host1x->dev->of_node);
+ of_dma_configure(&device->dev, host1x->dev->of_node, true);
err = host1x_device_parse_dt(device, driver);
if (err < 0) {
diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig
index 60252fd796f6..0000434a1fbd 100644
--- a/drivers/hid/Kconfig
+++ b/drivers/hid/Kconfig
@@ -462,10 +462,11 @@ config HID_LENOVO
select NEW_LEDS
select LEDS_CLASS
---help---
- Support for Lenovo devices that are not fully compliant with HID standard.
+	  Support for IBM/Lenovo devices that are not fully compliant with the HID standard.
- Say Y if you want support for the non-compliant features of the Lenovo
- Thinkpad standalone keyboards, e.g:
+ Say Y if you want support for horizontal scrolling of the IBM/Lenovo
+ Scrollpoint mice or the non-compliant features of the Lenovo Thinkpad
+	  standalone keyboards, e.g.:
- ThinkPad USB Keyboard with TrackPoint (supports extra LEDs and trackpoint
configuration)
- ThinkPad Compact Bluetooth Keyboard with TrackPoint (supports Fn keys)
diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h
index 0b5cc910f62e..46f5ecd11bf7 100644
--- a/drivers/hid/hid-ids.h
+++ b/drivers/hid/hid-ids.h
@@ -552,6 +552,13 @@
#define USB_VENDOR_ID_HUION 0x256c
#define USB_DEVICE_ID_HUION_TABLET 0x006e
+#define USB_VENDOR_ID_IBM 0x04b3
+#define USB_DEVICE_ID_IBM_SCROLLPOINT_III 0x3100
+#define USB_DEVICE_ID_IBM_SCROLLPOINT_PRO 0x3103
+#define USB_DEVICE_ID_IBM_SCROLLPOINT_OPTICAL 0x3105
+#define USB_DEVICE_ID_IBM_SCROLLPOINT_800DPI_OPTICAL 0x3108
+#define USB_DEVICE_ID_IBM_SCROLLPOINT_800DPI_OPTICAL_PRO 0x3109
+
#define USB_VENDOR_ID_IDEACOM 0x1cb6
#define USB_DEVICE_ID_IDEACOM_IDC6650 0x6650
#define USB_DEVICE_ID_IDEACOM_IDC6651 0x6651
@@ -684,6 +691,7 @@
#define USB_DEVICE_ID_LENOVO_TPKBD 0x6009
#define USB_DEVICE_ID_LENOVO_CUSBKBD 0x6047
#define USB_DEVICE_ID_LENOVO_CBTKBD 0x6048
+#define USB_DEVICE_ID_LENOVO_SCROLLPOINT_OPTICAL 0x6049
#define USB_DEVICE_ID_LENOVO_TPPRODOCK 0x6067
#define USB_DEVICE_ID_LENOVO_X1_COVER 0x6085
#define USB_DEVICE_ID_LENOVO_X1_TAB 0x60a3
@@ -964,6 +972,7 @@
#define USB_DEVICE_ID_SIS817_TOUCH 0x0817
#define USB_DEVICE_ID_SIS_TS 0x1013
#define USB_DEVICE_ID_SIS1030_TOUCH 0x1030
+#define USB_DEVICE_ID_SIS10FB_TOUCH 0x10fb
#define USB_VENDOR_ID_SKYCABLE 0x1223
#define USB_DEVICE_ID_SKYCABLE_WIRELESS_PRESENTER 0x3F07
diff --git a/drivers/hid/hid-lenovo.c b/drivers/hid/hid-lenovo.c
index 1ac4ff4d57a6..643b6eb54442 100644
--- a/drivers/hid/hid-lenovo.c
+++ b/drivers/hid/hid-lenovo.c
@@ -6,6 +6,17 @@
*
* Copyright (c) 2012 Bernhard Seibold
* Copyright (c) 2014 Jamie Lentin <jm@lentin.co.uk>
+ *
+ * Linux IBM/Lenovo Scrollpoint mouse driver:
+ * - IBM Scrollpoint III
+ * - IBM Scrollpoint Pro
+ * - IBM Scrollpoint Optical
+ * - IBM Scrollpoint Optical 800dpi
+ * - IBM Scrollpoint Optical 800dpi Pro
+ * - Lenovo Scrollpoint Optical
+ *
+ * Copyright (c) 2012 Peter De Wachter <pdewacht@gmail.com>
+ * Copyright (c) 2018 Peter Ganzhorn <peter.ganzhorn@gmail.com>
*/
/*
@@ -160,6 +171,17 @@ static int lenovo_input_mapping_cptkbd(struct hid_device *hdev,
return 0;
}
+static int lenovo_input_mapping_scrollpoint(struct hid_device *hdev,
+ struct hid_input *hi, struct hid_field *field,
+ struct hid_usage *usage, unsigned long **bit, int *max)
+{
+ if (usage->hid == HID_GD_Z) {
+ hid_map_usage(hi, usage, bit, max, EV_REL, REL_HWHEEL);
+ return 1;
+ }
+ return 0;
+}
+
static int lenovo_input_mapping(struct hid_device *hdev,
struct hid_input *hi, struct hid_field *field,
struct hid_usage *usage, unsigned long **bit, int *max)
@@ -172,6 +194,14 @@ static int lenovo_input_mapping(struct hid_device *hdev,
case USB_DEVICE_ID_LENOVO_CBTKBD:
return lenovo_input_mapping_cptkbd(hdev, hi, field,
usage, bit, max);
+ case USB_DEVICE_ID_IBM_SCROLLPOINT_III:
+ case USB_DEVICE_ID_IBM_SCROLLPOINT_PRO:
+ case USB_DEVICE_ID_IBM_SCROLLPOINT_OPTICAL:
+ case USB_DEVICE_ID_IBM_SCROLLPOINT_800DPI_OPTICAL:
+ case USB_DEVICE_ID_IBM_SCROLLPOINT_800DPI_OPTICAL_PRO:
+ case USB_DEVICE_ID_LENOVO_SCROLLPOINT_OPTICAL:
+ return lenovo_input_mapping_scrollpoint(hdev, hi, field,
+ usage, bit, max);
default:
return 0;
}
@@ -883,6 +913,12 @@ static const struct hid_device_id lenovo_devices[] = {
{ HID_USB_DEVICE(USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_CUSBKBD) },
{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_CBTKBD) },
{ HID_USB_DEVICE(USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_TPPRODOCK) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_IBM, USB_DEVICE_ID_IBM_SCROLLPOINT_III) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_IBM, USB_DEVICE_ID_IBM_SCROLLPOINT_PRO) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_IBM, USB_DEVICE_ID_IBM_SCROLLPOINT_OPTICAL) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_IBM, USB_DEVICE_ID_IBM_SCROLLPOINT_800DPI_OPTICAL) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_IBM, USB_DEVICE_ID_IBM_SCROLLPOINT_800DPI_OPTICAL_PRO) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_SCROLLPOINT_OPTICAL) },
{ }
};
diff --git a/drivers/hid/i2c-hid/i2c-hid.c b/drivers/hid/i2c-hid/i2c-hid.c
index 963328674e93..cc33622253aa 100644
--- a/drivers/hid/i2c-hid/i2c-hid.c
+++ b/drivers/hid/i2c-hid/i2c-hid.c
@@ -174,6 +174,8 @@ static const struct i2c_hid_quirks {
I2C_HID_QUIRK_NO_IRQ_AFTER_RESET },
{ I2C_VENDOR_ID_RAYD, I2C_PRODUCT_ID_RAYD_3118,
I2C_HID_QUIRK_RESEND_REPORT_DESCR },
+ { USB_VENDOR_ID_SIS_TOUCH, USB_DEVICE_ID_SIS10FB_TOUCH,
+ I2C_HID_QUIRK_RESEND_REPORT_DESCR },
{ 0, 0 }
};
diff --git a/drivers/hid/intel-ish-hid/ishtp-hid-client.c b/drivers/hid/intel-ish-hid/ishtp-hid-client.c
index 157b44aacdff..acc2536c8094 100644
--- a/drivers/hid/intel-ish-hid/ishtp-hid-client.c
+++ b/drivers/hid/intel-ish-hid/ishtp-hid-client.c
@@ -77,21 +77,21 @@ static void process_recv(struct ishtp_cl *hid_ishtp_cl, void *recv_buf,
struct ishtp_cl_data *client_data = hid_ishtp_cl->client_data;
int curr_hid_dev = client_data->cur_hid_dev;
- if (data_len < sizeof(struct hostif_msg_hdr)) {
- dev_err(&client_data->cl_device->dev,
- "[hid-ish]: error, received %u which is less than data header %u\n",
- (unsigned int)data_len,
- (unsigned int)sizeof(struct hostif_msg_hdr));
- ++client_data->bad_recv_cnt;
- ish_hw_reset(hid_ishtp_cl->dev);
- return;
- }
-
payload = recv_buf + sizeof(struct hostif_msg_hdr);
total_len = data_len;
cur_pos = 0;
do {
+ if (cur_pos + sizeof(struct hostif_msg) > total_len) {
+ dev_err(&client_data->cl_device->dev,
+ "[hid-ish]: error, received %u which is less than data header %u\n",
+ (unsigned int)data_len,
+ (unsigned int)sizeof(struct hostif_msg_hdr));
+ ++client_data->bad_recv_cnt;
+ ish_hw_reset(hid_ishtp_cl->dev);
+ break;
+ }
+
recv_msg = (struct hostif_msg *)(recv_buf + cur_pos);
payload_len = recv_msg->hdr.size;
@@ -412,9 +412,7 @@ void hid_ishtp_get_report(struct hid_device *hid, int report_id,
{
struct ishtp_hid_data *hid_data = hid->driver_data;
struct ishtp_cl_data *client_data = hid_data->client_data;
- static unsigned char buf[10];
- unsigned int len;
- struct hostif_msg_to_sensor *msg = (struct hostif_msg_to_sensor *)buf;
+ struct hostif_msg_to_sensor msg = {};
int rv;
int i;
@@ -426,14 +424,11 @@ void hid_ishtp_get_report(struct hid_device *hid, int report_id,
return;
}
- len = sizeof(struct hostif_msg_to_sensor);
-
- memset(msg, 0, sizeof(struct hostif_msg_to_sensor));
- msg->hdr.command = (report_type == HID_FEATURE_REPORT) ?
+ msg.hdr.command = (report_type == HID_FEATURE_REPORT) ?
HOSTIF_GET_FEATURE_REPORT : HOSTIF_GET_INPUT_REPORT;
for (i = 0; i < client_data->num_hid_devices; ++i) {
if (hid == client_data->hid_sensor_hubs[i]) {
- msg->hdr.device_id =
+ msg.hdr.device_id =
client_data->hid_devices[i].dev_id;
break;
}
@@ -442,8 +437,9 @@ void hid_ishtp_get_report(struct hid_device *hid, int report_id,
if (i == client_data->num_hid_devices)
return;
- msg->report_id = report_id;
- rv = ishtp_cl_send(client_data->hid_ishtp_cl, buf, len);
+ msg.report_id = report_id;
+ rv = ishtp_cl_send(client_data->hid_ishtp_cl, (uint8_t *)&msg,
+ sizeof(msg));
if (rv)
hid_ishtp_trace(client_data, "%s hid %p send failed\n",
__func__, hid);
diff --git a/drivers/hid/intel-ish-hid/ishtp/bus.c b/drivers/hid/intel-ish-hid/ishtp/bus.c
index f272cdd9bd55..2623a567ffba 100644
--- a/drivers/hid/intel-ish-hid/ishtp/bus.c
+++ b/drivers/hid/intel-ish-hid/ishtp/bus.c
@@ -418,7 +418,7 @@ static struct ishtp_cl_device *ishtp_bus_add_device(struct ishtp_device *dev,
list_del(&device->device_link);
spin_unlock_irqrestore(&dev->device_list_lock, flags);
dev_err(dev->devc, "Failed to register ISHTP client device\n");
- kfree(device);
+ put_device(&device->dev);
return NULL;
}
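
[Editor's note: once device_register() has been attempted, the embedded kobject owns the structure; freeing it with kfree() bypasses the reference count and the release() callback. put_device() drops the reference and lets release() do the freeing. The general sketch:

	err = device_register(&device->dev);
	if (err) {
		/* do NOT kfree(device): the kobject already owns it */
		put_device(&device->dev); /* final ref drop calls ->release() */
		return NULL;
	}
]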
diff --git a/drivers/hid/wacom_sys.c b/drivers/hid/wacom_sys.c
index b54ef1ffcbec..ee7a37eb159a 100644
--- a/drivers/hid/wacom_sys.c
+++ b/drivers/hid/wacom_sys.c
@@ -1213,8 +1213,10 @@ static int __wacom_devm_sysfs_create_group(struct wacom *wacom,
devres->root = root;
error = sysfs_create_group(devres->root, group);
- if (error)
+ if (error) {
+ devres_free(devres);
return error;
+ }
devres_add(&wacom->hdev->dev, devres);
diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig
index f249a4428458..f10840ad465c 100644
--- a/drivers/hwmon/Kconfig
+++ b/drivers/hwmon/Kconfig
@@ -272,7 +272,7 @@ config SENSORS_K8TEMP
config SENSORS_K10TEMP
tristate "AMD Family 10h+ temperature sensor"
- depends on X86 && PCI
+ depends on X86 && PCI && AMD_NB
help
If you say yes here you get support for the temperature
sensor(s) inside your CPU. Supported are later revisions of
@@ -717,15 +717,12 @@ config SENSORS_LTC2945
be called ltc2945.
config SENSORS_LTC2990
- tristate "Linear Technology LTC2990 (current monitoring mode only)"
+ tristate "Linear Technology LTC2990"
depends on I2C
help
If you say yes here you get support for Linear Technology LTC2990
I2C System Monitor. The LTC2990 supports a combination of voltage,
- current and temperature monitoring, but in addition to the Vcc supply
- voltage and chip temperature, this driver currently only supports
- reading two currents by measuring two differential voltages across
- series resistors.
+ current and temperature monitoring.
This driver can also be built as a module. If so, the module will
be called ltc2990.
diff --git a/drivers/hwmon/asus_atk0110.c b/drivers/hwmon/asus_atk0110.c
index 975c43d446f8..a6636fe42189 100644
--- a/drivers/hwmon/asus_atk0110.c
+++ b/drivers/hwmon/asus_atk0110.c
@@ -125,6 +125,8 @@ struct atk_data {
int temperature_count;
int fan_count;
struct list_head sensor_list;
+ struct attribute_group attr_group;
+ const struct attribute_group *attr_groups[2];
struct {
struct dentry *root;
@@ -188,7 +190,6 @@ static int atk_add(struct acpi_device *device);
static int atk_remove(struct acpi_device *device);
static void atk_print_sensor(struct atk_data *data, union acpi_object *obj);
static int atk_read_value(struct atk_sensor_data *sensor, u64 *value);
-static void atk_free_sensors(struct atk_data *data);
static struct acpi_driver atk_driver = {
.name = ATK_HID,
@@ -262,14 +263,6 @@ static ssize_t atk_limit2_show(struct device *dev,
return sprintf(buf, "%lld\n", value);
}
-static ssize_t atk_name_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- return sprintf(buf, "atk0110\n");
-}
-static struct device_attribute atk_name_attr =
- __ATTR(name, 0444, atk_name_show, NULL);
-
static void atk_init_attribute(struct device_attribute *attr, char *name,
sysfs_show_func show)
{
@@ -912,15 +905,13 @@ static int atk_add_sensor(struct atk_data *data, union acpi_object *obj)
limit1 = atk_get_pack_member(data, obj, HWMON_PACK_LIMIT1);
limit2 = atk_get_pack_member(data, obj, HWMON_PACK_LIMIT2);
- sensor = kzalloc(sizeof(*sensor), GFP_KERNEL);
+ sensor = devm_kzalloc(dev, sizeof(*sensor), GFP_KERNEL);
if (!sensor)
return -ENOMEM;
- sensor->acpi_name = kstrdup(name->string.pointer, GFP_KERNEL);
- if (!sensor->acpi_name) {
- err = -ENOMEM;
- goto out;
- }
+ sensor->acpi_name = devm_kstrdup(dev, name->string.pointer, GFP_KERNEL);
+ if (!sensor->acpi_name)
+ return -ENOMEM;
INIT_LIST_HEAD(&sensor->list);
sensor->type = type;
@@ -961,9 +952,6 @@ static int atk_add_sensor(struct atk_data *data, union acpi_object *obj)
(*num)++;
return 1;
-out:
- kfree(sensor);
- return err;
}
static int atk_enumerate_old_hwmon(struct atk_data *data)
@@ -1004,8 +992,7 @@ static int atk_enumerate_old_hwmon(struct atk_data *data)
dev_warn(dev, METHOD_OLD_ENUM_TMP ": ACPI exception: %s\n",
acpi_format_exception(status));
- ret = -ENODEV;
- goto cleanup;
+ return -ENODEV;
}
pack = buf.pointer;
@@ -1026,8 +1013,7 @@ static int atk_enumerate_old_hwmon(struct atk_data *data)
dev_warn(dev, METHOD_OLD_ENUM_FAN ": ACPI exception: %s\n",
acpi_format_exception(status));
- ret = -ENODEV;
- goto cleanup;
+ return -ENODEV;
}
pack = buf.pointer;
@@ -1041,9 +1027,6 @@ static int atk_enumerate_old_hwmon(struct atk_data *data)
ACPI_FREE(buf.pointer);
return count;
-cleanup:
- atk_free_sensors(data);
- return ret;
}
static int atk_ec_present(struct atk_data *data)
@@ -1193,76 +1176,44 @@ static int atk_enumerate_new_hwmon(struct atk_data *data)
return err;
}
-static int atk_create_files(struct atk_data *data)
+static int atk_init_attribute_groups(struct atk_data *data)
{
+ struct device *dev = &data->acpi_dev->dev;
struct atk_sensor_data *s;
- int err;
+ struct attribute **attrs;
+ int i = 0;
+ int len = (data->voltage_count + data->temperature_count
+ + data->fan_count) * 4 + 1;
- list_for_each_entry(s, &data->sensor_list, list) {
- err = device_create_file(data->hwmon_dev, &s->input_attr);
- if (err)
- return err;
- err = device_create_file(data->hwmon_dev, &s->label_attr);
- if (err)
- return err;
- err = device_create_file(data->hwmon_dev, &s->limit1_attr);
- if (err)
- return err;
- err = device_create_file(data->hwmon_dev, &s->limit2_attr);
- if (err)
- return err;
- }
-
- err = device_create_file(data->hwmon_dev, &atk_name_attr);
-
- return err;
-}
-
-static void atk_remove_files(struct atk_data *data)
-{
- struct atk_sensor_data *s;
+ attrs = devm_kcalloc(dev, len, sizeof(struct attribute *), GFP_KERNEL);
+ if (!attrs)
+ return -ENOMEM;
list_for_each_entry(s, &data->sensor_list, list) {
- device_remove_file(data->hwmon_dev, &s->input_attr);
- device_remove_file(data->hwmon_dev, &s->label_attr);
- device_remove_file(data->hwmon_dev, &s->limit1_attr);
- device_remove_file(data->hwmon_dev, &s->limit2_attr);
+ attrs[i++] = &s->input_attr.attr;
+ attrs[i++] = &s->label_attr.attr;
+ attrs[i++] = &s->limit1_attr.attr;
+ attrs[i++] = &s->limit2_attr.attr;
}
- device_remove_file(data->hwmon_dev, &atk_name_attr);
-}
-static void atk_free_sensors(struct atk_data *data)
-{
- struct list_head *head = &data->sensor_list;
- struct atk_sensor_data *s, *tmp;
+ data->attr_group.attrs = attrs;
+ data->attr_groups[0] = &data->attr_group;
- list_for_each_entry_safe(s, tmp, head, list) {
- kfree(s->acpi_name);
- kfree(s);
- }
+ return 0;
}
static int atk_register_hwmon(struct atk_data *data)
{
struct device *dev = &data->acpi_dev->dev;
- int err;
dev_dbg(dev, "registering hwmon device\n");
- data->hwmon_dev = hwmon_device_register(dev);
+ data->hwmon_dev = hwmon_device_register_with_groups(dev, "atk0110",
+ data,
+ data->attr_groups);
if (IS_ERR(data->hwmon_dev))
return PTR_ERR(data->hwmon_dev);
- dev_dbg(dev, "populating sysfs directory\n");
- err = atk_create_files(data);
- if (err)
- goto remove;
-
return 0;
-remove:
- /* Cleanup the registered files */
- atk_remove_files(data);
- hwmon_device_unregister(data->hwmon_dev);
- return err;
}
static int atk_probe_if(struct atk_data *data)
@@ -1350,7 +1301,7 @@ static int atk_add(struct acpi_device *device)
dev_dbg(&device->dev, "adding...\n");
- data = kzalloc(sizeof(*data), GFP_KERNEL);
+ data = devm_kzalloc(&device->dev, sizeof(*data), GFP_KERNEL);
if (!data)
return -ENOMEM;
@@ -1397,20 +1348,20 @@ static int atk_add(struct acpi_device *device)
goto out;
}
+ err = atk_init_attribute_groups(data);
+ if (err)
+ goto out;
err = atk_register_hwmon(data);
if (err)
- goto cleanup;
+ goto out;
atk_debugfs_init(data);
device->driver_data = data;
return 0;
-cleanup:
- atk_free_sensors(data);
out:
if (data->disable_ec)
atk_ec_ctl(data, 0);
- kfree(data);
return err;
}
@@ -1423,8 +1374,6 @@ static int atk_remove(struct acpi_device *device)
atk_debugfs_cleanup(data);
- atk_remove_files(data);
- atk_free_sensors(data);
hwmon_device_unregister(data->hwmon_dev);
if (data->disable_ec) {
@@ -1432,8 +1381,6 @@ static int atk_remove(struct acpi_device *device)
dev_err(&device->dev, "Failed to disable EC\n");
}
- kfree(data);
-
return 0;
}
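
[Editor's note: the whole cleanup ladder in asus_atk0110 could be deleted because devm_kzalloc()/devm_kstrdup() tie each allocation to the ACPI device's lifetime; the driver core releases them after atk_remove() or on probe failure, so no manual unwinding is needed. A minimal sketch:

	sensor = devm_kzalloc(dev, sizeof(*sensor), GFP_KERNEL);
	if (!sensor)
		return -ENOMEM;		/* nothing to unwind */

	sensor->acpi_name = devm_kstrdup(dev, name, GFP_KERNEL);
	if (!sensor->acpi_name)
		return -ENOMEM;		/* the devm buffer above is freed
					 * automatically on failure */
]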
diff --git a/drivers/hwmon/fschmd.c b/drivers/hwmon/fschmd.c
index 5e78229ade04..22d3a84f13ef 100644
--- a/drivers/hwmon/fschmd.c
+++ b/drivers/hwmon/fschmd.c
@@ -105,7 +105,7 @@ static const u8 FSCHMD_REG_VOLT[7][6] = {
static const int FSCHMD_NO_VOLT_SENSORS[7] = { 3, 3, 3, 3, 3, 3, 6 };
/*
- * minimum pwm at which the fan is driven (pwm can by increased depending on
+ * minimum pwm at which the fan is driven (pwm can be increased depending on
 * the temp. Notice that for the scy some fans share their minimum speed.
* Also notice that with the scy the sensor order is different than with the
* other chips, this order was in the 2.4 driver and kept for consistency.
diff --git a/drivers/hwmon/hwmon.c b/drivers/hwmon/hwmon.c
index 32083e452cde..e88c01961948 100644
--- a/drivers/hwmon/hwmon.c
+++ b/drivers/hwmon/hwmon.c
@@ -698,6 +698,9 @@ hwmon_device_register_with_info(struct device *dev, const char *name,
if (chip && (!chip->ops || !chip->ops->is_visible || !chip->info))
return ERR_PTR(-EINVAL);
+ if (chip && !dev)
+ return ERR_PTR(-EINVAL);
+
return __hwmon_device_register(dev, name, drvdata, chip, extra_groups);
}
EXPORT_SYMBOL_GPL(hwmon_device_register_with_info);
diff --git a/drivers/hwmon/k10temp.c b/drivers/hwmon/k10temp.c
index d2cc55e21374..17c6460ae351 100644
--- a/drivers/hwmon/k10temp.c
+++ b/drivers/hwmon/k10temp.c
@@ -23,6 +23,7 @@
#include <linux/init.h>
#include <linux/module.h>
#include <linux/pci.h>
+#include <asm/amd_nb.h>
#include <asm/processor.h>
MODULE_DESCRIPTION("AMD Family 10h+ CPU core temperature monitor");
@@ -36,12 +37,16 @@ MODULE_PARM_DESC(force, "force loading on processors with erratum 319");
/* Provide lock for writing to NB_SMU_IND_ADDR */
static DEFINE_MUTEX(nb_smu_ind_mutex);
+#ifndef PCI_DEVICE_ID_AMD_15H_M70H_NB_F3
+#define PCI_DEVICE_ID_AMD_15H_M70H_NB_F3 0x15b3
+#endif
+
#ifndef PCI_DEVICE_ID_AMD_17H_DF_F3
#define PCI_DEVICE_ID_AMD_17H_DF_F3 0x1463
#endif
-#ifndef PCI_DEVICE_ID_AMD_17H_RR_NB
-#define PCI_DEVICE_ID_AMD_17H_RR_NB 0x15d0
+#ifndef PCI_DEVICE_ID_AMD_17H_M10H_DF_F3
+#define PCI_DEVICE_ID_AMD_17H_M10H_DF_F3 0x15eb
#endif
/* CPUID function 0x80000001, ebx */
@@ -63,10 +68,12 @@ static DEFINE_MUTEX(nb_smu_ind_mutex);
#define NB_CAP_HTC 0x00000400
/*
- * For F15h M60h, functionality of REG_REPORTED_TEMPERATURE
- * has been moved to D0F0xBC_xD820_0CA4 [Reported Temperature
- * Control]
+ * For F15h M60h and M70h, REG_HARDWARE_THERMAL_CONTROL
+ * and REG_REPORTED_TEMPERATURE have been moved to
+ * D0F0xBC_xD820_0C64 [Hardware Temperature Control]
+ * D0F0xBC_xD820_0CA4 [Reported Temperature Control]
*/
+#define F15H_M60H_HARDWARE_TEMP_CTRL_OFFSET 0xd8200c64
#define F15H_M60H_REPORTED_TEMP_CTRL_OFFSET 0xd8200ca4
/* F17h M01h Access through SMN */
@@ -74,9 +81,11 @@ static DEFINE_MUTEX(nb_smu_ind_mutex);
struct k10temp_data {
struct pci_dev *pdev;
+ void (*read_htcreg)(struct pci_dev *pdev, u32 *regval);
void (*read_tempreg)(struct pci_dev *pdev, u32 *regval);
int temp_offset;
u32 temp_adjust_mask;
+ bool show_tdie;
};
struct tctl_offset {
@@ -98,6 +107,11 @@ static const struct tctl_offset tctl_offset_table[] = {
{ 0x17, "AMD Ryzen Threadripper 1910", 10000 },
};
+static void read_htcreg_pci(struct pci_dev *pdev, u32 *regval)
+{
+ pci_read_config_dword(pdev, REG_HARDWARE_THERMAL_CONTROL, regval);
+}
+
static void read_tempreg_pci(struct pci_dev *pdev, u32 *regval)
{
pci_read_config_dword(pdev, REG_REPORTED_TEMPERATURE, regval);
@@ -114,6 +128,12 @@ static void amd_nb_index_read(struct pci_dev *pdev, unsigned int devfn,
mutex_unlock(&nb_smu_ind_mutex);
}
+static void read_htcreg_nb_f15(struct pci_dev *pdev, u32 *regval)
+{
+ amd_nb_index_read(pdev, PCI_DEVFN(0, 0), 0xb8,
+ F15H_M60H_HARDWARE_TEMP_CTRL_OFFSET, regval);
+}
+
static void read_tempreg_nb_f15(struct pci_dev *pdev, u32 *regval)
{
amd_nb_index_read(pdev, PCI_DEVFN(0, 0), 0xb8,
@@ -122,21 +142,28 @@ static void read_tempreg_nb_f15(struct pci_dev *pdev, u32 *regval)
static void read_tempreg_nb_f17(struct pci_dev *pdev, u32 *regval)
{
- amd_nb_index_read(pdev, PCI_DEVFN(0, 0), 0x60,
- F17H_M01H_REPORTED_TEMP_CTRL_OFFSET, regval);
+ amd_smn_read(amd_pci_dev_to_node_id(pdev),
+ F17H_M01H_REPORTED_TEMP_CTRL_OFFSET, regval);
}
-static ssize_t temp1_input_show(struct device *dev,
- struct device_attribute *attr, char *buf)
+static unsigned int get_raw_temp(struct k10temp_data *data)
{
- struct k10temp_data *data = dev_get_drvdata(dev);
- u32 regval;
unsigned int temp;
+ u32 regval;
data->read_tempreg(data->pdev, &regval);
temp = (regval >> 21) * 125;
if (regval & data->temp_adjust_mask)
temp -= 49000;
+ return temp;
+}
+
+static ssize_t temp1_input_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct k10temp_data *data = dev_get_drvdata(dev);
+ unsigned int temp = get_raw_temp(data);
+
if (temp > data->temp_offset)
temp -= data->temp_offset;
else
@@ -145,6 +172,23 @@ static ssize_t temp1_input_show(struct device *dev,
return sprintf(buf, "%u\n", temp);
}
+static ssize_t temp2_input_show(struct device *dev,
+ struct device_attribute *devattr, char *buf)
+{
+ struct k10temp_data *data = dev_get_drvdata(dev);
+ unsigned int temp = get_raw_temp(data);
+
+ return sprintf(buf, "%u\n", temp);
+}
+
+static ssize_t temp_label_show(struct device *dev,
+ struct device_attribute *devattr, char *buf)
+{
+ struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr);
+
+ return sprintf(buf, "%s\n", attr->index ? "Tctl" : "Tdie");
+}
+
static ssize_t temp1_max_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
@@ -160,8 +204,7 @@ static ssize_t show_temp_crit(struct device *dev,
u32 regval;
int value;
- pci_read_config_dword(data->pdev,
- REG_HARDWARE_THERMAL_CONTROL, &regval);
+ data->read_htcreg(data->pdev, &regval);
value = ((regval >> 16) & 0x7f) * 500 + 52000;
if (show_hyst)
value -= ((regval >> 24) & 0xf) * 500;
@@ -173,22 +216,39 @@ static DEVICE_ATTR_RO(temp1_max);
static SENSOR_DEVICE_ATTR(temp1_crit, S_IRUGO, show_temp_crit, NULL, 0);
static SENSOR_DEVICE_ATTR(temp1_crit_hyst, S_IRUGO, show_temp_crit, NULL, 1);
+static SENSOR_DEVICE_ATTR(temp1_label, 0444, temp_label_show, NULL, 0);
+static DEVICE_ATTR_RO(temp2_input);
+static SENSOR_DEVICE_ATTR(temp2_label, 0444, temp_label_show, NULL, 1);
+
static umode_t k10temp_is_visible(struct kobject *kobj,
struct attribute *attr, int index)
{
struct device *dev = container_of(kobj, struct device, kobj);
struct k10temp_data *data = dev_get_drvdata(dev);
struct pci_dev *pdev = data->pdev;
-
- if (index >= 2) {
- u32 reg_caps, reg_htc;
+ u32 reg;
+
+ switch (index) {
+ case 0 ... 1: /* temp1_input, temp1_max */
+ default:
+ break;
+ case 2 ... 3: /* temp1_crit, temp1_crit_hyst */
+ if (!data->read_htcreg)
+ return 0;
pci_read_config_dword(pdev, REG_NORTHBRIDGE_CAPABILITIES,
- &reg_caps);
- pci_read_config_dword(pdev, REG_HARDWARE_THERMAL_CONTROL,
- &reg_htc);
- if (!(reg_caps & NB_CAP_HTC) || !(reg_htc & HTC_ENABLE))
+ &reg);
+ if (!(reg & NB_CAP_HTC))
+ return 0;
+
+ data->read_htcreg(data->pdev, &reg);
+ if (!(reg & HTC_ENABLE))
+ return 0;
+ break;
+ case 4 ... 6: /* temp1_label, temp2_input, temp2_label */
+ if (!data->show_tdie)
return 0;
+ break;
}
return attr->mode;
}
@@ -198,6 +258,9 @@ static struct attribute *k10temp_attrs[] = {
&dev_attr_temp1_max.attr,
&sensor_dev_attr_temp1_crit.dev_attr.attr,
&sensor_dev_attr_temp1_crit_hyst.dev_attr.attr,
+ &sensor_dev_attr_temp1_label.dev_attr.attr,
+ &dev_attr_temp2_input.attr,
+ &sensor_dev_attr_temp2_label.dev_attr.attr,
NULL
};
@@ -268,11 +331,14 @@ static int k10temp_probe(struct pci_dev *pdev,
if (boot_cpu_data.x86 == 0x15 && (boot_cpu_data.x86_model == 0x60 ||
boot_cpu_data.x86_model == 0x70)) {
+ data->read_htcreg = read_htcreg_nb_f15;
data->read_tempreg = read_tempreg_nb_f15;
} else if (boot_cpu_data.x86 == 0x17) {
data->temp_adjust_mask = 0x80000;
data->read_tempreg = read_tempreg_nb_f17;
+ data->show_tdie = true;
} else {
+ data->read_htcreg = read_htcreg_pci;
data->read_tempreg = read_tempreg_pci;
}
@@ -299,10 +365,11 @@ static const struct pci_device_id k10temp_id_table[] = {
{ PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_15H_M10H_F3) },
{ PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_15H_M30H_NB_F3) },
{ PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_15H_M60H_NB_F3) },
+ { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_15H_M70H_NB_F3) },
{ PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_16H_NB_F3) },
{ PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_16H_M30H_NB_F3) },
{ PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_17H_DF_F3) },
- { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_17H_RR_NB) },
+ { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_17H_M10H_DF_F3) },
{}
};
MODULE_DEVICE_TABLE(pci, k10temp_id_table);
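
[Editor's note: get_raw_temp() decodes the reported-temperature register the same way for both sysfs files: the top 11 bits give the temperature in 0.125 degC steps, so (regval >> 21) * 125 yields millidegrees, and the F17h range-select bit (temp_adjust_mask = 0x80000) shifts the scale down by 49 degC. temp2 reports this raw Tctl; temp1 additionally subtracts the model-specific offset. A worked example against the table entry visible earlier in this file:

	temp = (regval >> 21) * 125;	/* e.g. 440 * 125 = 55000 -> 55.0 C Tctl */
	if (regval & data->temp_adjust_mask)
		temp -= 49000;		/* range-selected parts read 49 C high */
	/* temp1 (Tdie) then subtracts the per-model offset, e.g. the
	 * "AMD Ryzen Threadripper 1910" entry's 10000: 55000 - 10000 = 45000 */
]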
diff --git a/drivers/hwmon/ltc2990.c b/drivers/hwmon/ltc2990.c
index 8f8fe059ab48..2aefdc58b242 100644
--- a/drivers/hwmon/ltc2990.c
+++ b/drivers/hwmon/ltc2990.c
@@ -5,18 +5,16 @@
* Author: Mike Looijmans <mike.looijmans@topic.nl>
*
* License: GPLv2
- *
- * This driver assumes the chip is wired as a dual current monitor, and
- * reports the voltage drop across two series resistors. It also reports
- * the chip's internal temperature and Vcc power supply voltage.
*/
+#include <linux/bitops.h>
#include <linux/err.h>
#include <linux/hwmon.h>
#include <linux/hwmon-sysfs.h>
#include <linux/i2c.h>
#include <linux/kernel.h>
#include <linux/module.h>
+#include <linux/of.h>
#define LTC2990_STATUS 0x00
#define LTC2990_CONTROL 0x01
@@ -28,45 +26,108 @@
#define LTC2990_V4_MSB 0x0C
#define LTC2990_VCC_MSB 0x0E
-#define LTC2990_CONTROL_KELVIN BIT(7)
-#define LTC2990_CONTROL_SINGLE BIT(6)
-#define LTC2990_CONTROL_MEASURE_ALL (0x3 << 3)
-#define LTC2990_CONTROL_MODE_CURRENT 0x06
-#define LTC2990_CONTROL_MODE_VOLTAGE 0x07
+#define LTC2990_IN0 BIT(0)
+#define LTC2990_IN1 BIT(1)
+#define LTC2990_IN2 BIT(2)
+#define LTC2990_IN3 BIT(3)
+#define LTC2990_IN4 BIT(4)
+#define LTC2990_CURR1 BIT(5)
+#define LTC2990_CURR2 BIT(6)
+#define LTC2990_TEMP1 BIT(7)
+#define LTC2990_TEMP2 BIT(8)
+#define LTC2990_TEMP3 BIT(9)
+#define LTC2990_NONE 0
+#define LTC2990_ALL GENMASK(9, 0)
-/* convert raw register value to sign-extended integer in 16-bit range */
-static int ltc2990_voltage_to_int(int raw)
-{
- if (raw & BIT(14))
- return -(0x4000 - (raw & 0x3FFF)) << 2;
- else
- return (raw & 0x3FFF) << 2;
-}
+#define LTC2990_MODE0_SHIFT 0
+#define LTC2990_MODE0_MASK GENMASK(2, 0)
+#define LTC2990_MODE1_SHIFT 3
+#define LTC2990_MODE1_MASK GENMASK(1, 0)
+
+/* Enabled measurements for mode bits 2..0 */
+static const int ltc2990_attrs_ena_0[] = {
+ LTC2990_IN1 | LTC2990_IN2 | LTC2990_TEMP3,
+ LTC2990_CURR1 | LTC2990_TEMP3,
+ LTC2990_CURR1 | LTC2990_IN3 | LTC2990_IN4,
+ LTC2990_TEMP2 | LTC2990_IN3 | LTC2990_IN4,
+ LTC2990_TEMP2 | LTC2990_CURR2,
+ LTC2990_TEMP2 | LTC2990_TEMP3,
+ LTC2990_CURR1 | LTC2990_CURR2,
+ LTC2990_IN1 | LTC2990_IN2 | LTC2990_IN3 | LTC2990_IN4
+};
+
+/* Enabled measurements for mode bits 4..3 */
+static const int ltc2990_attrs_ena_1[] = {
+ LTC2990_NONE,
+ LTC2990_TEMP2 | LTC2990_IN1 | LTC2990_CURR1,
+ LTC2990_TEMP3 | LTC2990_IN3 | LTC2990_CURR2,
+ LTC2990_ALL
+};
+
+struct ltc2990_data {
+ struct i2c_client *i2c;
+ u32 mode[2];
+};
/* Return the converted value from the given register in uV or mC */
-static int ltc2990_get_value(struct i2c_client *i2c, u8 reg, int *result)
+static int ltc2990_get_value(struct i2c_client *i2c, int index, int *result)
{
int val;
+ u8 reg;
+
+ switch (index) {
+ case LTC2990_IN0:
+ reg = LTC2990_VCC_MSB;
+ break;
+ case LTC2990_IN1:
+ case LTC2990_CURR1:
+ case LTC2990_TEMP2:
+ reg = LTC2990_V1_MSB;
+ break;
+ case LTC2990_IN2:
+ reg = LTC2990_V2_MSB;
+ break;
+ case LTC2990_IN3:
+ case LTC2990_CURR2:
+ case LTC2990_TEMP3:
+ reg = LTC2990_V3_MSB;
+ break;
+ case LTC2990_IN4:
+ reg = LTC2990_V4_MSB;
+ break;
+ case LTC2990_TEMP1:
+ reg = LTC2990_TINT_MSB;
+ break;
+ default:
+ return -EINVAL;
+ }
val = i2c_smbus_read_word_swapped(i2c, reg);
if (unlikely(val < 0))
return val;
- switch (reg) {
- case LTC2990_TINT_MSB:
- /* internal temp, 0.0625 degrees/LSB, 13-bit */
- val = (val & 0x1FFF) << 3;
- *result = (val * 1000) >> 7;
+ switch (index) {
+ case LTC2990_TEMP1:
+ case LTC2990_TEMP2:
+ case LTC2990_TEMP3:
+ /* temp, 0.0625 degrees/LSB */
+ *result = sign_extend32(val, 12) * 1000 / 16;
break;
- case LTC2990_V1_MSB:
- case LTC2990_V3_MSB:
- /* Vx-Vy, 19.42uV/LSB. Depends on mode. */
- *result = ltc2990_voltage_to_int(val) * 1942 / (4 * 100);
+ case LTC2990_CURR1:
+ case LTC2990_CURR2:
+ /* Vx-Vy, 19.42uV/LSB */
+ *result = sign_extend32(val, 14) * 1942 / 100;
break;
- case LTC2990_VCC_MSB:
- /* Vcc, 305.18μV/LSB, 2.5V offset */
- *result = (ltc2990_voltage_to_int(val) * 30518 /
- (4 * 100 * 1000)) + 2500;
+ case LTC2990_IN0:
+ /* Vcc, 305.18uV/LSB, 2.5V offset */
+ *result = sign_extend32(val, 14) * 30518 / (100 * 1000) + 2500;
+ break;
+ case LTC2990_IN1:
+ case LTC2990_IN2:
+ case LTC2990_IN3:
+ case LTC2990_IN4:
+ /* Vx, 305.18uV/LSB */
+ *result = sign_extend32(val, 14) * 30518 / (100 * 1000);
break;
default:
return -EINVAL; /* won't happen, keep compiler happy */
@@ -79,48 +140,117 @@ static ssize_t ltc2990_show_value(struct device *dev,
struct device_attribute *da, char *buf)
{
struct sensor_device_attribute *attr = to_sensor_dev_attr(da);
+ struct ltc2990_data *data = dev_get_drvdata(dev);
int value;
int ret;
- ret = ltc2990_get_value(dev_get_drvdata(dev), attr->index, &value);
+ ret = ltc2990_get_value(data->i2c, attr->index, &value);
if (unlikely(ret < 0))
return ret;
return snprintf(buf, PAGE_SIZE, "%d\n", value);
}
+static umode_t ltc2990_attrs_visible(struct kobject *kobj,
+ struct attribute *a, int n)
+{
+ struct device *dev = container_of(kobj, struct device, kobj);
+ struct ltc2990_data *data = dev_get_drvdata(dev);
+ struct device_attribute *da =
+ container_of(a, struct device_attribute, attr);
+ struct sensor_device_attribute *attr = to_sensor_dev_attr(da);
+
+ int attrs_mask = LTC2990_IN0 | LTC2990_TEMP1 |
+ (ltc2990_attrs_ena_0[data->mode[0]] &
+ ltc2990_attrs_ena_1[data->mode[1]]);
+
+ if (attr->index & attrs_mask)
+ return a->mode;
+
+ return 0;
+}
+
static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, ltc2990_show_value, NULL,
- LTC2990_TINT_MSB);
+ LTC2990_TEMP1);
+static SENSOR_DEVICE_ATTR(temp2_input, S_IRUGO, ltc2990_show_value, NULL,
+ LTC2990_TEMP2);
+static SENSOR_DEVICE_ATTR(temp3_input, S_IRUGO, ltc2990_show_value, NULL,
+ LTC2990_TEMP3);
static SENSOR_DEVICE_ATTR(curr1_input, S_IRUGO, ltc2990_show_value, NULL,
- LTC2990_V1_MSB);
+ LTC2990_CURR1);
static SENSOR_DEVICE_ATTR(curr2_input, S_IRUGO, ltc2990_show_value, NULL,
- LTC2990_V3_MSB);
+ LTC2990_CURR2);
static SENSOR_DEVICE_ATTR(in0_input, S_IRUGO, ltc2990_show_value, NULL,
- LTC2990_VCC_MSB);
+ LTC2990_IN0);
+static SENSOR_DEVICE_ATTR(in1_input, S_IRUGO, ltc2990_show_value, NULL,
+ LTC2990_IN1);
+static SENSOR_DEVICE_ATTR(in2_input, S_IRUGO, ltc2990_show_value, NULL,
+ LTC2990_IN2);
+static SENSOR_DEVICE_ATTR(in3_input, S_IRUGO, ltc2990_show_value, NULL,
+ LTC2990_IN3);
+static SENSOR_DEVICE_ATTR(in4_input, S_IRUGO, ltc2990_show_value, NULL,
+ LTC2990_IN4);
static struct attribute *ltc2990_attrs[] = {
&sensor_dev_attr_temp1_input.dev_attr.attr,
+ &sensor_dev_attr_temp2_input.dev_attr.attr,
+ &sensor_dev_attr_temp3_input.dev_attr.attr,
&sensor_dev_attr_curr1_input.dev_attr.attr,
&sensor_dev_attr_curr2_input.dev_attr.attr,
&sensor_dev_attr_in0_input.dev_attr.attr,
+ &sensor_dev_attr_in1_input.dev_attr.attr,
+ &sensor_dev_attr_in2_input.dev_attr.attr,
+ &sensor_dev_attr_in3_input.dev_attr.attr,
+ &sensor_dev_attr_in4_input.dev_attr.attr,
NULL,
};
-ATTRIBUTE_GROUPS(ltc2990);
+
+static const struct attribute_group ltc2990_group = {
+ .attrs = ltc2990_attrs,
+ .is_visible = ltc2990_attrs_visible,
+};
+__ATTRIBUTE_GROUPS(ltc2990);
static int ltc2990_i2c_probe(struct i2c_client *i2c,
const struct i2c_device_id *id)
{
int ret;
struct device *hwmon_dev;
+ struct ltc2990_data *data;
+ struct device_node *of_node = i2c->dev.of_node;
if (!i2c_check_functionality(i2c->adapter, I2C_FUNC_SMBUS_BYTE_DATA |
I2C_FUNC_SMBUS_WORD_DATA))
return -ENODEV;
- /* Setup continuous mode, current monitor */
+ data = devm_kzalloc(&i2c->dev, sizeof(struct ltc2990_data), GFP_KERNEL);
+ if (unlikely(!data))
+ return -ENOMEM;
+
+ data->i2c = i2c;
+
+ if (of_node) {
+ ret = of_property_read_u32_array(of_node, "lltc,meas-mode",
+ data->mode, 2);
+ if (ret < 0)
+ return ret;
+
+ if (data->mode[0] & ~LTC2990_MODE0_MASK ||
+ data->mode[1] & ~LTC2990_MODE1_MASK)
+ return -EINVAL;
+ } else {
+ ret = i2c_smbus_read_byte_data(i2c, LTC2990_CONTROL);
+ if (ret < 0)
+ return ret;
+
+ data->mode[0] = ret >> LTC2990_MODE0_SHIFT & LTC2990_MODE0_MASK;
+ data->mode[1] = ret >> LTC2990_MODE1_SHIFT & LTC2990_MODE1_MASK;
+ }
+
+ /* Setup continuous mode */
ret = i2c_smbus_write_byte_data(i2c, LTC2990_CONTROL,
- LTC2990_CONTROL_MEASURE_ALL |
- LTC2990_CONTROL_MODE_CURRENT);
+ data->mode[0] << LTC2990_MODE0_SHIFT |
+ data->mode[1] << LTC2990_MODE1_SHIFT);
if (ret < 0) {
dev_err(&i2c->dev, "Error: Failed to set control mode.\n");
return ret;
@@ -134,7 +264,7 @@ static int ltc2990_i2c_probe(struct i2c_client *i2c,
hwmon_dev = devm_hwmon_device_register_with_groups(&i2c->dev,
i2c->name,
- i2c,
+ data,
ltc2990_groups);
return PTR_ERR_OR_ZERO(hwmon_dev);
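
[Editor's note: with the measurement mode now configurable, ltc2990_attrs_visible() computes the set of live channels as the intersection of the two mode tables, plus Vcc (IN0) and the internal temperature (TEMP1), which every mode provides, and hides any sysfs attribute whose index bit falls outside that mask. Worked example: mode = {6, 3} selects ltc2990_attrs_ena_0[6] = CURR1|CURR2 and ltc2990_attrs_ena_1[3] = ALL, so only in0, temp1, curr1 and curr2 files appear. The visibility test itself:

	int attrs_mask = LTC2990_IN0 | LTC2990_TEMP1 |
			 (ltc2990_attrs_ena_0[data->mode[0]] &
			  ltc2990_attrs_ena_1[data->mode[1]]);

	return (attr->index & attrs_mask) ? a->mode : 0;  /* 0 hides the file */
]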
diff --git a/drivers/hwmon/mc13783-adc.c b/drivers/hwmon/mc13783-adc.c
index 960a1db6f269..67860ad2e3d9 100644
--- a/drivers/hwmon/mc13783-adc.c
+++ b/drivers/hwmon/mc13783-adc.c
@@ -63,6 +63,10 @@ static int mc13783_adc_read(struct device *dev,
if (ret)
return ret;
+ /* ADIN7 subchannels */
+ if (channel >= 16)
+ channel = 7;
+
channel &= 0x7;
*val = (sample[channel % 4] >> (channel > 3 ? 14 : 2)) & 0x3ff;
@@ -111,6 +115,57 @@ static ssize_t mc13783_adc_read_gp(struct device *dev,
return sprintf(buf, "%u\n", val);
}
+static ssize_t mc13783_adc_read_uid(struct device *dev,
+ struct device_attribute *devattr, char *buf)
+{
+ unsigned int val;
+ struct platform_device *pdev = to_platform_device(dev);
+ kernel_ulong_t driver_data = platform_get_device_id(pdev)->driver_data;
+ int ret = mc13783_adc_read(dev, devattr, &val);
+
+ if (ret)
+ return ret;
+
+ if (driver_data & MC13783_ADC_BPDIV2)
+		/* MC13892 has a 1/2 divider, input range is [0, 4.800V] */
+ val = DIV_ROUND_CLOSEST(val * 4800, 1024);
+ else
+		/* MC13783 has a 0.9 divider, input range is [0, 2.555V] */
+ val = DIV_ROUND_CLOSEST(val * 2555, 1024);
+
+ return sprintf(buf, "%u\n", val);
+}
+
+static ssize_t mc13783_adc_read_temp(struct device *dev,
+ struct device_attribute *devattr, char *buf)
+{
+ unsigned int val;
+ struct platform_device *pdev = to_platform_device(dev);
+ kernel_ulong_t driver_data = platform_get_device_id(pdev)->driver_data;
+ int ret = mc13783_adc_read(dev, devattr, &val);
+
+ if (ret)
+ return ret;
+
+ if (driver_data & MC13783_ADC_BPDIV2) {
+ /*
+ * MC13892:
+ * Die Temperature Read Out Code at 25C 680
+ * Temperature change per LSB +0.4244C
+ */
+ ret = DIV_ROUND_CLOSEST(-2635920 + val * 4244, 10);
+ } else {
+ /*
+ * MC13783:
+ * Die Temperature Read Out Code at 25C 282
+ * Temperature change per LSB -1.14C
+ */
+ ret = 346480 - 1140 * val;
+ }
+
+ return sprintf(buf, "%d\n", ret);
+}
+
static DEVICE_ATTR_RO(name);
static SENSOR_DEVICE_ATTR(in2_input, S_IRUGO, mc13783_adc_read_bp, NULL, 2);
static SENSOR_DEVICE_ATTR(in5_input, S_IRUGO, mc13783_adc_read_gp, NULL, 5);
@@ -124,6 +179,9 @@ static SENSOR_DEVICE_ATTR(in12_input, S_IRUGO, mc13783_adc_read_gp, NULL, 12);
static SENSOR_DEVICE_ATTR(in13_input, S_IRUGO, mc13783_adc_read_gp, NULL, 13);
static SENSOR_DEVICE_ATTR(in14_input, S_IRUGO, mc13783_adc_read_gp, NULL, 14);
static SENSOR_DEVICE_ATTR(in15_input, S_IRUGO, mc13783_adc_read_gp, NULL, 15);
+static SENSOR_DEVICE_ATTR(in16_input, S_IRUGO, mc13783_adc_read_uid, NULL, 16);
+static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO,
+ mc13783_adc_read_temp, NULL, 17);
static struct attribute *mc13783_attr_base[] = {
&dev_attr_name.attr,
@@ -131,6 +189,8 @@ static struct attribute *mc13783_attr_base[] = {
&sensor_dev_attr_in5_input.dev_attr.attr,
&sensor_dev_attr_in6_input.dev_attr.attr,
&sensor_dev_attr_in7_input.dev_attr.attr,
+ &sensor_dev_attr_in16_input.dev_attr.attr,
+ &sensor_dev_attr_temp1_input.dev_attr.attr,
NULL
};
diff --git a/drivers/hwtracing/intel_th/msu.c b/drivers/hwtracing/intel_th/msu.c
index ede388309376..634f58042c77 100644
--- a/drivers/hwtracing/intel_th/msu.c
+++ b/drivers/hwtracing/intel_th/msu.c
@@ -733,8 +733,8 @@ err_nomem:
/* Reset the page to write-back before releasing */
set_memory_wb((unsigned long)win->block[i].bdesc, 1);
#endif
- dma_free_coherent(msc_dev(msc), size, win->block[i].bdesc,
- win->block[i].addr);
+ dma_free_coherent(msc_dev(msc)->parent->parent, size,
+ win->block[i].bdesc, win->block[i].addr);
}
kfree(win);
@@ -769,7 +769,7 @@ static void msc_buffer_win_free(struct msc *msc, struct msc_window *win)
/* Reset the page to write-back before releasing */
set_memory_wb((unsigned long)win->block[i].bdesc, 1);
#endif
- dma_free_coherent(msc_dev(win->msc), PAGE_SIZE,
+ dma_free_coherent(msc_dev(win->msc)->parent->parent, PAGE_SIZE,
win->block[i].bdesc, win->block[i].addr);
}
diff --git a/drivers/hwtracing/stm/core.c b/drivers/hwtracing/stm/core.c
index 05386b76465e..10bcb5d73f90 100644
--- a/drivers/hwtracing/stm/core.c
+++ b/drivers/hwtracing/stm/core.c
@@ -19,6 +19,7 @@
#include <linux/stm.h>
#include <linux/fs.h>
#include <linux/mm.h>
+#include <linux/vmalloc.h>
#include "stm.h"
#include <uapi/linux/stm.h>
@@ -674,7 +675,7 @@ static void stm_device_release(struct device *dev)
{
struct stm_device *stm = to_stm_device(dev);
- kfree(stm);
+ vfree(stm);
}
int stm_register_device(struct device *parent, struct stm_data *stm_data,
@@ -691,7 +692,7 @@ int stm_register_device(struct device *parent, struct stm_data *stm_data,
return -EINVAL;
nmasters = stm_data->sw_end - stm_data->sw_start + 1;
- stm = kzalloc(sizeof(*stm) + nmasters * sizeof(void *), GFP_KERNEL);
+ stm = vzalloc(sizeof(*stm) + nmasters * sizeof(void *));
if (!stm)
return -ENOMEM;
@@ -744,7 +745,7 @@ err_device:
/* matches device_initialize() above */
put_device(&stm->dev);
err_free:
- kfree(stm);
+ vfree(stm);
return err;
}
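The stm/core change swaps physically contiguous kzalloc() for vzalloc() because the trailing per-master pointer array can make the allocation large enough to fail under memory fragmentation; virtual contiguity is sufficient here since the buffer is never handed to hardware. The pairing rule, sketched with a hypothetical type that mirrors the sizing in the hunk:

#include <linux/vmalloc.h>

struct master_table {
	unsigned int nmasters;
	void *masters[]; /* may be tens of thousands of entries */
};

static struct master_table *master_table_alloc(unsigned int nmasters)
{
	struct master_table *t;

	t = vzalloc(sizeof(*t) + nmasters * sizeof(void *));
	if (t)
		t->nmasters = nmasters;
	return t;
}

static void master_table_free(struct master_table *t)
{
	vfree(t); /* must pair with vzalloc(), never kfree() */
}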
diff --git a/drivers/i2c/busses/i2c-designware-master.c b/drivers/i2c/busses/i2c-designware-master.c
index fd36c39ddf4e..0cdba29ae0a9 100644
--- a/drivers/i2c/busses/i2c-designware-master.c
+++ b/drivers/i2c/busses/i2c-designware-master.c
@@ -209,7 +209,10 @@ static void i2c_dw_xfer_init(struct dw_i2c_dev *dev)
i2c_dw_disable_int(dev);
/* Enable the adapter */
- __i2c_dw_enable_and_wait(dev, true);
+ __i2c_dw_enable(dev, true);
+
+ /* Dummy read to avoid the register getting stuck on Bay Trail */
+ dw_readl(dev, DW_IC_ENABLE_STATUS);
/* Clear and enable interrupts */
dw_readl(dev, DW_IC_CLR_INTR);
diff --git a/drivers/i2c/busses/i2c-ocores.c b/drivers/i2c/busses/i2c-ocores.c
index 8c42ca7107b2..45ae3c025bf6 100644
--- a/drivers/i2c/busses/i2c-ocores.c
+++ b/drivers/i2c/busses/i2c-ocores.c
@@ -1,6 +1,6 @@
/*
* i2c-ocores.c: I2C bus driver for OpenCores I2C controller
- * (http://www.opencores.org/projects.cgi/web/i2c/overview).
+ * (https://opencores.org/project/i2c/overview)
*
* Peter Korsgaard <jacmet@sunsite.dk>
*
diff --git a/drivers/i2c/busses/i2c-pmcmsp.c b/drivers/i2c/busses/i2c-pmcmsp.c
index 2aa0e83174c5..dae8ac618a52 100644
--- a/drivers/i2c/busses/i2c-pmcmsp.c
+++ b/drivers/i2c/busses/i2c-pmcmsp.c
@@ -564,10 +564,10 @@ static int pmcmsptwi_master_xfer(struct i2c_adapter *adap,
* TODO: We could potentially loop and retry in the case
* of MSP_TWI_XFER_TIMEOUT.
*/
- return -1;
+ return -EIO;
}
- return 0;
+ return num;
}
static u32 pmcmsptwi_i2c_func(struct i2c_adapter *adapter)
diff --git a/drivers/i2c/busses/i2c-viperboard.c b/drivers/i2c/busses/i2c-viperboard.c
index e4be86b3de9a..7235c7302bb7 100644
--- a/drivers/i2c/busses/i2c-viperboard.c
+++ b/drivers/i2c/busses/i2c-viperboard.c
@@ -337,7 +337,7 @@ static int vprbrd_i2c_xfer(struct i2c_adapter *i2c, struct i2c_msg *msgs,
}
mutex_unlock(&vb->lock);
}
- return 0;
+ return num;
error:
mutex_unlock(&vb->lock);
return error;
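The pmcmsp and viperboard fixes both implement the i2c_algorithm contract: master_xfer() returns the number of messages transferred on success or a negative errno on failure; returning 0 told callers that nothing was transferred. The expected shape, sketched for a hypothetical driver:

#include <linux/i2c.h>

static int foo_xfer_one(struct i2c_adapter *adap, struct i2c_msg *msg)
{
	/* hypothetical per-message transfer; -EIO etc. on failure */
	return 0;
}

static int foo_master_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs,
			   int num)
{
	int i, ret;

	for (i = 0; i < num; i++) {
		ret = foo_xfer_one(adap, &msgs[i]);
		if (ret < 0)
			return ret; /* negative errno, never a bare -1 */
	}
	return num; /* success: count of messages completed */
}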
diff --git a/drivers/i2c/i2c-core-acpi.c b/drivers/i2c/i2c-core-acpi.c
index a9126b3cda61..7c3b4740b94b 100644
--- a/drivers/i2c/i2c-core-acpi.c
+++ b/drivers/i2c/i2c-core-acpi.c
@@ -445,10 +445,17 @@ static int acpi_gsb_i2c_read_bytes(struct i2c_client *client,
msgs[1].buf = buffer;
ret = i2c_transfer(client->adapter, msgs, ARRAY_SIZE(msgs));
- if (ret < 0)
- dev_err(&client->adapter->dev, "i2c read failed\n");
- else
+ if (ret < 0) {
+ /* Getting a NACK is unfortunately normal with some DSDTs */
+ if (ret == -EREMOTEIO)
+ dev_dbg(&client->adapter->dev, "i2c read %d bytes from client@%#x starting at reg %#x failed, error: %d\n",
+ data_len, client->addr, cmd, ret);
+ else
+ dev_err(&client->adapter->dev, "i2c read %d bytes from client@%#x starting at reg %#x failed, error: %d\n",
+ data_len, client->addr, cmd, ret);
+ } else {
memcpy(data, buffer, data_len);
+ }
kfree(buffer);
return ret;
diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c
index 0e6bc631a1ca..8b2b72b93885 100644
--- a/drivers/ide/ide-atapi.c
+++ b/drivers/ide/ide-atapi.c
@@ -92,7 +92,7 @@ int ide_queue_pc_tail(ide_drive_t *drive, struct gendisk *disk,
struct request *rq;
int error;
- rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM);
+ rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, 0);
ide_req(rq)->type = ATA_PRIV_MISC;
rq->special = (char *)pc;
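This hunk and the ide hunks that follow all track one block-layer API change: the last argument of blk_get_request() is now a BLK_MQ_REQ_* flag word rather than a gfp_t, so the old __GFP_RECLAIM values were being misinterpreted as flags. A sketch of the two call shapes left after the series (q, out and the error handling are placeholders):

/* default, may sleep while waiting for a free request */
rq = blk_get_request(q, REQ_OP_DRV_IN, 0);
if (IS_ERR(rq))
	return PTR_ERR(rq);

/* must not block (see the ide-park.c hunk below): fail fast instead */
rq = blk_get_request(q, REQ_OP_DRV_IN, BLK_MQ_REQ_NOWAIT);
if (IS_ERR(rq))
	goto out;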
diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c
index 5a8e8e3c22cd..5f178384876f 100644
--- a/drivers/ide/ide-cd.c
+++ b/drivers/ide/ide-cd.c
@@ -437,7 +437,7 @@ int ide_cd_queue_pc(ide_drive_t *drive, const unsigned char *cmd,
bool delay = false;
rq = blk_get_request(drive->queue,
- write ? REQ_OP_DRV_OUT : REQ_OP_DRV_IN, __GFP_RECLAIM);
+ write ? REQ_OP_DRV_OUT : REQ_OP_DRV_IN, 0);
memcpy(scsi_req(rq)->cmd, cmd, BLK_MAX_CDB);
ide_req(rq)->type = ATA_PRIV_PC;
rq->rq_flags |= rq_flags;
@@ -1426,21 +1426,8 @@ static int idecd_capacity_proc_show(struct seq_file *m, void *v)
return 0;
}
-static int idecd_capacity_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, idecd_capacity_proc_show, PDE_DATA(inode));
-}
-
-static const struct file_operations idecd_capacity_proc_fops = {
- .owner = THIS_MODULE,
- .open = idecd_capacity_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
static ide_proc_entry_t idecd_proc[] = {
- { "capacity", S_IFREG|S_IRUGO, &idecd_capacity_proc_fops },
+ { "capacity", S_IFREG|S_IRUGO, idecd_capacity_proc_show },
{}
};
diff --git a/drivers/ide/ide-cd_ioctl.c b/drivers/ide/ide-cd_ioctl.c
index 2acca12b9c94..b1322400887b 100644
--- a/drivers/ide/ide-cd_ioctl.c
+++ b/drivers/ide/ide-cd_ioctl.c
@@ -304,7 +304,7 @@ int ide_cdrom_reset(struct cdrom_device_info *cdi)
struct request *rq;
int ret;
- rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM);
+ rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, 0);
ide_req(rq)->type = ATA_PRIV_MISC;
rq->rq_flags = RQF_QUIET;
blk_execute_rq(drive->queue, cd->disk, rq, 0);
diff --git a/drivers/ide/ide-devsets.c b/drivers/ide/ide-devsets.c
index 4e20747af32e..f4f8afdf8bbe 100644
--- a/drivers/ide/ide-devsets.c
+++ b/drivers/ide/ide-devsets.c
@@ -166,7 +166,7 @@ int ide_devset_execute(ide_drive_t *drive, const struct ide_devset *setting,
if (!(setting->flags & DS_SYNC))
return setting->set(drive, arg);
- rq = blk_get_request(q, REQ_OP_DRV_IN, __GFP_RECLAIM);
+ rq = blk_get_request(q, REQ_OP_DRV_IN, 0);
ide_req(rq)->type = ATA_PRIV_MISC;
scsi_req(rq)->cmd_len = 5;
scsi_req(rq)->cmd[0] = REQ_DEVSET_EXEC;
diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c
index f1a7c58fe418..e3b4e659082d 100644
--- a/drivers/ide/ide-disk.c
+++ b/drivers/ide/ide-disk.c
@@ -478,7 +478,7 @@ static int set_multcount(ide_drive_t *drive, int arg)
if (drive->special_flags & IDE_SFLAG_SET_MULTMODE)
return -EBUSY;
- rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM);
+ rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, 0);
ide_req(rq)->type = ATA_PRIV_TASKFILE;
drive->mult_req = arg;
diff --git a/drivers/ide/ide-disk_proc.c b/drivers/ide/ide-disk_proc.c
index 82a36ced4e96..95d239b2f646 100644
--- a/drivers/ide/ide-disk_proc.c
+++ b/drivers/ide/ide-disk_proc.c
@@ -52,19 +52,6 @@ static int idedisk_cache_proc_show(struct seq_file *m, void *v)
return 0;
}
-static int idedisk_cache_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, idedisk_cache_proc_show, PDE_DATA(inode));
-}
-
-static const struct file_operations idedisk_cache_proc_fops = {
- .owner = THIS_MODULE,
- .open = idedisk_cache_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
static int idedisk_capacity_proc_show(struct seq_file *m, void *v)
{
ide_drive_t *drive = (ide_drive_t *)m->private;
@@ -73,19 +60,6 @@ static int idedisk_capacity_proc_show(struct seq_file *m, void *v)
return 0;
}
-static int idedisk_capacity_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, idedisk_capacity_proc_show, PDE_DATA(inode));
-}
-
-static const struct file_operations idedisk_capacity_proc_fops = {
- .owner = THIS_MODULE,
- .open = idedisk_capacity_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
static int __idedisk_proc_show(struct seq_file *m, ide_drive_t *drive, u8 sub_cmd)
{
u8 *buf;
@@ -114,43 +88,17 @@ static int idedisk_sv_proc_show(struct seq_file *m, void *v)
return __idedisk_proc_show(m, m->private, ATA_SMART_READ_VALUES);
}
-static int idedisk_sv_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, idedisk_sv_proc_show, PDE_DATA(inode));
-}
-
-static const struct file_operations idedisk_sv_proc_fops = {
- .owner = THIS_MODULE,
- .open = idedisk_sv_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
static int idedisk_st_proc_show(struct seq_file *m, void *v)
{
return __idedisk_proc_show(m, m->private, ATA_SMART_READ_THRESHOLDS);
}
-static int idedisk_st_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, idedisk_st_proc_show, PDE_DATA(inode));
-}
-
-static const struct file_operations idedisk_st_proc_fops = {
- .owner = THIS_MODULE,
- .open = idedisk_st_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
ide_proc_entry_t ide_disk_proc[] = {
- { "cache", S_IFREG|S_IRUGO, &idedisk_cache_proc_fops },
- { "capacity", S_IFREG|S_IRUGO, &idedisk_capacity_proc_fops },
- { "geometry", S_IFREG|S_IRUGO, &ide_geometry_proc_fops },
- { "smart_values", S_IFREG|S_IRUSR, &idedisk_sv_proc_fops },
- { "smart_thresholds", S_IFREG|S_IRUSR, &idedisk_st_proc_fops },
+ { "cache", S_IFREG|S_IRUGO, idedisk_cache_proc_show },
+ { "capacity", S_IFREG|S_IRUGO, idedisk_capacity_proc_show },
+ { "geometry", S_IFREG|S_IRUGO, ide_geometry_proc_show },
+ { "smart_values", S_IFREG|S_IRUSR, idedisk_sv_proc_show },
+ { "smart_thresholds", S_IFREG|S_IRUSR, idedisk_st_proc_show },
{}
};
diff --git a/drivers/ide/ide-dma.c b/drivers/ide/ide-dma.c
index 54d4d78ca46a..6f344654ef22 100644
--- a/drivers/ide/ide-dma.c
+++ b/drivers/ide/ide-dma.c
@@ -180,7 +180,6 @@ EXPORT_SYMBOL_GPL(ide_dma_unmap_sg);
void ide_dma_off_quietly(ide_drive_t *drive)
{
drive->dev_flags &= ~IDE_DFLAG_USING_DMA;
- ide_toggle_bounce(drive, 0);
drive->hwif->dma_ops->dma_host_set(drive, 0);
}
@@ -211,7 +210,6 @@ EXPORT_SYMBOL(ide_dma_off);
void ide_dma_on(ide_drive_t *drive)
{
drive->dev_flags |= IDE_DFLAG_USING_DMA;
- ide_toggle_bounce(drive, 1);
drive->hwif->dma_ops->dma_host_set(drive, 1);
}
diff --git a/drivers/ide/ide-floppy_proc.c b/drivers/ide/ide-floppy_proc.c
index 471457ebea67..7f697ddb5fe5 100644
--- a/drivers/ide/ide-floppy_proc.c
+++ b/drivers/ide/ide-floppy_proc.c
@@ -14,22 +14,9 @@ static int idefloppy_capacity_proc_show(struct seq_file *m, void *v)
return 0;
}
-static int idefloppy_capacity_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, idefloppy_capacity_proc_show, PDE_DATA(inode));
-}
-
-static const struct file_operations idefloppy_capacity_proc_fops = {
- .owner = THIS_MODULE,
- .open = idefloppy_capacity_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
ide_proc_entry_t ide_floppy_proc[] = {
- { "capacity", S_IFREG|S_IRUGO, &idefloppy_capacity_proc_fops },
- { "geometry", S_IFREG|S_IRUGO, &ide_geometry_proc_fops },
+ { "capacity", S_IFREG|S_IRUGO, idefloppy_capacity_proc_show },
+ { "geometry", S_IFREG|S_IRUGO, ide_geometry_proc_show },
{}
};
diff --git a/drivers/ide/ide-ioctls.c b/drivers/ide/ide-ioctls.c
index 3661abb16a5f..af5119a73689 100644
--- a/drivers/ide/ide-ioctls.c
+++ b/drivers/ide/ide-ioctls.c
@@ -125,7 +125,7 @@ static int ide_cmd_ioctl(ide_drive_t *drive, unsigned long arg)
if (NULL == (void *) arg) {
struct request *rq;
- rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM);
+ rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, 0);
ide_req(rq)->type = ATA_PRIV_TASKFILE;
blk_execute_rq(drive->queue, NULL, rq, 0);
err = scsi_req(rq)->result ? -EIO : 0;
@@ -222,7 +222,7 @@ static int generic_drive_reset(ide_drive_t *drive)
struct request *rq;
int ret = 0;
- rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM);
+ rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, 0);
ide_req(rq)->type = ATA_PRIV_MISC;
scsi_req(rq)->cmd_len = 1;
scsi_req(rq)->cmd[0] = REQ_DRIVE_RESET;
diff --git a/drivers/ide/ide-lib.c b/drivers/ide/ide-lib.c
index e1180fa46196..78cb79eddc8b 100644
--- a/drivers/ide/ide-lib.c
+++ b/drivers/ide/ide-lib.c
@@ -6,32 +6,6 @@
#include <linux/ide.h>
#include <linux/bitops.h>
-/**
- * ide_toggle_bounce - handle bounce buffering
- * @drive: drive to update
- * @on: on/off boolean
- *
- * Enable or disable bounce buffering for the device. Drives move
- * between PIO and DMA and that changes the rules we need.
- */
-
-void ide_toggle_bounce(ide_drive_t *drive, int on)
-{
- u64 addr = BLK_BOUNCE_HIGH; /* dma64_addr_t */
-
- if (!PCI_DMA_BUS_IS_PHYS) {
- addr = BLK_BOUNCE_ANY;
- } else if (on && drive->media == ide_disk) {
- struct device *dev = drive->hwif->dev;
-
- if (dev && dev->dma_mask)
- addr = *dev->dma_mask;
- }
-
- if (drive->queue)
- blk_queue_bounce_limit(drive->queue, addr);
-}
-
u64 ide_get_lba_addr(struct ide_cmd *cmd, int lba48)
{
struct ide_taskfile *tf = &cmd->tf;
diff --git a/drivers/ide/ide-park.c b/drivers/ide/ide-park.c
index 6465bcc7cea6..622f0edb3945 100644
--- a/drivers/ide/ide-park.c
+++ b/drivers/ide/ide-park.c
@@ -32,7 +32,7 @@ static void issue_park_cmd(ide_drive_t *drive, unsigned long timeout)
}
spin_unlock_irq(&hwif->lock);
- rq = blk_get_request(q, REQ_OP_DRV_IN, __GFP_RECLAIM);
+ rq = blk_get_request(q, REQ_OP_DRV_IN, 0);
scsi_req(rq)->cmd[0] = REQ_PARK_HEADS;
scsi_req(rq)->cmd_len = 1;
ide_req(rq)->type = ATA_PRIV_MISC;
@@ -47,7 +47,7 @@ static void issue_park_cmd(ide_drive_t *drive, unsigned long timeout)
* Make sure that *some* command is sent to the drive after the
* timeout has expired, so power management will be reenabled.
*/
- rq = blk_get_request(q, REQ_OP_DRV_IN, GFP_NOWAIT);
+ rq = blk_get_request(q, REQ_OP_DRV_IN, BLK_MQ_REQ_NOWAIT);
if (IS_ERR(rq))
goto out;
diff --git a/drivers/ide/ide-pm.c b/drivers/ide/ide-pm.c
index ad8a125defdd..59217aa1d1fb 100644
--- a/drivers/ide/ide-pm.c
+++ b/drivers/ide/ide-pm.c
@@ -19,7 +19,7 @@ int generic_ide_suspend(struct device *dev, pm_message_t mesg)
}
memset(&rqpm, 0, sizeof(rqpm));
- rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM);
+ rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, 0);
ide_req(rq)->type = ATA_PRIV_PM_SUSPEND;
rq->special = &rqpm;
rqpm.pm_step = IDE_PM_START_SUSPEND;
@@ -90,8 +90,7 @@ int generic_ide_resume(struct device *dev)
}
memset(&rqpm, 0, sizeof(rqpm));
- rq = blk_get_request_flags(drive->queue, REQ_OP_DRV_IN,
- BLK_MQ_REQ_PREEMPT);
+ rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, BLK_MQ_REQ_PREEMPT);
ide_req(rq)->type = ATA_PRIV_PM_RESUME;
rq->special = &rqpm;
rqpm.pm_step = IDE_PM_START_RESUME;
diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c
index 2019e66eada7..56d7bc228cb3 100644
--- a/drivers/ide/ide-probe.c
+++ b/drivers/ide/ide-probe.c
@@ -796,8 +796,7 @@ static int ide_init_queue(ide_drive_t *drive)
* This will be fixed once we teach pci_map_sg() about our boundary
* requirements, hopefully soon. *FIXME*
*/
- if (!PCI_DMA_BUS_IS_PHYS)
- max_sg_entries >>= 1;
+ max_sg_entries >>= 1;
#endif /* CONFIG_PCI */
blk_queue_max_segments(q, max_sg_entries);
@@ -805,9 +804,6 @@ static int ide_init_queue(ide_drive_t *drive)
/* assign drive queue */
drive->queue = q;
- /* needs drive->queue to be set */
- ide_toggle_bounce(drive, 1);
-
return 0;
}
diff --git a/drivers/ide/ide-proc.c b/drivers/ide/ide-proc.c
index 863db44c7916..45c997430332 100644
--- a/drivers/ide/ide-proc.c
+++ b/drivers/ide/ide-proc.c
@@ -56,19 +56,6 @@ static int ide_imodel_proc_show(struct seq_file *m, void *v)
return 0;
}
-static int ide_imodel_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, ide_imodel_proc_show, PDE_DATA(inode));
-}
-
-static const struct file_operations ide_imodel_proc_fops = {
- .owner = THIS_MODULE,
- .open = ide_imodel_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
static int ide_mate_proc_show(struct seq_file *m, void *v)
{
ide_hwif_t *hwif = (ide_hwif_t *) m->private;
@@ -80,19 +67,6 @@ static int ide_mate_proc_show(struct seq_file *m, void *v)
return 0;
}
-static int ide_mate_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, ide_mate_proc_show, PDE_DATA(inode));
-}
-
-static const struct file_operations ide_mate_proc_fops = {
- .owner = THIS_MODULE,
- .open = ide_mate_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
static int ide_channel_proc_show(struct seq_file *m, void *v)
{
ide_hwif_t *hwif = (ide_hwif_t *) m->private;
@@ -101,19 +75,6 @@ static int ide_channel_proc_show(struct seq_file *m, void *v)
return 0;
}
-static int ide_channel_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, ide_channel_proc_show, PDE_DATA(inode));
-}
-
-static const struct file_operations ide_channel_proc_fops = {
- .owner = THIS_MODULE,
- .open = ide_channel_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
static int ide_identify_proc_show(struct seq_file *m, void *v)
{
ide_drive_t *drive = (ide_drive_t *)m->private;
@@ -141,19 +102,6 @@ static int ide_identify_proc_show(struct seq_file *m, void *v)
return 0;
}
-static int ide_identify_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, ide_identify_proc_show, PDE_DATA(inode));
-}
-
-static const struct file_operations ide_identify_proc_fops = {
- .owner = THIS_MODULE,
- .open = ide_identify_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
/**
* ide_find_setting - find a specific setting
* @st: setting table pointer
@@ -441,27 +389,14 @@ static const struct file_operations ide_settings_proc_fops = {
.write = ide_settings_proc_write,
};
-static int ide_capacity_proc_show(struct seq_file *m, void *v)
+int ide_capacity_proc_show(struct seq_file *m, void *v)
{
seq_printf(m, "%llu\n", (long long)0x7fffffff);
return 0;
}
+EXPORT_SYMBOL_GPL(ide_capacity_proc_show);
-static int ide_capacity_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, ide_capacity_proc_show, NULL);
-}
-
-const struct file_operations ide_capacity_proc_fops = {
- .owner = THIS_MODULE,
- .open = ide_capacity_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-EXPORT_SYMBOL_GPL(ide_capacity_proc_fops);
-
-static int ide_geometry_proc_show(struct seq_file *m, void *v)
+int ide_geometry_proc_show(struct seq_file *m, void *v)
{
ide_drive_t *drive = (ide_drive_t *) m->private;
@@ -471,20 +406,7 @@ static int ide_geometry_proc_show(struct seq_file *m, void *v)
drive->bios_cyl, drive->bios_head, drive->bios_sect);
return 0;
}
-
-static int ide_geometry_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, ide_geometry_proc_show, PDE_DATA(inode));
-}
-
-const struct file_operations ide_geometry_proc_fops = {
- .owner = THIS_MODULE,
- .open = ide_geometry_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-EXPORT_SYMBOL(ide_geometry_proc_fops);
+EXPORT_SYMBOL(ide_geometry_proc_show);
static int ide_dmodel_proc_show(struct seq_file *seq, void *v)
{
@@ -495,19 +417,6 @@ static int ide_dmodel_proc_show(struct seq_file *seq, void *v)
return 0;
}
-static int ide_dmodel_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, ide_dmodel_proc_show, PDE_DATA(inode));
-}
-
-static const struct file_operations ide_dmodel_proc_fops = {
- .owner = THIS_MODULE,
- .open = ide_dmodel_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
static int ide_driver_proc_show(struct seq_file *m, void *v)
{
ide_drive_t *drive = (ide_drive_t *)m->private;
@@ -523,65 +432,6 @@ static int ide_driver_proc_show(struct seq_file *m, void *v)
return 0;
}
-static int ide_driver_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, ide_driver_proc_show, PDE_DATA(inode));
-}
-
-static int ide_replace_subdriver(ide_drive_t *drive, const char *driver)
-{
- struct device *dev = &drive->gendev;
- int ret = 1;
- int err;
-
- device_release_driver(dev);
- /* FIXME: device can still be in use by previous driver */
- strlcpy(drive->driver_req, driver, sizeof(drive->driver_req));
- err = device_attach(dev);
- if (err < 0)
- printk(KERN_WARNING "IDE: %s: device_attach error: %d\n",
- __func__, err);
- drive->driver_req[0] = 0;
- if (dev->driver == NULL) {
- err = device_attach(dev);
- if (err < 0)
- printk(KERN_WARNING
- "IDE: %s: device_attach(2) error: %d\n",
- __func__, err);
- }
- if (dev->driver && !strcmp(dev->driver->name, driver))
- ret = 0;
-
- return ret;
-}
-
-static ssize_t ide_driver_proc_write(struct file *file, const char __user *buffer,
- size_t count, loff_t *pos)
-{
- ide_drive_t *drive = PDE_DATA(file_inode(file));
- char name[32];
-
- if (!capable(CAP_SYS_ADMIN))
- return -EACCES;
- if (count > 31)
- count = 31;
- if (copy_from_user(name, buffer, count))
- return -EFAULT;
- name[count] = '\0';
- if (ide_replace_subdriver(drive, name))
- return -EINVAL;
- return count;
-}
-
-static const struct file_operations ide_driver_proc_fops = {
- .owner = THIS_MODULE,
- .open = ide_driver_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
- .write = ide_driver_proc_write,
-};
-
static int ide_media_proc_show(struct seq_file *m, void *v)
{
ide_drive_t *drive = (ide_drive_t *) m->private;
@@ -613,11 +463,10 @@ static const struct file_operations ide_media_proc_fops = {
};
static ide_proc_entry_t generic_drive_entries[] = {
- { "driver", S_IFREG|S_IRUGO, &ide_driver_proc_fops },
- { "identify", S_IFREG|S_IRUSR, &ide_identify_proc_fops},
- { "media", S_IFREG|S_IRUGO, &ide_media_proc_fops },
- { "model", S_IFREG|S_IRUGO, &ide_dmodel_proc_fops },
- { "settings", S_IFREG|S_IRUSR|S_IWUSR, &ide_settings_proc_fops},
+ { "driver", S_IFREG|S_IRUGO, ide_driver_proc_show },
+ { "identify", S_IFREG|S_IRUSR, ide_identify_proc_show },
+ { "media", S_IFREG|S_IRUGO, ide_media_proc_show },
+ { "model", S_IFREG|S_IRUGO, ide_dmodel_proc_show },
{}
};
@@ -628,7 +477,7 @@ static void ide_add_proc_entries(struct proc_dir_entry *dir, ide_proc_entry_t *p
if (!dir || !p)
return;
while (p->name != NULL) {
- ent = proc_create_data(p->name, p->mode, dir, p->proc_fops, data);
+ ent = proc_create_single_data(p->name, p->mode, dir, p->show, data);
if (!ent) return;
p++;
}
@@ -693,8 +542,12 @@ void ide_proc_port_register_devices(ide_hwif_t *hwif)
continue;
drive->proc = proc_mkdir(drive->name, parent);
- if (drive->proc)
+ if (drive->proc) {
ide_add_proc_entries(drive->proc, generic_drive_entries, drive);
+ proc_create_data("setting", S_IFREG|S_IRUSR|S_IWUSR,
+ drive->proc, &ide_settings_proc_fops,
+ drive);
+ }
sprintf(name, "ide%d/%s", (drive->name[2]-'a')/2, drive->name);
ent = proc_symlink(drive->name, proc_ide_root, name);
if (!ent) return;
@@ -704,6 +557,7 @@ void ide_proc_port_register_devices(ide_hwif_t *hwif)
void ide_proc_unregister_device(ide_drive_t *drive)
{
if (drive->proc) {
+ remove_proc_entry("settings", drive->proc);
ide_remove_proc_entries(drive->proc, generic_drive_entries);
remove_proc_entry(drive->name, proc_ide_root);
remove_proc_entry(drive->name, drive->hwif->proc);
@@ -712,9 +566,9 @@ void ide_proc_unregister_device(ide_drive_t *drive)
}
static ide_proc_entry_t hwif_entries[] = {
- { "channel", S_IFREG|S_IRUGO, &ide_channel_proc_fops },
- { "mate", S_IFREG|S_IRUGO, &ide_mate_proc_fops },
- { "model", S_IFREG|S_IRUGO, &ide_imodel_proc_fops },
+ { "channel", S_IFREG|S_IRUGO, ide_channel_proc_show },
+ { "mate", S_IFREG|S_IRUGO, ide_mate_proc_show },
+ { "model", S_IFREG|S_IRUGO, ide_imodel_proc_show },
{}
};
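Every single_open()/file_operations block deleted in these ide files collapses into one proc_create_single_data() call, which wires a show() routine to generated open/read/release plumbing and passes the data pointer through to m->private. Only the writable "settings" entry keeps the old proc_create_data() path, since the single-show helper has no write hook. The pattern in isolation, with a hypothetical entry:

#include <linux/proc_fs.h>
#include <linux/seq_file.h>

struct foo {
	int value;
};

static int foo_proc_show(struct seq_file *m, void *v)
{
	struct foo *foo = m->private; /* the data pointer passed at create */

	seq_printf(m, "%d\n", foo->value);
	return 0;
}

/* at registration time, replacing ~15 lines of boilerplate per entry: */
proc_create_single_data("foo", 0444, parent, foo_proc_show, foo);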
diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c
index fd57e8ccc47a..aee7b46d2330 100644
--- a/drivers/ide/ide-tape.c
+++ b/drivers/ide/ide-tape.c
@@ -854,7 +854,7 @@ static int idetape_queue_rw_tail(ide_drive_t *drive, int cmd, int size)
BUG_ON(cmd != REQ_IDETAPE_READ && cmd != REQ_IDETAPE_WRITE);
BUG_ON(size < 0 || size % tape->blk_size);
- rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM);
+ rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, 0);
ide_req(rq)->type = ATA_PRIV_MISC;
scsi_req(rq)->cmd[13] = cmd;
rq->rq_disk = tape->disk;
@@ -862,7 +862,7 @@ static int idetape_queue_rw_tail(ide_drive_t *drive, int cmd, int size)
if (size) {
ret = blk_rq_map_kern(drive->queue, rq, tape->buf, size,
- __GFP_RECLAIM);
+ GFP_NOIO);
if (ret)
goto out_put;
}
@@ -1847,22 +1847,9 @@ static int idetape_name_proc_show(struct seq_file *m, void *v)
return 0;
}
-static int idetape_name_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, idetape_name_proc_show, PDE_DATA(inode));
-}
-
-static const struct file_operations idetape_name_proc_fops = {
- .owner = THIS_MODULE,
- .open = idetape_name_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
static ide_proc_entry_t idetape_proc[] = {
- { "capacity", S_IFREG|S_IRUGO, &ide_capacity_proc_fops },
- { "name", S_IFREG|S_IRUGO, &idetape_name_proc_fops },
+ { "capacity", S_IFREG|S_IRUGO, ide_capacity_proc_show },
+ { "name", S_IFREG|S_IRUGO, idetape_name_proc_show },
{}
};
diff --git a/drivers/ide/ide-taskfile.c b/drivers/ide/ide-taskfile.c
index abe0822dd429..c034cd965831 100644
--- a/drivers/ide/ide-taskfile.c
+++ b/drivers/ide/ide-taskfile.c
@@ -431,7 +431,7 @@ int ide_raw_taskfile(ide_drive_t *drive, struct ide_cmd *cmd, u8 *buf,
rq = blk_get_request(drive->queue,
(cmd->tf_flags & IDE_TFLAG_WRITE) ?
- REQ_OP_DRV_OUT : REQ_OP_DRV_IN, __GFP_RECLAIM);
+ REQ_OP_DRV_OUT : REQ_OP_DRV_IN, 0);
ide_req(rq)->type = ATA_PRIV_TASKFILE;
/*
@@ -442,7 +442,7 @@ int ide_raw_taskfile(ide_drive_t *drive, struct ide_cmd *cmd, u8 *buf,
*/
if (nsect) {
error = blk_rq_map_kern(drive->queue, rq, buf,
- nsect * SECTOR_SIZE, __GFP_RECLAIM);
+ nsect * SECTOR_SIZE, GFP_NOIO);
if (error)
goto put_req;
}
diff --git a/drivers/iio/adc/Kconfig b/drivers/iio/adc/Kconfig
index 15606f237480..9da79070357c 100644
--- a/drivers/iio/adc/Kconfig
+++ b/drivers/iio/adc/Kconfig
@@ -158,6 +158,7 @@ config AT91_SAMA5D2_ADC
depends on ARCH_AT91 || COMPILE_TEST
depends on HAS_IOMEM
depends on HAS_DMA
+ select IIO_BUFFER
select IIO_TRIGGERED_BUFFER
help
Say yes here to build support for Atmel SAMA5D2 ADC which is
diff --git a/drivers/iio/adc/ad7793.c b/drivers/iio/adc/ad7793.c
index 801afb61310b..d4bbe5b53318 100644
--- a/drivers/iio/adc/ad7793.c
+++ b/drivers/iio/adc/ad7793.c
@@ -348,55 +348,6 @@ static const u16 ad7793_sample_freq_avail[16] = {0, 470, 242, 123, 62, 50, 39,
static const u16 ad7797_sample_freq_avail[16] = {0, 0, 0, 123, 62, 50, 0,
33, 0, 17, 16, 12, 10, 8, 6, 4};
-static ssize_t ad7793_read_frequency(struct device *dev,
- struct device_attribute *attr,
- char *buf)
-{
- struct iio_dev *indio_dev = dev_to_iio_dev(dev);
- struct ad7793_state *st = iio_priv(indio_dev);
-
- return sprintf(buf, "%d\n",
- st->chip_info->sample_freq_avail[AD7793_MODE_RATE(st->mode)]);
-}
-
-static ssize_t ad7793_write_frequency(struct device *dev,
- struct device_attribute *attr,
- const char *buf,
- size_t len)
-{
- struct iio_dev *indio_dev = dev_to_iio_dev(dev);
- struct ad7793_state *st = iio_priv(indio_dev);
- long lval;
- int i, ret;
-
- ret = kstrtol(buf, 10, &lval);
- if (ret)
- return ret;
-
- if (lval == 0)
- return -EINVAL;
-
- for (i = 0; i < 16; i++)
- if (lval == st->chip_info->sample_freq_avail[i])
- break;
- if (i == 16)
- return -EINVAL;
-
- ret = iio_device_claim_direct_mode(indio_dev);
- if (ret)
- return ret;
- st->mode &= ~AD7793_MODE_RATE(-1);
- st->mode |= AD7793_MODE_RATE(i);
- ad_sd_write_reg(&st->sd, AD7793_REG_MODE, sizeof(st->mode), st->mode);
- iio_device_release_direct_mode(indio_dev);
-
- return len;
-}
-
-static IIO_DEV_ATTR_SAMP_FREQ(S_IWUSR | S_IRUGO,
- ad7793_read_frequency,
- ad7793_write_frequency);
-
static IIO_CONST_ATTR_SAMP_FREQ_AVAIL(
"470 242 123 62 50 39 33 19 17 16 12 10 8 6 4");
@@ -424,7 +375,6 @@ static IIO_DEVICE_ATTR_NAMED(in_m_in_scale_available,
ad7793_show_scale_available, NULL, 0);
static struct attribute *ad7793_attributes[] = {
- &iio_dev_attr_sampling_frequency.dev_attr.attr,
&iio_const_attr_sampling_frequency_available.dev_attr.attr,
&iio_dev_attr_in_m_in_scale_available.dev_attr.attr,
NULL
@@ -435,7 +385,6 @@ static const struct attribute_group ad7793_attribute_group = {
};
static struct attribute *ad7797_attributes[] = {
- &iio_dev_attr_sampling_frequency.dev_attr.attr,
&iio_const_attr_sampling_frequency_available_ad7797.dev_attr.attr,
NULL
};
@@ -505,6 +454,10 @@ static int ad7793_read_raw(struct iio_dev *indio_dev,
*val -= offset;
}
return IIO_VAL_INT;
+ case IIO_CHAN_INFO_SAMP_FREQ:
+ *val = st->chip_info
+ ->sample_freq_avail[AD7793_MODE_RATE(st->mode)];
+ return IIO_VAL_INT;
}
return -EINVAL;
}
@@ -542,6 +495,26 @@ static int ad7793_write_raw(struct iio_dev *indio_dev,
break;
}
break;
+ case IIO_CHAN_INFO_SAMP_FREQ:
+ if (!val) {
+ ret = -EINVAL;
+ break;
+ }
+
+ for (i = 0; i < 16; i++)
+ if (val == st->chip_info->sample_freq_avail[i])
+ break;
+
+ if (i == 16) {
+ ret = -EINVAL;
+ break;
+ }
+
+ st->mode &= ~AD7793_MODE_RATE(-1);
+ st->mode |= AD7793_MODE_RATE(i);
+ ad_sd_write_reg(&st->sd, AD7793_REG_MODE, sizeof(st->mode),
+ st->mode);
+ break;
default:
ret = -EINVAL;
}
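The ad7793 conversion moves the sampling frequency from a driver-private sysfs attribute into the core IIO_CHAN_INFO_SAMP_FREQ path, so in-kernel consumers and the buffered interface see it too. The hook-up, sketched under the assumption of a standard info-mask channel spec (not the full ad7793 code):

#include <linux/iio/iio.h>

static const struct iio_chan_spec foo_channels[] = {
	{
		.type = IIO_VOLTAGE,
		.indexed = 1,
		.channel = 0,
		.info_mask_separate = BIT(IIO_CHAN_INFO_RAW),
		.info_mask_shared_by_all = BIT(IIO_CHAN_INFO_SAMP_FREQ),
	},
};

static int foo_read_raw(struct iio_dev *indio_dev,
			struct iio_chan_spec const *chan,
			int *val, int *val2, long mask)
{
	switch (mask) {
	case IIO_CHAN_INFO_SAMP_FREQ:
		*val = foo_get_rate(indio_dev); /* hypothetical helper */
		return IIO_VAL_INT;
	default:
		return -EINVAL;
	}
}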
diff --git a/drivers/iio/adc/at91-sama5d2_adc.c b/drivers/iio/adc/at91-sama5d2_adc.c
index 4eff8351ce29..8729d6524b4d 100644
--- a/drivers/iio/adc/at91-sama5d2_adc.c
+++ b/drivers/iio/adc/at91-sama5d2_adc.c
@@ -333,6 +333,27 @@ static const struct iio_chan_spec at91_adc_channels[] = {
+ AT91_SAMA5D2_DIFF_CHAN_CNT + 1),
};
+static int at91_adc_chan_xlate(struct iio_dev *indio_dev, int chan)
+{
+ int i;
+
+ for (i = 0; i < indio_dev->num_channels; i++) {
+ if (indio_dev->channels[i].scan_index == chan)
+ return i;
+ }
+ return -EINVAL;
+}
+
+static inline struct iio_chan_spec const *
+at91_adc_chan_get(struct iio_dev *indio_dev, int chan)
+{
+ int index = at91_adc_chan_xlate(indio_dev, chan);
+
+ if (index < 0)
+ return NULL;
+ return indio_dev->channels + index;
+}
+
static int at91_adc_configure_trigger(struct iio_trigger *trig, bool state)
{
struct iio_dev *indio = iio_trigger_get_drvdata(trig);
@@ -350,8 +371,10 @@ static int at91_adc_configure_trigger(struct iio_trigger *trig, bool state)
at91_adc_writel(st, AT91_SAMA5D2_TRGR, status);
for_each_set_bit(bit, indio->active_scan_mask, indio->num_channels) {
- struct iio_chan_spec const *chan = indio->channels + bit;
+ struct iio_chan_spec const *chan = at91_adc_chan_get(indio, bit);
+ if (!chan)
+ continue;
if (state) {
at91_adc_writel(st, AT91_SAMA5D2_CHER,
BIT(chan->channel));
@@ -448,7 +471,11 @@ static int at91_adc_dma_start(struct iio_dev *indio_dev)
for_each_set_bit(bit, indio_dev->active_scan_mask,
indio_dev->num_channels) {
- struct iio_chan_spec const *chan = indio_dev->channels + bit;
+ struct iio_chan_spec const *chan =
+ at91_adc_chan_get(indio_dev, bit);
+
+ if (!chan)
+ continue;
st->dma_st.rx_buf_sz += chan->scan_type.storagebits / 8;
}
@@ -526,8 +553,11 @@ static int at91_adc_buffer_predisable(struct iio_dev *indio_dev)
*/
for_each_set_bit(bit, indio_dev->active_scan_mask,
indio_dev->num_channels) {
- struct iio_chan_spec const *chan = indio_dev->channels + bit;
+ struct iio_chan_spec const *chan =
+ at91_adc_chan_get(indio_dev, bit);
+ if (!chan)
+ continue;
if (st->dma_st.dma_chan)
at91_adc_readl(st, chan->address);
}
@@ -587,8 +617,11 @@ static void at91_adc_trigger_handler_nodma(struct iio_dev *indio_dev,
for_each_set_bit(bit, indio_dev->active_scan_mask,
indio_dev->num_channels) {
- struct iio_chan_spec const *chan = indio_dev->channels + bit;
+ struct iio_chan_spec const *chan =
+ at91_adc_chan_get(indio_dev, bit);
+ if (!chan)
+ continue;
st->buffer[i] = at91_adc_readl(st, chan->address);
i++;
}
diff --git a/drivers/iio/adc/stm32-dfsdm-adc.c b/drivers/iio/adc/stm32-dfsdm-adc.c
index 01422d11753c..b28a716a23b2 100644
--- a/drivers/iio/adc/stm32-dfsdm-adc.c
+++ b/drivers/iio/adc/stm32-dfsdm-adc.c
@@ -144,6 +144,7 @@ static int stm32_dfsdm_set_osrs(struct stm32_dfsdm_filter *fl,
* Leave as soon as the exact resolution is reached.
* Otherwise the higher resolution below 32 bits is kept.
*/
+ fl->res = 0;
for (fosr = 1; fosr <= DFSDM_MAX_FL_OVERSAMPLING; fosr++) {
for (iosr = 1; iosr <= DFSDM_MAX_INT_OVERSAMPLING; iosr++) {
if (fast)
@@ -193,7 +194,7 @@ static int stm32_dfsdm_set_osrs(struct stm32_dfsdm_filter *fl,
}
}
- if (!fl->fosr)
+ if (!fl->res)
return -EINVAL;
return 0;
@@ -770,7 +771,7 @@ static int stm32_dfsdm_write_raw(struct iio_dev *indio_dev,
struct stm32_dfsdm_adc *adc = iio_priv(indio_dev);
struct stm32_dfsdm_filter *fl = &adc->dfsdm->fl_list[adc->fl_id];
struct stm32_dfsdm_channel *ch = &adc->dfsdm->ch_list[chan->channel];
- unsigned int spi_freq = adc->spi_freq;
+ unsigned int spi_freq;
int ret = -EINVAL;
switch (mask) {
@@ -784,8 +785,18 @@ static int stm32_dfsdm_write_raw(struct iio_dev *indio_dev,
case IIO_CHAN_INFO_SAMP_FREQ:
if (!val)
return -EINVAL;
- if (ch->src != DFSDM_CHANNEL_SPI_CLOCK_EXTERNAL)
+
+ switch (ch->src) {
+ case DFSDM_CHANNEL_SPI_CLOCK_INTERNAL:
spi_freq = adc->dfsdm->spi_master_freq;
+ break;
+ case DFSDM_CHANNEL_SPI_CLOCK_INTERNAL_DIV2_FALLING:
+ case DFSDM_CHANNEL_SPI_CLOCK_INTERNAL_DIV2_RISING:
+ spi_freq = adc->dfsdm->spi_master_freq / 2;
+ break;
+ default:
+ spi_freq = adc->spi_freq;
+ }
if (spi_freq % val)
dev_warn(&indio_dev->dev,
diff --git a/drivers/iio/buffer/industrialio-buffer-dma.c b/drivers/iio/buffer/industrialio-buffer-dma.c
index 05e0c353e089..b32bf57910ca 100644
--- a/drivers/iio/buffer/industrialio-buffer-dma.c
+++ b/drivers/iio/buffer/industrialio-buffer-dma.c
@@ -587,7 +587,7 @@ EXPORT_SYMBOL_GPL(iio_dma_buffer_set_bytes_per_datum);
* Should be used as the set_length callback for iio_buffer_access_ops
* struct for DMA buffers.
*/
-int iio_dma_buffer_set_length(struct iio_buffer *buffer, int length)
+int iio_dma_buffer_set_length(struct iio_buffer *buffer, unsigned int length)
{
/* Avoid an invalid state */
if (length < 2)
diff --git a/drivers/iio/buffer/kfifo_buf.c b/drivers/iio/buffer/kfifo_buf.c
index 047fe757ab97..70c302a93d7f 100644
--- a/drivers/iio/buffer/kfifo_buf.c
+++ b/drivers/iio/buffer/kfifo_buf.c
@@ -22,11 +22,18 @@ struct iio_kfifo {
#define iio_to_kfifo(r) container_of(r, struct iio_kfifo, buffer)
static inline int __iio_allocate_kfifo(struct iio_kfifo *buf,
- int bytes_per_datum, int length)
+ size_t bytes_per_datum, unsigned int length)
{
if ((length == 0) || (bytes_per_datum == 0))
return -EINVAL;
+ /*
+ * Make sure we don't overflow an unsigned int after kfifo rounds up to
+ * the next power of 2.
+ */
+ if (roundup_pow_of_two(length) > UINT_MAX / bytes_per_datum)
+ return -EINVAL;
+
return __kfifo_alloc((struct __kfifo *)&buf->kf, length,
bytes_per_datum, GFP_KERNEL);
}
@@ -67,7 +74,7 @@ static int iio_set_bytes_per_datum_kfifo(struct iio_buffer *r, size_t bpd)
return 0;
}
-static int iio_set_length_kfifo(struct iio_buffer *r, int length)
+static int iio_set_length_kfifo(struct iio_buffer *r, unsigned int length)
{
/* Avoid an invalid state */
if (length < 2)
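The new guard matters because __kfifo_alloc() rounds the requested length up to a power of two and then multiplies by the element size; without the check that product can wrap an unsigned int and allocate a much smaller fifo than requested. A standalone demonstration of a rejected case:

#include <limits.h>
#include <stdio.h>

/* what roundup_pow_of_two() computes, sketched for clarity */
static unsigned long roundup_p2(unsigned long x)
{
	unsigned long r = 1;

	while (r < x)
		r <<= 1;
	return r;
}

int main(void)
{
	unsigned int bytes_per_datum = 64;
	unsigned int length = 0x04000001; /* rounds up to 0x08000000 */

	/* 0x08000000 * 64 == 2^33: wraps a 32-bit unsigned size */
	if (roundup_p2(length) > UINT_MAX / bytes_per_datum)
		printf("rejected: size would overflow UINT_MAX\n");
	return 0;
}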
diff --git a/drivers/iio/common/hid-sensors/hid-sensor-trigger.c b/drivers/iio/common/hid-sensors/hid-sensor-trigger.c
index cfb6588565ba..4905a997a7ec 100644
--- a/drivers/iio/common/hid-sensors/hid-sensor-trigger.c
+++ b/drivers/iio/common/hid-sensors/hid-sensor-trigger.c
@@ -178,14 +178,14 @@ int hid_sensor_power_state(struct hid_sensor_common *st, bool state)
#ifdef CONFIG_PM
int ret;
- atomic_set(&st->user_requested_state, state);
-
if (atomic_add_unless(&st->runtime_pm_enable, 1, 1))
pm_runtime_enable(&st->pdev->dev);
- if (state)
+ if (state) {
+ atomic_inc(&st->user_requested_state);
ret = pm_runtime_get_sync(&st->pdev->dev);
- else {
+ } else {
+ atomic_dec(&st->user_requested_state);
pm_runtime_mark_last_busy(&st->pdev->dev);
pm_runtime_use_autosuspend(&st->pdev->dev);
ret = pm_runtime_put_autosuspend(&st->pdev->dev);
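The hid-sensor change turns user_requested_state from a last-writer-wins boolean into a balance counter, so overlapping enable/disable requests from independent callers no longer clobber each other. The counting idea in a standalone sketch (illustrative, not the driver code):

#include <stdio.h>

static int user_requested; /* stands in for the atomic_t counter */

static void power_state(int on)
{
	if (on)
		user_requested++; /* each enable must be balanced... */
	else
		user_requested--; /* ...by exactly one disable */
	printf("requested=%d -> device %s\n", user_requested,
	       user_requested > 0 ? "on" : "off");
}

int main(void)
{
	power_state(1); /* caller A enables */
	power_state(1); /* caller B enables */
	power_state(0); /* caller A disables: device must stay on */
	power_state(0); /* caller B disables: now it may power down */
	return 0;
}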
diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig
index ee270e065ba9..2a972ed6851b 100644
--- a/drivers/infiniband/Kconfig
+++ b/drivers/infiniband/Kconfig
@@ -61,9 +61,12 @@ config INFINIBAND_ON_DEMAND_PAGING
pages on demand instead.
config INFINIBAND_ADDR_TRANS
- bool
+ bool "RDMA/CM"
depends on INFINIBAND
default y
+ ---help---
+ Support for RDMA communication manager (CM).
+ This allows for a generic connection abstraction over RDMA.
config INFINIBAND_ADDR_TRANS_CONFIGFS
bool
diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c
index e337b08de2ff..ecc55e98ddd3 100644
--- a/drivers/infiniband/core/cache.c
+++ b/drivers/infiniband/core/cache.c
@@ -291,14 +291,18 @@ static int find_gid(struct ib_gid_table *table, const union ib_gid *gid,
* so lookup free slot only if requested.
*/
if (pempty && empty < 0) {
- if (data->props & GID_TABLE_ENTRY_INVALID) {
- /* Found an invalid (free) entry; allocate it */
- if (data->props & GID_TABLE_ENTRY_DEFAULT) {
- if (default_gid)
- empty = curr_index;
- } else {
- empty = curr_index;
- }
+ if (data->props & GID_TABLE_ENTRY_INVALID &&
+ (default_gid ==
+ !!(data->props & GID_TABLE_ENTRY_DEFAULT))) {
+ /*
+ * Found an invalid (free) entry; allocate it.
+ * If default GID is requested, then our
+ * found slot must be one of the DEFAULT
+ * reserved slots or we fail.
+ * This ensures that only DEFAULT reserved
+ * slots are used for default property GIDs.
+ */
+ empty = curr_index;
}
}
@@ -420,8 +424,10 @@ int ib_cache_gid_add(struct ib_device *ib_dev, u8 port,
return ret;
}
-int ib_cache_gid_del(struct ib_device *ib_dev, u8 port,
- union ib_gid *gid, struct ib_gid_attr *attr)
+static int
+_ib_cache_gid_del(struct ib_device *ib_dev, u8 port,
+ union ib_gid *gid, struct ib_gid_attr *attr,
+ unsigned long mask, bool default_gid)
{
struct ib_gid_table *table;
int ret = 0;
@@ -431,11 +437,7 @@ int ib_cache_gid_del(struct ib_device *ib_dev, u8 port,
mutex_lock(&table->lock);
- ix = find_gid(table, gid, attr, false,
- GID_ATTR_FIND_MASK_GID |
- GID_ATTR_FIND_MASK_GID_TYPE |
- GID_ATTR_FIND_MASK_NETDEV,
- NULL);
+ ix = find_gid(table, gid, attr, default_gid, mask, NULL);
if (ix < 0) {
ret = -EINVAL;
goto out_unlock;
@@ -452,6 +454,17 @@ out_unlock:
return ret;
}
+int ib_cache_gid_del(struct ib_device *ib_dev, u8 port,
+ union ib_gid *gid, struct ib_gid_attr *attr)
+{
+ unsigned long mask = GID_ATTR_FIND_MASK_GID |
+ GID_ATTR_FIND_MASK_GID_TYPE |
+ GID_ATTR_FIND_MASK_DEFAULT |
+ GID_ATTR_FIND_MASK_NETDEV;
+
+ return _ib_cache_gid_del(ib_dev, port, gid, attr, mask, false);
+}
+
int ib_cache_gid_del_all_netdev_gids(struct ib_device *ib_dev, u8 port,
struct net_device *ndev)
{
@@ -489,7 +502,7 @@ static int __ib_cache_gid_get(struct ib_device *ib_dev, u8 port, int index,
return -EINVAL;
if (table->data_vec[index].props & GID_TABLE_ENTRY_INVALID)
- return -EAGAIN;
+ return -EINVAL;
memcpy(gid, &table->data_vec[index].gid, sizeof(*gid));
if (attr) {
@@ -728,7 +741,7 @@ void ib_cache_gid_set_default_gid(struct ib_device *ib_dev, u8 port,
unsigned long gid_type_mask,
enum ib_cache_gid_default_mode mode)
{
- union ib_gid gid;
+ union ib_gid gid = { };
struct ib_gid_attr gid_attr;
struct ib_gid_table *table;
unsigned int gid_type;
@@ -736,7 +749,9 @@ void ib_cache_gid_set_default_gid(struct ib_device *ib_dev, u8 port,
table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid;
- make_default_gid(ndev, &gid);
+ mask = GID_ATTR_FIND_MASK_GID_TYPE |
+ GID_ATTR_FIND_MASK_DEFAULT |
+ GID_ATTR_FIND_MASK_NETDEV;
memset(&gid_attr, 0, sizeof(gid_attr));
gid_attr.ndev = ndev;
@@ -747,12 +762,12 @@ void ib_cache_gid_set_default_gid(struct ib_device *ib_dev, u8 port,
gid_attr.gid_type = gid_type;
if (mode == IB_CACHE_GID_DEFAULT_MODE_SET) {
- mask = GID_ATTR_FIND_MASK_GID_TYPE |
- GID_ATTR_FIND_MASK_DEFAULT;
+ make_default_gid(ndev, &gid);
__ib_cache_gid_add(ib_dev, port, &gid,
&gid_attr, mask, true);
} else if (mode == IB_CACHE_GID_DEFAULT_MODE_DELETE) {
- ib_cache_gid_del(ib_dev, port, &gid, &gid_attr);
+ _ib_cache_gid_del(ib_dev, port, &gid,
+ &gid_attr, mask, true);
}
}
}
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 51a641002e10..a693fcd4c513 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -382,6 +382,8 @@ struct cma_hdr {
#define CMA_VERSION 0x00
struct cma_req_info {
+ struct sockaddr_storage listen_addr_storage;
+ struct sockaddr_storage src_addr_storage;
struct ib_device *device;
int port;
union ib_gid local_gid;
@@ -866,7 +868,6 @@ static int cma_modify_qp_rtr(struct rdma_id_private *id_priv,
{
struct ib_qp_attr qp_attr;
int qp_attr_mask, ret;
- union ib_gid sgid;
mutex_lock(&id_priv->qp_mutex);
if (!id_priv->id.qp) {
@@ -889,12 +890,6 @@ static int cma_modify_qp_rtr(struct rdma_id_private *id_priv,
if (ret)
goto out;
- ret = ib_query_gid(id_priv->id.device, id_priv->id.port_num,
- rdma_ah_read_grh(&qp_attr.ah_attr)->sgid_index,
- &sgid, NULL);
- if (ret)
- goto out;
-
BUG_ON(id_priv->cma_dev->device != id_priv->id.device);
if (conn_param)
@@ -1340,11 +1335,11 @@ static bool validate_net_dev(struct net_device *net_dev,
}
static struct net_device *cma_get_net_dev(struct ib_cm_event *ib_event,
- const struct cma_req_info *req)
+ struct cma_req_info *req)
{
- struct sockaddr_storage listen_addr_storage, src_addr_storage;
- struct sockaddr *listen_addr = (struct sockaddr *)&listen_addr_storage,
- *src_addr = (struct sockaddr *)&src_addr_storage;
+ struct sockaddr *listen_addr =
+ (struct sockaddr *)&req->listen_addr_storage;
+ struct sockaddr *src_addr = (struct sockaddr *)&req->src_addr_storage;
struct net_device *net_dev;
const union ib_gid *gid = req->has_gid ? &req->local_gid : NULL;
int err;
@@ -1359,11 +1354,6 @@ static struct net_device *cma_get_net_dev(struct ib_cm_event *ib_event,
if (!net_dev)
return ERR_PTR(-ENODEV);
- if (!validate_net_dev(net_dev, listen_addr, src_addr)) {
- dev_put(net_dev);
- return ERR_PTR(-EHOSTUNREACH);
- }
-
return net_dev;
}
@@ -1490,15 +1480,51 @@ static struct rdma_id_private *cma_id_from_event(struct ib_cm_id *cm_id,
}
}
+ /*
+ * The net namespace might be getting deleted while the route and
+ * cm_id lookups are in progress, so perform the netdevice validation
+ * and the cm_id lookup under the RCU lock.
+ * The RCU lock, together with the netdevice state check, synchronizes
+ * with a netdevice migrating to a different net namespace and also
+ * ensures the net namespace is not deleted while the lookup is in
+ * progress.
+ * If the device state is not IFF_UP, its properties such as ifindex
+ * and nd_net cannot be trusted to remain valid without the RCU lock.
+ * net/core/dev.c change_net_namespace() synchronizes with ongoing
+ * operations on a net device after the device is closed, using
+ * synchronize_net().
+ */
+ rcu_read_lock();
+ if (*net_dev) {
+ /*
+ * If the netdevice is down, it is likely administratively down or
+ * migrating to a different namespace. In that case avoid further
+ * processing, as the net namespace or ifindex may change.
+ */
+ if (((*net_dev)->flags & IFF_UP) == 0) {
+ id_priv = ERR_PTR(-EHOSTUNREACH);
+ goto err;
+ }
+
+ if (!validate_net_dev(*net_dev,
+ (struct sockaddr *)&req.listen_addr_storage,
+ (struct sockaddr *)&req.src_addr_storage)) {
+ id_priv = ERR_PTR(-EHOSTUNREACH);
+ goto err;
+ }
+ }
+
bind_list = cma_ps_find(*net_dev ? dev_net(*net_dev) : &init_net,
rdma_ps_from_service_id(req.service_id),
cma_port_from_service_id(req.service_id));
id_priv = cma_find_listener(bind_list, cm_id, ib_event, &req, *net_dev);
+err:
+ rcu_read_unlock();
if (IS_ERR(id_priv) && *net_dev) {
dev_put(*net_dev);
*net_dev = NULL;
}
-
return id_priv;
}
diff --git a/drivers/infiniband/core/iwpm_util.c b/drivers/infiniband/core/iwpm_util.c
index 9821ae900f6d..da12da1c36f6 100644
--- a/drivers/infiniband/core/iwpm_util.c
+++ b/drivers/infiniband/core/iwpm_util.c
@@ -114,7 +114,7 @@ int iwpm_create_mapinfo(struct sockaddr_storage *local_sockaddr,
struct sockaddr_storage *mapped_sockaddr,
u8 nl_client)
{
- struct hlist_head *hash_bucket_head;
+ struct hlist_head *hash_bucket_head = NULL;
struct iwpm_mapping_info *map_info;
unsigned long flags;
int ret = -EINVAL;
@@ -142,6 +142,9 @@ int iwpm_create_mapinfo(struct sockaddr_storage *local_sockaddr,
}
}
spin_unlock_irqrestore(&iwpm_mapinfo_lock, flags);
+
+ if (!hash_bucket_head)
+ kfree(map_info);
return ret;
}
diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index c50596f7f98a..b28452a55a08 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -59,7 +59,7 @@ module_param_named(recv_queue_size, mad_recvq_size, int, 0444);
MODULE_PARM_DESC(recv_queue_size, "Size of receive queue in number of work requests");
static struct list_head ib_mad_port_list;
-static u32 ib_mad_client_id = 0;
+static atomic_t ib_mad_client_id = ATOMIC_INIT(0);
/* Port list lock */
static DEFINE_SPINLOCK(ib_mad_port_list_lock);
@@ -377,7 +377,7 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
}
spin_lock_irqsave(&port_priv->reg_lock, flags);
- mad_agent_priv->agent.hi_tid = ++ib_mad_client_id;
+ mad_agent_priv->agent.hi_tid = atomic_inc_return(&ib_mad_client_id);
/*
* Make sure MAD registration (if supplied)
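The hi_tid assignment above was a plain ++ on a global u32 guarded only by a per-port lock, so agents registering on different ports could draw the same client id; atomic_inc_return() makes the allocation race-free without widening any lock. The pattern in isolation:

#include <linux/atomic.h>
#include <linux/types.h>

static atomic_t next_client_id = ATOMIC_INIT(0);

/* every caller gets a distinct id, whatever locks it does or does not hold */
static u32 alloc_client_id(void)
{
	return atomic_inc_return(&next_client_id);
}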
diff --git a/drivers/infiniband/core/roce_gid_mgmt.c b/drivers/infiniband/core/roce_gid_mgmt.c
index cc2966380c0c..c0e4fd55e2cc 100644
--- a/drivers/infiniband/core/roce_gid_mgmt.c
+++ b/drivers/infiniband/core/roce_gid_mgmt.c
@@ -255,6 +255,7 @@ static void bond_delete_netdev_default_gids(struct ib_device *ib_dev,
struct net_device *rdma_ndev)
{
struct net_device *real_dev = rdma_vlan_dev_real_dev(event_ndev);
+ unsigned long gid_type_mask;
if (!rdma_ndev)
return;
@@ -264,21 +265,22 @@ static void bond_delete_netdev_default_gids(struct ib_device *ib_dev,
rcu_read_lock();
- if (rdma_is_upper_dev_rcu(rdma_ndev, event_ndev) &&
- is_eth_active_slave_of_bonding_rcu(rdma_ndev, real_dev) ==
- BONDING_SLAVE_STATE_INACTIVE) {
- unsigned long gid_type_mask;
-
+ if ((rdma_ndev != event_ndev &&
+      !rdma_is_upper_dev_rcu(rdma_ndev, event_ndev)) ||
+     is_eth_active_slave_of_bonding_rcu(rdma_ndev, real_dev) ==
+     BONDING_SLAVE_STATE_INACTIVE) {
rcu_read_unlock();
+ return;
+ }
- gid_type_mask = roce_gid_type_mask_support(ib_dev, port);
+ rcu_read_unlock();
- ib_cache_gid_set_default_gid(ib_dev, port, rdma_ndev,
- gid_type_mask,
- IB_CACHE_GID_DEFAULT_MODE_DELETE);
- } else {
- rcu_read_unlock();
- }
+ gid_type_mask = roce_gid_type_mask_support(ib_dev, port);
+
+ ib_cache_gid_set_default_gid(ib_dev, port, rdma_ndev,
+ gid_type_mask,
+ IB_CACHE_GID_DEFAULT_MODE_DELETE);
}
static void enum_netdev_ipv4_ips(struct ib_device *ib_dev,
diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c
index 74329483af6d..eab43b17e9cf 100644
--- a/drivers/infiniband/core/ucma.c
+++ b/drivers/infiniband/core/ucma.c
@@ -159,6 +159,23 @@ static void ucma_put_ctx(struct ucma_context *ctx)
complete(&ctx->comp);
}
+/*
+ * Same as ucma_get_ctx() but requires that ->cm_id->device is valid, i.e.
+ * that the CM_ID is bound.
+ */
+static struct ucma_context *ucma_get_ctx_dev(struct ucma_file *file, int id)
+{
+ struct ucma_context *ctx = ucma_get_ctx(file, id);
+
+ if (IS_ERR(ctx))
+ return ctx;
+ if (!ctx->cm_id->device) {
+ ucma_put_ctx(ctx);
+ return ERR_PTR(-EINVAL);
+ }
+ return ctx;
+}
+
static void ucma_close_event_id(struct work_struct *work)
{
struct ucma_event *uevent_close = container_of(work, struct ucma_event, close_work);
@@ -683,7 +700,7 @@ static ssize_t ucma_resolve_ip(struct ucma_file *file,
if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
return -EFAULT;
- if (!rdma_addr_size_in6(&cmd.src_addr) ||
+ if ((cmd.src_addr.sin6_family && !rdma_addr_size_in6(&cmd.src_addr)) ||
!rdma_addr_size_in6(&cmd.dst_addr))
return -EINVAL;
@@ -734,7 +751,7 @@ static ssize_t ucma_resolve_route(struct ucma_file *file,
if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
return -EFAULT;
- ctx = ucma_get_ctx(file, cmd.id);
+ ctx = ucma_get_ctx_dev(file, cmd.id);
if (IS_ERR(ctx))
return PTR_ERR(ctx);
@@ -1050,7 +1067,7 @@ static ssize_t ucma_connect(struct ucma_file *file, const char __user *inbuf,
if (!cmd.conn_param.valid)
return -EINVAL;
- ctx = ucma_get_ctx(file, cmd.id);
+ ctx = ucma_get_ctx_dev(file, cmd.id);
if (IS_ERR(ctx))
return PTR_ERR(ctx);
@@ -1092,7 +1109,7 @@ static ssize_t ucma_accept(struct ucma_file *file, const char __user *inbuf,
if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
return -EFAULT;
- ctx = ucma_get_ctx(file, cmd.id);
+ ctx = ucma_get_ctx_dev(file, cmd.id);
if (IS_ERR(ctx))
return PTR_ERR(ctx);
@@ -1120,7 +1137,7 @@ static ssize_t ucma_reject(struct ucma_file *file, const char __user *inbuf,
if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
return -EFAULT;
- ctx = ucma_get_ctx(file, cmd.id);
+ ctx = ucma_get_ctx_dev(file, cmd.id);
if (IS_ERR(ctx))
return PTR_ERR(ctx);
@@ -1139,7 +1156,7 @@ static ssize_t ucma_disconnect(struct ucma_file *file, const char __user *inbuf,
if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
return -EFAULT;
- ctx = ucma_get_ctx(file, cmd.id);
+ ctx = ucma_get_ctx_dev(file, cmd.id);
if (IS_ERR(ctx))
return PTR_ERR(ctx);
@@ -1167,15 +1184,10 @@ static ssize_t ucma_init_qp_attr(struct ucma_file *file,
if (cmd.qp_state > IB_QPS_ERR)
return -EINVAL;
- ctx = ucma_get_ctx(file, cmd.id);
+ ctx = ucma_get_ctx_dev(file, cmd.id);
if (IS_ERR(ctx))
return PTR_ERR(ctx);
- if (!ctx->cm_id->device) {
- ret = -EINVAL;
- goto out;
- }
-
resp.qp_attr_mask = 0;
memset(&qp_attr, 0, sizeof qp_attr);
qp_attr.qp_state = cmd.qp_state;
@@ -1316,13 +1328,13 @@ static ssize_t ucma_set_option(struct ucma_file *file, const char __user *inbuf,
if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
return -EFAULT;
+ if (unlikely(cmd.optlen > KMALLOC_MAX_SIZE))
+ return -EINVAL;
+
ctx = ucma_get_ctx(file, cmd.id);
if (IS_ERR(ctx))
return PTR_ERR(ctx);
- if (unlikely(cmd.optlen > KMALLOC_MAX_SIZE))
- return -EINVAL;
-
optval = memdup_user(u64_to_user_ptr(cmd.optval),
cmd.optlen);
if (IS_ERR(optval)) {
@@ -1384,7 +1396,7 @@ static ssize_t ucma_process_join(struct ucma_file *file,
else
return -EINVAL;
- ctx = ucma_get_ctx(file, cmd->id);
+ ctx = ucma_get_ctx_dev(file, cmd->id);
if (IS_ERR(ctx))
return PTR_ERR(ctx);
diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index 9a4e899d94b3..2b6c9b516070 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -119,7 +119,6 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
umem->length = size;
umem->address = addr;
umem->page_shift = PAGE_SHIFT;
- umem->pid = get_task_pid(current, PIDTYPE_PID);
/*
* We ask for writable memory if any of the following
* access flags are set. "Local write" and "remote write"
@@ -132,7 +131,6 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
IB_ACCESS_REMOTE_ATOMIC | IB_ACCESS_MW_BIND));
if (access & IB_ACCESS_ON_DEMAND) {
- put_pid(umem->pid);
ret = ib_umem_odp_get(context, umem, access);
if (ret) {
kfree(umem);
@@ -148,7 +146,6 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
page_list = (struct page **) __get_free_page(GFP_KERNEL);
if (!page_list) {
- put_pid(umem->pid);
kfree(umem);
return ERR_PTR(-ENOMEM);
}
@@ -231,7 +228,6 @@ out:
if (ret < 0) {
if (need_release)
__ib_umem_release(context->device, umem, 0);
- put_pid(umem->pid);
kfree(umem);
} else
current->mm->pinned_vm = locked;
@@ -274,8 +270,7 @@ void ib_umem_release(struct ib_umem *umem)
__ib_umem_release(umem->context->device, umem, 1);
- task = get_pid_task(umem->pid, PIDTYPE_PID);
- put_pid(umem->pid);
+ task = get_pid_task(umem->context->tgid, PIDTYPE_PID);
if (!task)
goto out;
mm = get_task_mm(task);
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 13cb5e4deb86..21a887c9523b 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -691,6 +691,7 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
mr->device = pd->device;
mr->pd = pd;
+ mr->dm = NULL;
mr->uobject = uobj;
atomic_inc(&pd->usecnt);
mr->res.type = RDMA_RESTRACK_MR;
@@ -765,6 +766,11 @@ ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file,
mr = uobj->object;
+ if (mr->dm) {
+ ret = -EINVAL;
+ goto put_uobjs;
+ }
+
if (cmd.flags & IB_MR_REREG_ACCESS) {
ret = ib_check_mr_access(cmd.access_flags);
if (ret)
diff --git a/drivers/infiniband/core/uverbs_ioctl.c b/drivers/infiniband/core/uverbs_ioctl.c
index 8c93970dc8f1..8d32c4ae368c 100644
--- a/drivers/infiniband/core/uverbs_ioctl.c
+++ b/drivers/infiniband/core/uverbs_ioctl.c
@@ -234,6 +234,15 @@ static int uverbs_validate_kernel_mandatory(const struct uverbs_method_spec *met
return -EINVAL;
}
+ for (; i < method_spec->num_buckets; i++) {
+ struct uverbs_attr_spec_hash *attr_spec_bucket =
+ method_spec->attr_buckets[i];
+
+ if (!bitmap_empty(attr_spec_bucket->mandatory_attrs_bitmask,
+ attr_spec_bucket->num_attrs))
+ return -EINVAL;
+ }
+
return 0;
}
diff --git a/drivers/infiniband/core/uverbs_std_types_flow_action.c b/drivers/infiniband/core/uverbs_std_types_flow_action.c
index cbcec3da12f6..b4f016dfa23d 100644
--- a/drivers/infiniband/core/uverbs_std_types_flow_action.c
+++ b/drivers/infiniband/core/uverbs_std_types_flow_action.c
@@ -363,28 +363,28 @@ static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY)(struct ib_device
static const struct uverbs_attr_spec uverbs_flow_action_esp_keymat[] = {
[IB_UVERBS_FLOW_ACTION_ESP_KEYMAT_AES_GCM] = {
- .ptr = {
+ { .ptr = {
.type = UVERBS_ATTR_TYPE_PTR_IN,
UVERBS_ATTR_TYPE(struct ib_uverbs_flow_action_esp_keymat_aes_gcm),
.flags = UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO,
- },
+ } },
},
};
static const struct uverbs_attr_spec uverbs_flow_action_esp_replay[] = {
[IB_UVERBS_FLOW_ACTION_ESP_REPLAY_NONE] = {
- .ptr = {
+ { .ptr = {
.type = UVERBS_ATTR_TYPE_PTR_IN,
/* No need to specify any data */
.len = 0,
- }
+ } }
},
[IB_UVERBS_FLOW_ACTION_ESP_REPLAY_BMP] = {
- .ptr = {
+ { .ptr = {
.type = UVERBS_ATTR_TYPE_PTR_IN,
UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp_replay_bmp, size),
.flags = UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO,
- }
+ } }
},
};
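The extra brace level added above initializes the anonymous union inside the attr spec explicitly; naming a member of an anonymous union directly in a designated initializer trips up some older compilers, and the wrapped form is the portable spelling. A standalone illustration with hypothetical types:

#include <stdio.h>

struct spec {
	union {
		struct {
			int type;
			int len;
		} ptr;
		unsigned long obj;
	};
};

/*
 * ".ptr = { ... }" directly at the struct level is rejected by some
 * older compilers; the extra brace level below initializes the
 * anonymous union explicitly and is accepted everywhere.
 */
static const struct spec s = {
	{ .ptr = { .type = 1, .len = 0 } },
};

int main(void)
{
	printf("type=%d len=%d\n", s.ptr.type, s.ptr.len);
	return 0;
}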
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index 7eff3aeffe01..6ddfb1fade79 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -1656,6 +1656,7 @@ struct ib_mr *ib_alloc_mr(struct ib_pd *pd,
if (!IS_ERR(mr)) {
mr->device = pd->device;
mr->pd = pd;
+ mr->dm = NULL;
mr->uobject = NULL;
atomic_inc(&pd->usecnt);
mr->need_inval = false;
diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c
index f6c739ec8b62..20b9f31052bf 100644
--- a/drivers/infiniband/hw/bnxt_re/main.c
+++ b/drivers/infiniband/hw/bnxt_re/main.c
@@ -185,12 +185,65 @@ static void bnxt_re_shutdown(void *p)
bnxt_re_ib_unreg(rdev, false);
}
+static void bnxt_re_stop_irq(void *handle)
+{
+ struct bnxt_re_dev *rdev = (struct bnxt_re_dev *)handle;
+ struct bnxt_qplib_rcfw *rcfw = &rdev->rcfw;
+ struct bnxt_qplib_nq *nq;
+ int indx;
+
+ for (indx = BNXT_RE_NQ_IDX; indx < rdev->num_msix; indx++) {
+ nq = &rdev->nq[indx - 1];
+ bnxt_qplib_nq_stop_irq(nq, false);
+ }
+
+ bnxt_qplib_rcfw_stop_irq(rcfw, false);
+}
+
+static void bnxt_re_start_irq(void *handle, struct bnxt_msix_entry *ent)
+{
+ struct bnxt_re_dev *rdev = (struct bnxt_re_dev *)handle;
+ struct bnxt_msix_entry *msix_ent = rdev->msix_entries;
+ struct bnxt_qplib_rcfw *rcfw = &rdev->rcfw;
+ struct bnxt_qplib_nq *nq;
+ int indx, rc;
+
+ if (!ent) {
+ /* Not setting the f/w timeout bit in rcfw.
+ * During driver unload the first command to the f/w will
+ * time out, and that will set the timeout bit.
+ */
+ dev_err(rdev_to_dev(rdev), "Failed to re-start IRQs\n");
+ return;
+ }
+
+ /* Vectors may change after restart, so update the device
+ * structure with the new vectors.
+ */
+ for (indx = 0; indx < rdev->num_msix; indx++)
+ rdev->msix_entries[indx].vector = ent[indx].vector;
+
+ bnxt_qplib_rcfw_start_irq(rcfw, msix_ent[BNXT_RE_AEQ_IDX].vector,
+ false);
+ for (indx = BNXT_RE_NQ_IDX ; indx < rdev->num_msix; indx++) {
+ nq = &rdev->nq[indx - 1];
+ rc = bnxt_qplib_nq_start_irq(nq, indx - 1,
+ msix_ent[indx].vector, false);
+ if (rc)
+ dev_warn(rdev_to_dev(rdev),
+ "Failed to reinit NQ index %d\n", indx - 1);
+ }
+}
+
static struct bnxt_ulp_ops bnxt_re_ulp_ops = {
.ulp_async_notifier = NULL,
.ulp_stop = bnxt_re_stop,
.ulp_start = bnxt_re_start,
.ulp_sriov_config = bnxt_re_sriov_config,
- .ulp_shutdown = bnxt_re_shutdown
+ .ulp_shutdown = bnxt_re_shutdown,
+ .ulp_irq_stop = bnxt_re_stop_irq,
+ .ulp_irq_restart = bnxt_re_start_irq
};
/* RoCE -> Net driver */
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c
index 3a78faba8d91..50d8f1fc98d5 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c
@@ -336,22 +336,32 @@ static irqreturn_t bnxt_qplib_nq_irq(int irq, void *dev_instance)
return IRQ_HANDLED;
}
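+/* Mask the NQ doorbell and free its IRQ. With kill == true the worker
+ * tasklet is killed as well (full teardown); otherwise it is only
+ * disabled so bnxt_qplib_nq_start_irq() can re-enable it.
+ */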
+void bnxt_qplib_nq_stop_irq(struct bnxt_qplib_nq *nq, bool kill)
+{
+ tasklet_disable(&nq->worker);
+ /* Mask h/w interrupt */
+ NQ_DB(nq->bar_reg_iomem, nq->hwq.cons, nq->hwq.max_elements);
+ /* Sync with last running IRQ handler */
+ synchronize_irq(nq->vector);
+ if (kill)
+ tasklet_kill(&nq->worker);
+ if (nq->requested) {
+ irq_set_affinity_hint(nq->vector, NULL);
+ free_irq(nq->vector, nq);
+ nq->requested = false;
+ }
+}
+
void bnxt_qplib_disable_nq(struct bnxt_qplib_nq *nq)
{
if (nq->cqn_wq) {
destroy_workqueue(nq->cqn_wq);
nq->cqn_wq = NULL;
}
+
/* Make sure the HW is stopped! */
- synchronize_irq(nq->vector);
- tasklet_disable(&nq->worker);
- tasklet_kill(&nq->worker);
+ bnxt_qplib_nq_stop_irq(nq, true);
- if (nq->requested) {
- irq_set_affinity_hint(nq->vector, NULL);
- free_irq(nq->vector, nq);
- nq->requested = false;
- }
if (nq->bar_reg_iomem)
iounmap(nq->bar_reg_iomem);
nq->bar_reg_iomem = NULL;
@@ -361,6 +371,40 @@ void bnxt_qplib_disable_nq(struct bnxt_qplib_nq *nq)
nq->vector = 0;
}
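+/* (Re)attach the NQ interrupt: init the worker tasklet on first use
+ * (need_init) or re-enable it on restart, request the vector, restore
+ * the CPU affinity hint and re-arm the NQ doorbell.
+ */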
+int bnxt_qplib_nq_start_irq(struct bnxt_qplib_nq *nq, int nq_indx,
+ int msix_vector, bool need_init)
+{
+ int rc;
+
+ if (nq->requested)
+ return -EFAULT;
+
+ nq->vector = msix_vector;
+ if (need_init)
+ tasklet_init(&nq->worker, bnxt_qplib_service_nq,
+ (unsigned long)nq);
+ else
+ tasklet_enable(&nq->worker);
+
+ snprintf(nq->name, sizeof(nq->name), "bnxt_qplib_nq-%d", nq_indx);
+ rc = request_irq(nq->vector, bnxt_qplib_nq_irq, 0, nq->name, nq);
+ if (rc)
+ return rc;
+
+ cpumask_clear(&nq->mask);
+ cpumask_set_cpu(nq_indx, &nq->mask);
+ rc = irq_set_affinity_hint(nq->vector, &nq->mask);
+ if (rc) {
+ dev_warn(&nq->pdev->dev,
+ "QPLIB: set affinity failed; vector: %d nq_idx: %d\n",
+ nq->vector, nq_indx);
+ }
+ nq->requested = true;
+ NQ_DB_REARM(nq->bar_reg_iomem, nq->hwq.cons, nq->hwq.max_elements);
+
+ return rc;
+}
+
int bnxt_qplib_enable_nq(struct pci_dev *pdev, struct bnxt_qplib_nq *nq,
int nq_idx, int msix_vector, int bar_reg_offset,
int (*cqn_handler)(struct bnxt_qplib_nq *nq,
@@ -372,41 +416,17 @@ int bnxt_qplib_enable_nq(struct pci_dev *pdev, struct bnxt_qplib_nq *nq,
resource_size_t nq_base;
int rc = -1;
- nq->pdev = pdev;
- nq->vector = msix_vector;
if (cqn_handler)
nq->cqn_handler = cqn_handler;
if (srqn_handler)
nq->srqn_handler = srqn_handler;
- tasklet_init(&nq->worker, bnxt_qplib_service_nq, (unsigned long)nq);
-
/* Have a task to schedule CQ notifiers in post send case */
nq->cqn_wq = create_singlethread_workqueue("bnxt_qplib_nq");
if (!nq->cqn_wq)
- goto fail;
-
- nq->requested = false;
- memset(nq->name, 0, 32);
- sprintf(nq->name, "bnxt_qplib_nq-%d", nq_idx);
- rc = request_irq(nq->vector, bnxt_qplib_nq_irq, 0, nq->name, nq);
- if (rc) {
- dev_err(&nq->pdev->dev,
- "Failed to request IRQ for NQ: %#x", rc);
- goto fail;
- }
-
- cpumask_clear(&nq->mask);
- cpumask_set_cpu(nq_idx, &nq->mask);
- rc = irq_set_affinity_hint(nq->vector, &nq->mask);
- if (rc) {
- dev_warn(&nq->pdev->dev,
- "QPLIB: set affinity failed; vector: %d nq_idx: %d\n",
- nq->vector, nq_idx);
- }
+ return -ENOMEM;
- nq->requested = true;
nq->bar_reg = NQ_CONS_PCI_BAR_REGION;
nq->bar_reg_off = bar_reg_offset;
nq_base = pci_resource_start(pdev, nq->bar_reg);
@@ -419,7 +439,13 @@ int bnxt_qplib_enable_nq(struct pci_dev *pdev, struct bnxt_qplib_nq *nq,
rc = -ENOMEM;
goto fail;
}
- NQ_DB_REARM(nq->bar_reg_iomem, nq->hwq.cons, nq->hwq.max_elements);
+
+ rc = bnxt_qplib_nq_start_irq(nq, nq_idx, msix_vector, true);
+ if (rc) {
+ dev_err(&nq->pdev->dev,
+ "QPLIB: Failed to request irq for nq-idx %d", nq_idx);
+ goto fail;
+ }
return 0;
fail:
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.h b/drivers/infiniband/hw/bnxt_re/qplib_fp.h
index ade9f13c0fd1..72352ca80ace 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_fp.h
+++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.h
@@ -467,7 +467,10 @@ struct bnxt_qplib_nq_work {
struct bnxt_qplib_cq *cq;
};
+void bnxt_qplib_nq_stop_irq(struct bnxt_qplib_nq *nq, bool kill);
void bnxt_qplib_disable_nq(struct bnxt_qplib_nq *nq);
+int bnxt_qplib_nq_start_irq(struct bnxt_qplib_nq *nq, int nq_indx,
+ int msix_vector, bool need_init);
int bnxt_qplib_enable_nq(struct pci_dev *pdev, struct bnxt_qplib_nq *nq,
int nq_idx, int msix_vector, int bar_reg_offset,
int (*cqn_handler)(struct bnxt_qplib_nq *nq,
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
index 80027a494730..2852d350ada1 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
@@ -582,19 +582,29 @@ fail:
return -ENOMEM;
}
-void bnxt_qplib_disable_rcfw_channel(struct bnxt_qplib_rcfw *rcfw)
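+/* Counterpart of bnxt_qplib_rcfw_start_irq(): mask the CREQ doorbell,
+ * sync with a running handler and free the vector; kill == true also
+ * kills the worker tasklet for a full teardown.
+ */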
+void bnxt_qplib_rcfw_stop_irq(struct bnxt_qplib_rcfw *rcfw, bool kill)
{
- unsigned long indx;
-
- /* Make sure the HW channel is stopped! */
- synchronize_irq(rcfw->vector);
tasklet_disable(&rcfw->worker);
- tasklet_kill(&rcfw->worker);
+ /* Mask h/w interrupts */
+ CREQ_DB(rcfw->creq_bar_reg_iomem, rcfw->creq.cons,
+ rcfw->creq.max_elements);
+ /* Sync with last running IRQ-handler */
+ synchronize_irq(rcfw->vector);
+ if (kill)
+ tasklet_kill(&rcfw->worker);
if (rcfw->requested) {
free_irq(rcfw->vector, rcfw);
rcfw->requested = false;
}
+}
+
+void bnxt_qplib_disable_rcfw_channel(struct bnxt_qplib_rcfw *rcfw)
+{
+ unsigned long indx;
+
+ bnxt_qplib_rcfw_stop_irq(rcfw, true);
+
if (rcfw->cmdq_bar_reg_iomem)
iounmap(rcfw->cmdq_bar_reg_iomem);
rcfw->cmdq_bar_reg_iomem = NULL;
@@ -614,6 +624,31 @@ void bnxt_qplib_disable_rcfw_channel(struct bnxt_qplib_rcfw *rcfw)
rcfw->vector = 0;
}
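+/* (Re)attach the CREQ interrupt and re-arm its doorbell; need_init
+ * selects tasklet_init() on first use vs tasklet_enable() on restart.
+ */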
+int bnxt_qplib_rcfw_start_irq(struct bnxt_qplib_rcfw *rcfw, int msix_vector,
+ bool need_init)
+{
+ int rc;
+
+ if (rcfw->requested)
+ return -EFAULT;
+
+ rcfw->vector = msix_vector;
+ if (need_init)
+ tasklet_init(&rcfw->worker,
+ bnxt_qplib_service_creq, (unsigned long)rcfw);
+ else
+ tasklet_enable(&rcfw->worker);
+ rc = request_irq(rcfw->vector, bnxt_qplib_creq_irq, 0,
+ "bnxt_qplib_creq", rcfw);
+ if (rc)
+ return rc;
+ rcfw->requested = true;
+ CREQ_DB_REARM(rcfw->creq_bar_reg_iomem, rcfw->creq.cons,
+ rcfw->creq.max_elements);
+
+ return 0;
+}
+
int bnxt_qplib_enable_rcfw_channel(struct pci_dev *pdev,
struct bnxt_qplib_rcfw *rcfw,
int msix_vector,
@@ -675,27 +710,17 @@ int bnxt_qplib_enable_rcfw_channel(struct pci_dev *pdev,
rcfw->creq_qp_event_processed = 0;
rcfw->creq_func_event_processed = 0;
- rcfw->vector = msix_vector;
if (aeq_handler)
rcfw->aeq_handler = aeq_handler;
+ init_waitqueue_head(&rcfw->waitq);
- tasklet_init(&rcfw->worker, bnxt_qplib_service_creq,
- (unsigned long)rcfw);
-
- rcfw->requested = false;
- rc = request_irq(rcfw->vector, bnxt_qplib_creq_irq, 0,
- "bnxt_qplib_creq", rcfw);
+ rc = bnxt_qplib_rcfw_start_irq(rcfw, msix_vector, true);
if (rc) {
dev_err(&rcfw->pdev->dev,
"QPLIB: Failed to request IRQ for CREQ rc = 0x%x", rc);
bnxt_qplib_disable_rcfw_channel(rcfw);
return rc;
}
- rcfw->requested = true;
-
- init_waitqueue_head(&rcfw->waitq);
-
- CREQ_DB_REARM(rcfw->creq_bar_reg_iomem, 0, rcfw->creq.max_elements);
init.cmdq_pbl = cpu_to_le64(rcfw->cmdq.pbl[PBL_LVL_0].pg_map_arr[0]);
init.cmdq_size_cmdq_lvl = cpu_to_le16(
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h
index c7cce2e4185e..46416dfe8830 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h
+++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h
@@ -195,7 +195,10 @@ struct bnxt_qplib_rcfw {
void bnxt_qplib_free_rcfw_channel(struct bnxt_qplib_rcfw *rcfw);
int bnxt_qplib_alloc_rcfw_channel(struct pci_dev *pdev,
struct bnxt_qplib_rcfw *rcfw, int qp_tbl_sz);
+void bnxt_qplib_rcfw_stop_irq(struct bnxt_qplib_rcfw *rcfw, bool kill);
void bnxt_qplib_disable_rcfw_channel(struct bnxt_qplib_rcfw *rcfw);
+int bnxt_qplib_rcfw_start_irq(struct bnxt_qplib_rcfw *rcfw, int msix_vector,
+ bool need_init);
int bnxt_qplib_enable_rcfw_channel(struct pci_dev *pdev,
struct bnxt_qplib_rcfw *rcfw,
int msix_vector,
diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c
index 6f2b26126c64..2be2e1ac1b5f 100644
--- a/drivers/infiniband/hw/cxgb4/cq.c
+++ b/drivers/infiniband/hw/cxgb4/cq.c
@@ -315,7 +315,7 @@ static void advance_oldest_read(struct t4_wq *wq)
 * Deal with out-of-order completions and/or completions that
 * complete prior unsignalled WRs.
*/
-void c4iw_flush_hw_cq(struct c4iw_cq *chp)
+void c4iw_flush_hw_cq(struct c4iw_cq *chp, struct c4iw_qp *flush_qhp)
{
struct t4_cqe *hw_cqe, *swcqe, read_cqe;
struct c4iw_qp *qhp;
@@ -339,6 +339,13 @@ void c4iw_flush_hw_cq(struct c4iw_cq *chp)
if (qhp == NULL)
goto next_cqe;
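+ /* The caller already holds flush_qhp->lock; for any other QP
+ * seen in this CQ, take its lock (dropped at next_cqe) and
+ * skip it if it has already been flushed.
+ */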
+ if (flush_qhp != qhp) {
+ spin_lock(&qhp->lock);
+
+ if (qhp->wq.flushed == 1)
+ goto next_cqe;
+ }
+
if (CQE_OPCODE(hw_cqe) == FW_RI_TERMINATE)
goto next_cqe;
@@ -390,6 +397,8 @@ void c4iw_flush_hw_cq(struct c4iw_cq *chp)
next_cqe:
t4_hwcq_consume(&chp->cq);
ret = t4_next_hw_cqe(&chp->cq, &hw_cqe);
+ if (qhp && flush_qhp != qhp)
+ spin_unlock(&qhp->lock);
}
}
diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c
index feeb8ee6f4a2..44161ca4d2a8 100644
--- a/drivers/infiniband/hw/cxgb4/device.c
+++ b/drivers/infiniband/hw/cxgb4/device.c
@@ -875,6 +875,11 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev)
rdev->status_page->db_off = 0;
+ init_completion(&rdev->rqt_compl);
+ init_completion(&rdev->pbl_compl);
+ kref_init(&rdev->rqt_kref);
+ kref_init(&rdev->pbl_kref);
+
return 0;
err_free_status_page_and_wr_log:
if (c4iw_wr_log && rdev->wr_log)
@@ -893,13 +898,15 @@ destroy_resource:
static void c4iw_rdev_close(struct c4iw_rdev *rdev)
{
- destroy_workqueue(rdev->free_workq);
kfree(rdev->wr_log);
c4iw_release_dev_ucontext(rdev, &rdev->uctx);
free_page((unsigned long)rdev->status_page);
c4iw_pblpool_destroy(rdev);
c4iw_rqtpool_destroy(rdev);
+ wait_for_completion(&rdev->pbl_compl);
+ wait_for_completion(&rdev->rqt_compl);
c4iw_ocqp_pool_destroy(rdev);
+ destroy_workqueue(rdev->free_workq);
c4iw_destroy_resource(&rdev->resource);
}
diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
index cc929002c05e..831027717121 100644
--- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
+++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
@@ -185,6 +185,10 @@ struct c4iw_rdev {
struct wr_log_entry *wr_log;
int wr_log_size;
struct workqueue_struct *free_workq;
+ struct completion rqt_compl;
+ struct completion pbl_compl;
+ struct kref rqt_kref;
+ struct kref pbl_kref;
};
static inline int c4iw_fatal_error(struct c4iw_rdev *rdev)
@@ -1049,7 +1053,7 @@ u32 c4iw_pblpool_alloc(struct c4iw_rdev *rdev, int size);
void c4iw_pblpool_free(struct c4iw_rdev *rdev, u32 addr, int size);
u32 c4iw_ocqp_pool_alloc(struct c4iw_rdev *rdev, int size);
void c4iw_ocqp_pool_free(struct c4iw_rdev *rdev, u32 addr, int size);
-void c4iw_flush_hw_cq(struct c4iw_cq *chp);
+void c4iw_flush_hw_cq(struct c4iw_cq *chp, struct c4iw_qp *flush_qhp);
void c4iw_count_rcqes(struct t4_cq *cq, struct t4_wq *wq, int *count);
int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp);
int c4iw_flush_rq(struct t4_wq *wq, struct t4_cq *cq, int count);
diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c
index e90f2fd8dc16..1445918e3239 100644
--- a/drivers/infiniband/hw/cxgb4/mem.c
+++ b/drivers/infiniband/hw/cxgb4/mem.c
@@ -489,10 +489,10 @@ struct ib_mr *c4iw_get_dma_mr(struct ib_pd *pd, int acc)
err_dereg_mem:
dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size,
mhp->attr.pbl_addr, mhp->dereg_skb, mhp->wr_waitp);
-err_free_wr_wait:
- c4iw_put_wr_wait(mhp->wr_waitp);
err_free_skb:
kfree_skb(mhp->dereg_skb);
+err_free_wr_wait:
+ c4iw_put_wr_wait(mhp->wr_waitp);
err_free_mhp:
kfree(mhp);
return ERR_PTR(ret);
diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
index de77b6027d69..ae167b686608 100644
--- a/drivers/infiniband/hw/cxgb4/qp.c
+++ b/drivers/infiniband/hw/cxgb4/qp.c
@@ -1343,12 +1343,12 @@ static void __flush_qp(struct c4iw_qp *qhp, struct c4iw_cq *rchp,
qhp->wq.flushed = 1;
t4_set_wq_in_error(&qhp->wq);
- c4iw_flush_hw_cq(rchp);
+ c4iw_flush_hw_cq(rchp, qhp);
c4iw_count_rcqes(&rchp->cq, &qhp->wq, &count);
rq_flushed = c4iw_flush_rq(&qhp->wq, &rchp->cq, count);
if (schp != rchp)
- c4iw_flush_hw_cq(schp);
+ c4iw_flush_hw_cq(schp, qhp);
sq_flushed = c4iw_flush_sq(qhp);
spin_unlock(&qhp->lock);
diff --git a/drivers/infiniband/hw/cxgb4/resource.c b/drivers/infiniband/hw/cxgb4/resource.c
index 3cf25997ed2b..0ef25ae05e6f 100644
--- a/drivers/infiniband/hw/cxgb4/resource.c
+++ b/drivers/infiniband/hw/cxgb4/resource.c
@@ -260,12 +260,22 @@ u32 c4iw_pblpool_alloc(struct c4iw_rdev *rdev, int size)
rdev->stats.pbl.cur += roundup(size, 1 << MIN_PBL_SHIFT);
if (rdev->stats.pbl.cur > rdev->stats.pbl.max)
rdev->stats.pbl.max = rdev->stats.pbl.cur;
+ kref_get(&rdev->pbl_kref);
} else
rdev->stats.pbl.fail++;
mutex_unlock(&rdev->stats.lock);
return (u32)addr;
}
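+/* kref release: runs once the pool creator and all outstanding PBL
+ * allocations have dropped their references, so the pool can be
+ * destroyed and c4iw_rdev_close() unblocked via pbl_compl.
+ */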
+static void destroy_pblpool(struct kref *kref)
+{
+ struct c4iw_rdev *rdev;
+
+ rdev = container_of(kref, struct c4iw_rdev, pbl_kref);
+ gen_pool_destroy(rdev->pbl_pool);
+ complete(&rdev->pbl_compl);
+}
+
void c4iw_pblpool_free(struct c4iw_rdev *rdev, u32 addr, int size)
{
pr_debug("addr 0x%x size %d\n", addr, size);
@@ -273,6 +283,7 @@ void c4iw_pblpool_free(struct c4iw_rdev *rdev, u32 addr, int size)
rdev->stats.pbl.cur -= roundup(size, 1 << MIN_PBL_SHIFT);
mutex_unlock(&rdev->stats.lock);
gen_pool_free(rdev->pbl_pool, (unsigned long)addr, size);
+ kref_put(&rdev->pbl_kref, destroy_pblpool);
}
int c4iw_pblpool_create(struct c4iw_rdev *rdev)
@@ -310,7 +321,7 @@ int c4iw_pblpool_create(struct c4iw_rdev *rdev)
void c4iw_pblpool_destroy(struct c4iw_rdev *rdev)
{
- gen_pool_destroy(rdev->pbl_pool);
+ kref_put(&rdev->pbl_kref, destroy_pblpool);
}
/*
@@ -331,12 +342,22 @@ u32 c4iw_rqtpool_alloc(struct c4iw_rdev *rdev, int size)
rdev->stats.rqt.cur += roundup(size << 6, 1 << MIN_RQT_SHIFT);
if (rdev->stats.rqt.cur > rdev->stats.rqt.max)
rdev->stats.rqt.max = rdev->stats.rqt.cur;
+ kref_get(&rdev->rqt_kref);
} else
rdev->stats.rqt.fail++;
mutex_unlock(&rdev->stats.lock);
return (u32)addr;
}
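+/* As with the PBL pool: destroy the RQT pool only after the last
+ * reference is dropped, then complete rqt_compl for rdev close.
+ */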
+static void destroy_rqtpool(struct kref *kref)
+{
+ struct c4iw_rdev *rdev;
+
+ rdev = container_of(kref, struct c4iw_rdev, rqt_kref);
+ gen_pool_destroy(rdev->rqt_pool);
+ complete(&rdev->rqt_compl);
+}
+
void c4iw_rqtpool_free(struct c4iw_rdev *rdev, u32 addr, int size)
{
pr_debug("addr 0x%x size %d\n", addr, size << 6);
@@ -344,6 +365,7 @@ void c4iw_rqtpool_free(struct c4iw_rdev *rdev, u32 addr, int size)
rdev->stats.rqt.cur -= roundup(size << 6, 1 << MIN_RQT_SHIFT);
mutex_unlock(&rdev->stats.lock);
gen_pool_free(rdev->rqt_pool, (unsigned long)addr, size << 6);
+ kref_put(&rdev->rqt_kref, destroy_rqtpool);
}
int c4iw_rqtpool_create(struct c4iw_rdev *rdev)
@@ -380,7 +402,7 @@ int c4iw_rqtpool_create(struct c4iw_rdev *rdev)
void c4iw_rqtpool_destroy(struct c4iw_rdev *rdev)
{
- gen_pool_destroy(rdev->rqt_pool);
+ kref_put(&rdev->rqt_kref, destroy_rqtpool);
}
/*
diff --git a/drivers/infiniband/hw/hfi1/affinity.c b/drivers/infiniband/hw/hfi1/affinity.c
index a97055dd4fbd..b5fab55cc275 100644
--- a/drivers/infiniband/hw/hfi1/affinity.c
+++ b/drivers/infiniband/hw/hfi1/affinity.c
@@ -412,7 +412,6 @@ static void hfi1_cleanup_sdma_notifier(struct hfi1_msix_entry *msix)
static int get_irq_affinity(struct hfi1_devdata *dd,
struct hfi1_msix_entry *msix)
{
- int ret;
cpumask_var_t diff;
struct hfi1_affinity_node *entry;
struct cpu_mask_set *set = NULL;
@@ -424,10 +423,6 @@ static int get_irq_affinity(struct hfi1_devdata *dd,
extra[0] = '\0';
cpumask_clear(&msix->mask);
- ret = zalloc_cpumask_var(&diff, GFP_KERNEL);
- if (!ret)
- return -ENOMEM;
-
entry = node_affinity_lookup(dd->node);
switch (msix->type) {
@@ -458,6 +453,9 @@ static int get_irq_affinity(struct hfi1_devdata *dd,
* finds its CPU here.
*/
if (cpu == -1 && set) {
+ if (!zalloc_cpumask_var(&diff, GFP_KERNEL))
+ return -ENOMEM;
+
if (cpumask_equal(&set->mask, &set->used)) {
/*
* We've used up all the CPUs, bump up the generation
@@ -469,6 +467,8 @@ static int get_irq_affinity(struct hfi1_devdata *dd,
cpumask_andnot(diff, &set->mask, &set->used);
cpu = cpumask_first(diff);
cpumask_set_cpu(cpu, &set->used);
+
+ free_cpumask_var(diff);
}
cpumask_set_cpu(cpu, &msix->mask);
@@ -482,7 +482,6 @@ static int get_irq_affinity(struct hfi1_devdata *dd,
hfi1_setup_sdma_notifier(msix);
}
- free_cpumask_var(diff);
return 0;
}
diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c
index e6a60fa59f2b..e6bdd0c1e80a 100644
--- a/drivers/infiniband/hw/hfi1/chip.c
+++ b/drivers/infiniband/hw/hfi1/chip.c
@@ -5944,6 +5944,7 @@ static void is_sendctxt_err_int(struct hfi1_devdata *dd,
u64 status;
u32 sw_index;
int i = 0;
+ unsigned long irq_flags;
sw_index = dd->hw_to_sw[hw_context];
if (sw_index >= dd->num_send_contexts) {
@@ -5953,10 +5954,12 @@ static void is_sendctxt_err_int(struct hfi1_devdata *dd,
return;
}
sci = &dd->send_contexts[sw_index];
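+ /* Hold sc_lock so sci->sc cannot be freed while it is being
+ * inspected and halted below.
+ */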
+ spin_lock_irqsave(&dd->sc_lock, irq_flags);
sc = sci->sc;
if (!sc) {
dd_dev_err(dd, "%s: context %u(%u): no sc?\n", __func__,
sw_index, hw_context);
+ spin_unlock_irqrestore(&dd->sc_lock, irq_flags);
return;
}
@@ -5978,6 +5981,7 @@ static void is_sendctxt_err_int(struct hfi1_devdata *dd,
*/
if (sc->type != SC_USER)
queue_work(dd->pport->hfi1_wq, &sc->halt_work);
+ spin_unlock_irqrestore(&dd->sc_lock, irq_flags);
/*
* Update the counters for the corresponding status bits.
diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c
index 46d1475b2154..bd837a048bf4 100644
--- a/drivers/infiniband/hw/hfi1/driver.c
+++ b/drivers/infiniband/hw/hfi1/driver.c
@@ -433,31 +433,43 @@ void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt,
bool do_cnp)
{
struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
+ struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
struct ib_other_headers *ohdr = pkt->ohdr;
struct ib_grh *grh = pkt->grh;
u32 rqpn = 0, bth1;
- u16 pkey, rlid, dlid = ib_get_dlid(pkt->hdr);
+ u16 pkey;
+ u32 rlid, slid, dlid = 0;
u8 hdr_type, sc, svc_type;
bool is_mcast = false;
+ /* can be called from prescan */
if (pkt->etype == RHF_RCV_TYPE_BYPASS) {
is_mcast = hfi1_is_16B_mcast(dlid);
pkey = hfi1_16B_get_pkey(pkt->hdr);
sc = hfi1_16B_get_sc(pkt->hdr);
+ dlid = hfi1_16B_get_dlid(pkt->hdr);
+ slid = hfi1_16B_get_slid(pkt->hdr);
hdr_type = HFI1_PKT_TYPE_16B;
} else {
is_mcast = (dlid > be16_to_cpu(IB_MULTICAST_LID_BASE)) &&
(dlid != be16_to_cpu(IB_LID_PERMISSIVE));
pkey = ib_bth_get_pkey(ohdr);
sc = hfi1_9B_get_sc5(pkt->hdr, pkt->rhf);
+ dlid = ib_get_dlid(pkt->hdr);
+ slid = ib_get_slid(pkt->hdr);
hdr_type = HFI1_PKT_TYPE_9B;
}
switch (qp->ibqp.qp_type) {
+ case IB_QPT_UD:
+ dlid = ppd->lid;
+ rlid = slid;
+ rqpn = ib_get_sqpn(pkt->ohdr);
+ svc_type = IB_CC_SVCTYPE_UD;
+ break;
case IB_QPT_SMI:
case IB_QPT_GSI:
- case IB_QPT_UD:
- rlid = ib_get_slid(pkt->hdr);
+ rlid = slid;
rqpn = ib_get_sqpn(pkt->ohdr);
svc_type = IB_CC_SVCTYPE_UD;
break;
@@ -482,7 +494,6 @@ void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt,
dlid, rlid, sc, grh);
if (!is_mcast && (bth1 & IB_BECN_SMASK)) {
- struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
u32 lqpn = bth1 & RVT_QPN_MASK;
u8 sl = ibp->sc_to_sl[sc];
diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h
index 32c48265405e..cac2c62bc42d 100644
--- a/drivers/infiniband/hw/hfi1/hfi.h
+++ b/drivers/infiniband/hw/hfi1/hfi.h
@@ -1537,13 +1537,13 @@ void set_link_ipg(struct hfi1_pportdata *ppd);
void process_becn(struct hfi1_pportdata *ppd, u8 sl, u32 rlid, u32 lqpn,
u32 rqpn, u8 svc_type);
void return_cnp(struct hfi1_ibport *ibp, struct rvt_qp *qp, u32 remote_qpn,
- u32 pkey, u32 slid, u32 dlid, u8 sc5,
+ u16 pkey, u32 slid, u32 dlid, u8 sc5,
const struct ib_grh *old_grh);
void return_cnp_16B(struct hfi1_ibport *ibp, struct rvt_qp *qp,
- u32 remote_qpn, u32 pkey, u32 slid, u32 dlid,
+ u32 remote_qpn, u16 pkey, u32 slid, u32 dlid,
u8 sc5, const struct ib_grh *old_grh);
typedef void (*hfi1_handle_cnp)(struct hfi1_ibport *ibp, struct rvt_qp *qp,
- u32 remote_qpn, u32 pkey, u32 slid, u32 dlid,
+ u32 remote_qpn, u16 pkey, u32 slid, u32 dlid,
u8 sc5, const struct ib_grh *old_grh);
#define PKEY_CHECK_INVALID -1
@@ -2437,7 +2437,7 @@ static inline void hfi1_make_16b_hdr(struct hfi1_16b_header *hdr,
((slid >> OPA_16B_SLID_SHIFT) << OPA_16B_SLID_HIGH_SHIFT);
lrh2 = (lrh2 & ~OPA_16B_DLID_MASK) |
((dlid >> OPA_16B_DLID_SHIFT) << OPA_16B_DLID_HIGH_SHIFT);
- lrh2 = (lrh2 & ~OPA_16B_PKEY_MASK) | (pkey << OPA_16B_PKEY_SHIFT);
+ lrh2 = (lrh2 & ~OPA_16B_PKEY_MASK) | ((u32)pkey << OPA_16B_PKEY_SHIFT);
lrh2 = (lrh2 & ~OPA_16B_L4_MASK) | l4;
hdr->lrh[0] = lrh0;
diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c
index 33eba2356742..6309edf811df 100644
--- a/drivers/infiniband/hw/hfi1/init.c
+++ b/drivers/infiniband/hw/hfi1/init.c
@@ -88,9 +88,9 @@
* pio buffers per ctxt, etc.) Zero means use one user context per CPU.
*/
int num_user_contexts = -1;
-module_param_named(num_user_contexts, num_user_contexts, uint, S_IRUGO);
+module_param_named(num_user_contexts, num_user_contexts, int, 0444);
MODULE_PARM_DESC(
- num_user_contexts, "Set max number of user contexts to use");
+ num_user_contexts, "Set max number of user contexts to use (default: -1 will use the real (non-HT) CPU count)");
uint krcvqs[RXE_NUM_DATA_VL];
int krcvqsset;
@@ -1209,30 +1209,49 @@ static void finalize_asic_data(struct hfi1_devdata *dd,
kfree(ad);
}
-static void __hfi1_free_devdata(struct kobject *kobj)
+/**
+ * hfi1_clean_devdata - cleans up per-unit data structure
+ * @dd: pointer to a valid devdata structure
+ *
+ * It cleans up all data structures set up by
+ * by hfi1_alloc_devdata().
+ */
+static void hfi1_clean_devdata(struct hfi1_devdata *dd)
{
- struct hfi1_devdata *dd =
- container_of(kobj, struct hfi1_devdata, kobj);
struct hfi1_asic_data *ad;
unsigned long flags;
spin_lock_irqsave(&hfi1_devs_lock, flags);
- idr_remove(&hfi1_unit_table, dd->unit);
- list_del(&dd->list);
+ if (!list_empty(&dd->list)) {
+ idr_remove(&hfi1_unit_table, dd->unit);
+ list_del_init(&dd->list);
+ }
ad = release_asic_data(dd);
spin_unlock_irqrestore(&hfi1_devs_lock, flags);
- if (ad)
- finalize_asic_data(dd, ad);
+
+ finalize_asic_data(dd, ad);
free_platform_config(dd);
rcu_barrier(); /* wait for rcu callbacks to complete */
free_percpu(dd->int_counter);
free_percpu(dd->rcv_limit);
free_percpu(dd->send_schedule);
free_percpu(dd->tx_opstats);
+ dd->int_counter = NULL;
+ dd->rcv_limit = NULL;
+ dd->send_schedule = NULL;
+ dd->tx_opstats = NULL;
sdma_clean(dd, dd->num_sdma);
rvt_dealloc_device(&dd->verbs_dev.rdi);
}
+static void __hfi1_free_devdata(struct kobject *kobj)
+{
+ struct hfi1_devdata *dd =
+ container_of(kobj, struct hfi1_devdata, kobj);
+
+ hfi1_clean_devdata(dd);
+}
+
static struct kobj_type hfi1_devdata_type = {
.release = __hfi1_free_devdata,
};
@@ -1265,6 +1284,8 @@ struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra)
return ERR_PTR(-ENOMEM);
dd->num_pports = nports;
dd->pport = (struct hfi1_pportdata *)(dd + 1);
+ dd->pcidev = pdev;
+ pci_set_drvdata(pdev, dd);
INIT_LIST_HEAD(&dd->list);
idr_preload(GFP_KERNEL);
@@ -1331,9 +1352,7 @@ struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra)
return dd;
bail:
- if (!list_empty(&dd->list))
- list_del_init(&dd->list);
- rvt_dealloc_device(&dd->verbs_dev.rdi);
+ hfi1_clean_devdata(dd);
return ERR_PTR(ret);
}
diff --git a/drivers/infiniband/hw/hfi1/pcie.c b/drivers/infiniband/hw/hfi1/pcie.c
index 83d66e862207..c1c982908b4b 100644
--- a/drivers/infiniband/hw/hfi1/pcie.c
+++ b/drivers/infiniband/hw/hfi1/pcie.c
@@ -163,9 +163,6 @@ int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev)
resource_size_t addr;
int ret = 0;
- dd->pcidev = pdev;
- pci_set_drvdata(pdev, dd);
-
addr = pci_resource_start(pdev, 0);
len = pci_resource_len(pdev, 0);
diff --git a/drivers/infiniband/hw/hfi1/platform.c b/drivers/infiniband/hw/hfi1/platform.c
index d486355880cb..cbf7faa5038c 100644
--- a/drivers/infiniband/hw/hfi1/platform.c
+++ b/drivers/infiniband/hw/hfi1/platform.c
@@ -199,6 +199,7 @@ void free_platform_config(struct hfi1_devdata *dd)
{
/* Release memory allocated for eprom or fallback file read. */
kfree(dd->platform_config.data);
+ dd->platform_config.data = NULL;
}
void get_port_type(struct hfi1_pportdata *ppd)
diff --git a/drivers/infiniband/hw/hfi1/qsfp.c b/drivers/infiniband/hw/hfi1/qsfp.c
index 1869f639c3ae..b5966991d647 100644
--- a/drivers/infiniband/hw/hfi1/qsfp.c
+++ b/drivers/infiniband/hw/hfi1/qsfp.c
@@ -204,6 +204,8 @@ static void clean_i2c_bus(struct hfi1_i2c_bus *bus)
void clean_up_i2c(struct hfi1_devdata *dd, struct hfi1_asic_data *ad)
{
+ if (!ad)
+ return;
clean_i2c_bus(ad->i2c_bus0);
ad->i2c_bus0 = NULL;
clean_i2c_bus(ad->i2c_bus1);
diff --git a/drivers/infiniband/hw/hfi1/ruc.c b/drivers/infiniband/hw/hfi1/ruc.c
index 3daa94bdae3a..c0071ca4147a 100644
--- a/drivers/infiniband/hw/hfi1/ruc.c
+++ b/drivers/infiniband/hw/hfi1/ruc.c
@@ -733,6 +733,20 @@ static inline void hfi1_make_ruc_bth(struct rvt_qp *qp,
ohdr->bth[2] = cpu_to_be32(bth2);
}
+/**
+ * hfi1_make_ruc_header_16B - build a 16B header
+ * @qp: the queue pair
+ * @ohdr: a pointer to the destination header memory
+ * @bth0: bth0 passed in from the RC/UC builder
+ * @bth2: bth2 passed in from the RC/UC builder
+ * @middle: non zero indicates ahg "could" be used
+ * @ps: the current packet state
+ *
+ * This routine may disarm ahg under these situations:
+ * - packet needs a GRH
+ * - BECN needed
+ * - migration state not IB_MIG_MIGRATED
+ */
static inline void hfi1_make_ruc_header_16B(struct rvt_qp *qp,
struct ib_other_headers *ohdr,
u32 bth0, u32 bth2, int middle,
@@ -777,6 +791,12 @@ static inline void hfi1_make_ruc_header_16B(struct rvt_qp *qp,
else
middle = 0;
+ if (qp->s_flags & RVT_S_ECN) {
+ qp->s_flags &= ~RVT_S_ECN;
+ /* we recently received a FECN, so return a BECN */
+ becn = true;
+ middle = 0;
+ }
if (middle)
build_ahg(qp, bth2);
else
@@ -784,11 +804,6 @@ static inline void hfi1_make_ruc_header_16B(struct rvt_qp *qp,
bth0 |= pkey;
bth0 |= extra_bytes << 20;
- if (qp->s_flags & RVT_S_ECN) {
- qp->s_flags &= ~RVT_S_ECN;
- /* we recently received a FECN, so return a BECN */
- becn = true;
- }
hfi1_make_ruc_bth(qp, ohdr, bth0, bth1, bth2);
if (!ppd->lid)
@@ -806,6 +821,20 @@ static inline void hfi1_make_ruc_header_16B(struct rvt_qp *qp,
pkey, becn, 0, l4, priv->s_sc);
}
+/**
+ * hfi1_make_ruc_header_9B - build a 9B header
+ * @qp: the queue pair
+ * @ohdr: a pointer to the destination header memory
+ * @bth0: bth0 passed in from the RC/UC builder
+ * @bth2: bth2 passed in from the RC/UC builder
+ * @middle: non zero indicates ahg "could" be used
+ * @ps: the current packet state
+ *
+ * This routine may disarm ahg under these situations:
+ * - packet needs a GRH
+ * - BECN needed
+ * - migration state not IB_MIG_MIGRATED
+ */
static inline void hfi1_make_ruc_header_9B(struct rvt_qp *qp,
struct ib_other_headers *ohdr,
u32 bth0, u32 bth2, int middle,
@@ -839,6 +868,12 @@ static inline void hfi1_make_ruc_header_9B(struct rvt_qp *qp,
else
middle = 0;
+ if (qp->s_flags & RVT_S_ECN) {
+ qp->s_flags &= ~RVT_S_ECN;
+ /* we recently received a FECN, so return a BECN */
+ bth1 |= (IB_BECN_MASK << IB_BECN_SHIFT);
+ middle = 0;
+ }
if (middle)
build_ahg(qp, bth2);
else
@@ -846,11 +881,6 @@ static inline void hfi1_make_ruc_header_9B(struct rvt_qp *qp,
bth0 |= pkey;
bth0 |= extra_bytes << 20;
- if (qp->s_flags & RVT_S_ECN) {
- qp->s_flags &= ~RVT_S_ECN;
- /* we recently received a FECN, so return a BECN */
- bth1 |= (IB_BECN_MASK << IB_BECN_SHIFT);
- }
hfi1_make_ruc_bth(qp, ohdr, bth0, bth1, bth2);
hfi1_make_ib_hdr(&ps->s_txreq->phdr.hdr.ibh,
lrh0,
diff --git a/drivers/infiniband/hw/hfi1/ud.c b/drivers/infiniband/hw/hfi1/ud.c
index bcf3b0bebac8..69c17a5ef038 100644
--- a/drivers/infiniband/hw/hfi1/ud.c
+++ b/drivers/infiniband/hw/hfi1/ud.c
@@ -628,7 +628,7 @@ int hfi1_lookup_pkey_idx(struct hfi1_ibport *ibp, u16 pkey)
}
void return_cnp_16B(struct hfi1_ibport *ibp, struct rvt_qp *qp,
- u32 remote_qpn, u32 pkey, u32 slid, u32 dlid,
+ u32 remote_qpn, u16 pkey, u32 slid, u32 dlid,
u8 sc5, const struct ib_grh *old_grh)
{
u64 pbc, pbc_flags = 0;
@@ -687,7 +687,7 @@ void return_cnp_16B(struct hfi1_ibport *ibp, struct rvt_qp *qp,
}
void return_cnp(struct hfi1_ibport *ibp, struct rvt_qp *qp, u32 remote_qpn,
- u32 pkey, u32 slid, u32 dlid, u8 sc5,
+ u16 pkey, u32 slid, u32 dlid, u8 sc5,
const struct ib_grh *old_grh)
{
u64 pbc, pbc_flags = 0;
diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c
index 14734d0d0b76..3a485f50fede 100644
--- a/drivers/infiniband/hw/hns/hns_roce_cq.c
+++ b/drivers/infiniband/hw/hns/hns_roce_cq.c
@@ -377,6 +377,7 @@ struct ib_cq *hns_roce_ib_create_cq(struct ib_device *ib_dev,
hr_cq->set_ci_db = hr_cq->db.db_record;
*hr_cq->set_ci_db = 0;
+ hr_cq->db_en = 1;
}
/* Init mtt table and write buff address to mtt table */
diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c b/drivers/infiniband/hw/hns/hns_roce_hem.c
index 0eeabfbee192..63b5b3edabcb 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hem.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hem.c
@@ -912,7 +912,7 @@ int hns_roce_init_hem_table(struct hns_roce_dev *hr_dev,
obj_per_chunk = buf_chunk_size / obj_size;
num_hem = (nobj + obj_per_chunk - 1) / obj_per_chunk;
bt_chunk_num = bt_chunk_size / 8;
- if (table->type >= HEM_TYPE_MTT)
+ if (type >= HEM_TYPE_MTT)
num_bt_l0 = bt_chunk_num;
table->hem = kcalloc(num_hem, sizeof(*table->hem),
@@ -920,7 +920,7 @@ int hns_roce_init_hem_table(struct hns_roce_dev *hr_dev,
if (!table->hem)
goto err_kcalloc_hem_buf;
- if (check_whether_bt_num_3(table->type, hop_num)) {
+ if (check_whether_bt_num_3(type, hop_num)) {
unsigned long num_bt_l1;
num_bt_l1 = (num_hem + bt_chunk_num - 1) /
@@ -939,8 +939,8 @@ int hns_roce_init_hem_table(struct hns_roce_dev *hr_dev,
goto err_kcalloc_l1_dma;
}
- if (check_whether_bt_num_2(table->type, hop_num) ||
- check_whether_bt_num_3(table->type, hop_num)) {
+ if (check_whether_bt_num_2(type, hop_num) ||
+ check_whether_bt_num_3(type, hop_num)) {
table->bt_l0 = kcalloc(num_bt_l0, sizeof(*table->bt_l0),
GFP_KERNEL);
if (!table->bt_l0)
@@ -1039,14 +1039,14 @@ void hns_roce_cleanup_hem_table(struct hns_roce_dev *hr_dev,
void hns_roce_cleanup_hem(struct hns_roce_dev *hr_dev)
{
hns_roce_cleanup_hem_table(hr_dev, &hr_dev->cq_table.table);
- hns_roce_cleanup_hem_table(hr_dev, &hr_dev->qp_table.irrl_table);
if (hr_dev->caps.trrl_entry_sz)
hns_roce_cleanup_hem_table(hr_dev,
&hr_dev->qp_table.trrl_table);
+ hns_roce_cleanup_hem_table(hr_dev, &hr_dev->qp_table.irrl_table);
hns_roce_cleanup_hem_table(hr_dev, &hr_dev->qp_table.qp_table);
hns_roce_cleanup_hem_table(hr_dev, &hr_dev->mr_table.mtpt_table);
- hns_roce_cleanup_hem_table(hr_dev, &hr_dev->mr_table.mtt_table);
if (hns_roce_check_whether_mhop(hr_dev, HEM_TYPE_CQE))
hns_roce_cleanup_hem_table(hr_dev,
&hr_dev->mr_table.mtt_cqe_table);
+ hns_roce_cleanup_hem_table(hr_dev, &hr_dev->mr_table.mtt_table);
}
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
index 47e1b6ac1e1a..8013d69c5ac4 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
@@ -722,6 +722,7 @@ static int hns_roce_v1_rsv_lp_qp(struct hns_roce_dev *hr_dev)
free_mr->mr_free_pd = to_hr_pd(pd);
free_mr->mr_free_pd->ibpd.device = &hr_dev->ib_dev;
free_mr->mr_free_pd->ibpd.uobject = NULL;
+ free_mr->mr_free_pd->ibpd.__internal_mr = NULL;
atomic_set(&free_mr->mr_free_pd->ibpd.usecnt, 0);
attr.qp_access_flags = IB_ACCESS_REMOTE_WRITE;
@@ -1036,7 +1037,7 @@ static void hns_roce_v1_mr_free_work_fn(struct work_struct *work)
do {
ret = hns_roce_v1_poll_cq(&mr_free_cq->ib_cq, ne, wc);
- if (ret < 0) {
+ if (ret < 0 && hr_qp) {
dev_err(dev,
"(qp:0x%lx) starts, Poll cqe failed(%d) for mr 0x%x free! Remain %d cqe\n",
hr_qp->qpn, ret, hr_mr->key, ne);
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index 8b84ab7800d8..1f0965bb64ee 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -71,6 +71,11 @@ static int set_rwqe_data_seg(struct ib_qp *ibqp, struct ib_send_wr *wr,
return -EINVAL;
}
+ if (wr->opcode == IB_WR_RDMA_READ) {
+ dev_err(hr_dev->dev, "Not support inline data!\n");
+ return -EINVAL;
+ }
+
for (i = 0; i < wr->num_sge; i++) {
memcpy(wqe, ((void *)wr->sg_list[i].addr),
wr->sg_list[i].length);
@@ -137,8 +142,8 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
unsigned long flags;
unsigned int ind;
void *wqe = NULL;
- u32 tmp_len = 0;
bool loopback;
+ u32 tmp_len;
int ret = 0;
u8 *smac;
int nreq;
@@ -148,7 +153,7 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
ibqp->qp_type != IB_QPT_GSI &&
ibqp->qp_type != IB_QPT_UD)) {
dev_err(dev, "Not supported QP(0x%x)type!\n", ibqp->qp_type);
- *bad_wr = NULL;
+ *bad_wr = wr;
return -EOPNOTSUPP;
}
@@ -182,7 +187,9 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
qp->sq.wrid[(qp->sq.head + nreq) & (qp->sq.wqe_cnt - 1)] =
wr->wr_id;
- owner_bit = ~(qp->sq.head >> ilog2(qp->sq.wqe_cnt)) & 0x1;
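+ /* The owner bit must track the WQE slot actually being
+ * built (head + nreq), not the ring head at post time.
+ */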
+ owner_bit =
+ ~(((qp->sq.head + nreq) >> ilog2(qp->sq.wqe_cnt)) & 0x1);
+ tmp_len = 0;
/* Corresponding to the QP type, wqe process separately */
if (ibqp->qp_type == IB_QPT_GSI) {
@@ -456,6 +463,7 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
} else {
dev_err(dev, "Illegal qp_type(0x%x)\n", ibqp->qp_type);
spin_unlock_irqrestore(&qp->sq.lock, flags);
+ *bad_wr = wr;
return -EOPNOTSUPP;
}
}
@@ -540,16 +548,20 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
}
if (i < hr_qp->rq.max_gs) {
- dseg[i].lkey = cpu_to_le32(HNS_ROCE_INVALID_LKEY);
- dseg[i].addr = 0;
+ dseg->lkey = cpu_to_le32(HNS_ROCE_INVALID_LKEY);
+ dseg->addr = 0;
}
/* fill rq inline data buffer if supported */
- sge_list = hr_qp->rq_inl_buf.wqe_list[ind].sg_list;
- hr_qp->rq_inl_buf.wqe_list[ind].sge_cnt = (u32)wr->num_sge;
- for (i = 0; i < wr->num_sge; i++) {
- sge_list[i].addr = (void *)(u64)wr->sg_list[i].addr;
- sge_list[i].len = wr->sg_list[i].length;
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) {
+ sge_list = hr_qp->rq_inl_buf.wqe_list[ind].sg_list;
+ hr_qp->rq_inl_buf.wqe_list[ind].sge_cnt =
+ (u32)wr->num_sge;
+ for (i = 0; i < wr->num_sge; i++) {
+ sge_list[i].addr =
+ (void *)(u64)wr->sg_list[i].addr;
+ sge_list[i].len = wr->sg_list[i].length;
+ }
}
hr_qp->rq.wrid[ind] = wr->wr_id;
@@ -606,6 +618,8 @@ static void hns_roce_free_cmq_desc(struct hns_roce_dev *hr_dev,
dma_unmap_single(hr_dev->dev, ring->desc_dma_addr,
ring->desc_num * sizeof(struct hns_roce_cmq_desc),
DMA_BIDIRECTIONAL);
+
+ ring->desc_dma_addr = 0;
kfree(ring->desc);
}
@@ -1074,6 +1088,7 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
if (ret) {
dev_err(hr_dev->dev, "Configure global param fail, ret = %d.\n",
ret);
+ return ret;
}
/* Get pf resource owned by every pf */
@@ -1365,6 +1380,8 @@ static int hns_roce_v2_write_mtpt(void *mb_buf, struct hns_roce_mr *mr,
roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_PA_S,
mr->type == MR_TYPE_MR ? 0 : 1);
+ roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_INNER_PA_VLD_S,
+ 1);
mpt_entry->byte_12_mw_pa = cpu_to_le32(mpt_entry->byte_12_mw_pa);
mpt_entry->len_l = cpu_to_le32(lower_32_bits(mr->size));
@@ -2162,6 +2179,7 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp,
struct hns_roce_v2_qp_context *context,
struct hns_roce_v2_qp_context *qpc_mask)
{
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
/*
@@ -2274,7 +2292,8 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp,
context->rq_db_record_addr = hr_qp->rdb.dma >> 32;
qpc_mask->rq_db_record_addr = 0;
- roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_RQIE_S, 1);
+ roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_RQIE_S,
+ (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) ? 1 : 0);
roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_RQIE_S, 0);
roce_set_field(context->byte_80_rnr_rx_cqn, V2_QPC_BYTE_80_RX_CQN_M,
@@ -2592,10 +2611,12 @@ static void modify_qp_init_to_init(struct ib_qp *ibqp,
roce_set_field(qpc_mask->byte_4_sqpn_tst, V2_QPC_BYTE_4_SQPN_M,
V2_QPC_BYTE_4_SQPN_S, 0);
- roce_set_field(context->byte_56_dqpn_err, V2_QPC_BYTE_56_DQPN_M,
- V2_QPC_BYTE_56_DQPN_S, hr_qp->qpn);
- roce_set_field(qpc_mask->byte_56_dqpn_err, V2_QPC_BYTE_56_DQPN_M,
- V2_QPC_BYTE_56_DQPN_S, 0);
+ if (attr_mask & IB_QP_DEST_QPN) {
+ roce_set_field(context->byte_56_dqpn_err, V2_QPC_BYTE_56_DQPN_M,
+ V2_QPC_BYTE_56_DQPN_S, hr_qp->qpn);
+ roce_set_field(qpc_mask->byte_56_dqpn_err,
+ V2_QPC_BYTE_56_DQPN_M, V2_QPC_BYTE_56_DQPN_S, 0);
+ }
roce_set_field(context->byte_168_irrl_idx,
V2_QPC_BYTE_168_SQ_SHIFT_BAK_M,
V2_QPC_BYTE_168_SQ_SHIFT_BAK_S,
@@ -2650,8 +2671,7 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp,
return -EINVAL;
}
- if ((attr_mask & IB_QP_ALT_PATH) || (attr_mask & IB_QP_ACCESS_FLAGS) ||
- (attr_mask & IB_QP_PKEY_INDEX) || (attr_mask & IB_QP_QKEY)) {
+ if (attr_mask & IB_QP_ALT_PATH) {
dev_err(dev, "INIT2RTR attr_mask (0x%x) error\n", attr_mask);
return -EINVAL;
}
@@ -2800,10 +2820,12 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp,
V2_QPC_BYTE_140_RR_MAX_S, 0);
}
- roce_set_field(context->byte_56_dqpn_err, V2_QPC_BYTE_56_DQPN_M,
- V2_QPC_BYTE_56_DQPN_S, attr->dest_qp_num);
- roce_set_field(qpc_mask->byte_56_dqpn_err, V2_QPC_BYTE_56_DQPN_M,
- V2_QPC_BYTE_56_DQPN_S, 0);
+ if (attr_mask & IB_QP_DEST_QPN) {
+ roce_set_field(context->byte_56_dqpn_err, V2_QPC_BYTE_56_DQPN_M,
+ V2_QPC_BYTE_56_DQPN_S, attr->dest_qp_num);
+ roce_set_field(qpc_mask->byte_56_dqpn_err,
+ V2_QPC_BYTE_56_DQPN_M, V2_QPC_BYTE_56_DQPN_S, 0);
+ }
/* Configure GID index */
port_num = rdma_ah_get_port_num(&attr->ah_attr);
@@ -2845,7 +2867,7 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp,
if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_UD)
roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_MTU_M,
V2_QPC_BYTE_24_MTU_S, IB_MTU_4096);
- else
+ else if (attr_mask & IB_QP_PATH_MTU)
roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_MTU_M,
V2_QPC_BYTE_24_MTU_S, attr->path_mtu);
@@ -2922,11 +2944,9 @@ static int modify_qp_rtr_to_rts(struct ib_qp *ibqp,
return -EINVAL;
}
- /* If exist optional param, return error */
- if ((attr_mask & IB_QP_ALT_PATH) || (attr_mask & IB_QP_ACCESS_FLAGS) ||
- (attr_mask & IB_QP_QKEY) || (attr_mask & IB_QP_PATH_MIG_STATE) ||
- (attr_mask & IB_QP_CUR_STATE) ||
- (attr_mask & IB_QP_MIN_RNR_TIMER)) {
+ /* Alternate path and path migration are not supported */
+ if ((attr_mask & IB_QP_ALT_PATH) ||
+ (attr_mask & IB_QP_PATH_MIG_STATE)) {
dev_err(dev, "RTR2RTS attr_mask (0x%x)error\n", attr_mask);
return -EINVAL;
}
@@ -3161,7 +3181,8 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
(cur_state == IB_QPS_RTR && new_state == IB_QPS_ERR) ||
(cur_state == IB_QPS_RTS && new_state == IB_QPS_ERR) ||
(cur_state == IB_QPS_SQD && new_state == IB_QPS_ERR) ||
- (cur_state == IB_QPS_SQE && new_state == IB_QPS_ERR)) {
+ (cur_state == IB_QPS_SQE && new_state == IB_QPS_ERR) ||
+ (cur_state == IB_QPS_ERR && new_state == IB_QPS_ERR)) {
/* Nothing */
;
} else {
@@ -4478,7 +4499,7 @@ static int hns_roce_v2_create_eq(struct hns_roce_dev *hr_dev,
ret = hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0, eq->eqn, 0,
eq_cmd, HNS_ROCE_CMD_TIMEOUT_MSECS);
if (ret) {
- dev_err(dev, "[mailbox cmd] creat eqc failed.\n");
+ dev_err(dev, "[mailbox cmd] create eqc failed.\n");
goto err_cmd_mbox;
}
@@ -4694,6 +4715,8 @@ static const struct pci_device_id hns_roce_hw_v2_pci_tbl[] = {
{0, }
};
+MODULE_DEVICE_TABLE(pci, hns_roce_hw_v2_pci_tbl);
+
static int hns_roce_hw_v2_get_cfg(struct hns_roce_dev *hr_dev,
struct hnae3_handle *handle)
{
diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c
index 9d48bc07a9e6..96fb6a9ed93c 100644
--- a/drivers/infiniband/hw/hns/hns_roce_main.c
+++ b/drivers/infiniband/hw/hns/hns_roce_main.c
@@ -199,7 +199,7 @@ static int hns_roce_query_device(struct ib_device *ib_dev,
memset(props, 0, sizeof(*props));
- props->sys_image_guid = cpu_to_be32(hr_dev->sys_image_guid);
+ props->sys_image_guid = cpu_to_be64(hr_dev->sys_image_guid);
props->max_mr_size = (u64)(~(0ULL));
props->page_size_cap = hr_dev->caps.page_size_cap;
props->vendor_id = hr_dev->vendor_id;
diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c
index e289a924e789..baaf906f7c2e 100644
--- a/drivers/infiniband/hw/hns/hns_roce_qp.c
+++ b/drivers/infiniband/hw/hns/hns_roce_qp.c
@@ -620,7 +620,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
to_hr_ucontext(ib_pd->uobject->context),
ucmd.db_addr, &hr_qp->rdb);
if (ret) {
- dev_err(dev, "rp record doorbell map failed!\n");
+ dev_err(dev, "rq record doorbell map failed!\n");
goto err_mtt;
}
}
@@ -660,6 +660,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
goto err_rq_sge_list;
}
*hr_qp->rdb.db_record = 0;
+ hr_qp->rdb_en = 1;
}
/* Allocate QP buf */
@@ -955,7 +956,14 @@ int hns_roce_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
}
if (cur_state == new_state && cur_state == IB_QPS_RESET) {
- ret = 0;
+ if (hr_dev->caps.min_wqes) {
+ ret = -EPERM;
+ dev_err(dev, "cur_state=%d new_state=%d\n", cur_state,
+ new_state);
+ } else {
+ ret = 0;
+ }
+
goto out;
}
diff --git a/drivers/infiniband/hw/i40iw/i40iw.h b/drivers/infiniband/hw/i40iw/i40iw.h
index d5d8c1be345a..2f2b4426ded7 100644
--- a/drivers/infiniband/hw/i40iw/i40iw.h
+++ b/drivers/infiniband/hw/i40iw/i40iw.h
@@ -207,6 +207,7 @@ struct i40iw_msix_vector {
u32 irq;
u32 cpu_affinity;
u32 ceq_id;
+ cpumask_t mask;
};
struct l2params_work {
diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c b/drivers/infiniband/hw/i40iw/i40iw_cm.c
index 4cfa8f4647e2..f7c6fd9ff6e2 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_cm.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_cm.c
@@ -2093,7 +2093,7 @@ static int i40iw_addr_resolve_neigh_ipv6(struct i40iw_device *iwdev,
if (netif_is_bond_slave(netdev))
netdev = netdev_master_upper_dev_get(netdev);
- neigh = dst_neigh_lookup(dst, &dst_addr);
+ neigh = dst_neigh_lookup(dst, dst_addr.sin6_addr.in6_u.u6_addr32);
rcu_read_lock();
if (neigh) {
diff --git a/drivers/infiniband/hw/i40iw/i40iw_hw.c b/drivers/infiniband/hw/i40iw/i40iw_hw.c
index 6139836fb533..c9f62ca7643c 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_hw.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_hw.c
@@ -331,7 +331,7 @@ void i40iw_process_aeq(struct i40iw_device *iwdev)
switch (info->ae_id) {
case I40IW_AE_LLP_FIN_RECEIVED:
if (qp->term_flags)
- continue;
+ break;
if (atomic_inc_return(&iwqp->close_timer_started) == 1) {
iwqp->hw_tcp_state = I40IW_TCP_STATE_CLOSE_WAIT;
if ((iwqp->hw_tcp_state == I40IW_TCP_STATE_CLOSE_WAIT) &&
@@ -360,7 +360,7 @@ void i40iw_process_aeq(struct i40iw_device *iwdev)
break;
case I40IW_AE_LLP_CONNECTION_RESET:
if (atomic_read(&iwqp->close_timer_started))
- continue;
+ break;
i40iw_cm_disconn(iwqp);
break;
case I40IW_AE_QP_SUSPEND_COMPLETE:
diff --git a/drivers/infiniband/hw/i40iw/i40iw_main.c b/drivers/infiniband/hw/i40iw/i40iw_main.c
index 9cd0d3ef9057..05001e6da1f8 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_main.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_main.c
@@ -687,7 +687,6 @@ static enum i40iw_status_code i40iw_configure_ceq_vector(struct i40iw_device *iw
struct i40iw_msix_vector *msix_vec)
{
enum i40iw_status_code status;
- cpumask_t mask;
if (iwdev->msix_shared && !ceq_id) {
tasklet_init(&iwdev->dpc_tasklet, i40iw_dpc, (unsigned long)iwdev);
@@ -697,9 +696,9 @@ static enum i40iw_status_code i40iw_configure_ceq_vector(struct i40iw_device *iw
status = request_irq(msix_vec->irq, i40iw_ceq_handler, 0, "CEQ", iwceq);
}
- cpumask_clear(&mask);
- cpumask_set_cpu(msix_vec->cpu_affinity, &mask);
- irq_set_affinity_hint(msix_vec->irq, &mask);
+ cpumask_clear(&msix_vec->mask);
+ cpumask_set_cpu(msix_vec->cpu_affinity, &msix_vec->mask);
+ irq_set_affinity_hint(msix_vec->irq, &msix_vec->mask);
if (status) {
i40iw_pr_err("ceq irq config fail\n");
diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
index 40e4f5ab2b46..68679ad4c6da 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
@@ -394,6 +394,7 @@ static struct i40iw_pbl *i40iw_get_pbl(unsigned long va,
list_for_each_entry(iwpbl, pbl_list, list) {
if (iwpbl->user_base == va) {
+ iwpbl->on_list = false;
list_del(&iwpbl->list);
return iwpbl;
}
@@ -614,6 +615,7 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd,
return ERR_PTR(-ENOMEM);
iwqp = (struct i40iw_qp *)mem;
+ iwqp->allocated_buffer = mem;
qp = &iwqp->sc_qp;
qp->back_qp = (void *)iwqp;
qp->push_idx = I40IW_INVALID_PUSH_PAGE_INDEX;
@@ -642,7 +644,6 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd,
goto error;
}
- iwqp->allocated_buffer = mem;
iwqp->iwdev = iwdev;
iwqp->iwpd = iwpd;
iwqp->ibqp.qp_num = qp_num;
@@ -1898,6 +1899,7 @@ static struct ib_mr *i40iw_reg_user_mr(struct ib_pd *pd,
goto error;
spin_lock_irqsave(&ucontext->qp_reg_mem_list_lock, flags);
list_add_tail(&iwpbl->list, &ucontext->qp_reg_mem_list);
+ iwpbl->on_list = true;
spin_unlock_irqrestore(&ucontext->qp_reg_mem_list_lock, flags);
break;
case IW_MEMREG_TYPE_CQ:
@@ -1908,6 +1910,7 @@ static struct ib_mr *i40iw_reg_user_mr(struct ib_pd *pd,
spin_lock_irqsave(&ucontext->cq_reg_mem_list_lock, flags);
list_add_tail(&iwpbl->list, &ucontext->cq_reg_mem_list);
+ iwpbl->on_list = true;
spin_unlock_irqrestore(&ucontext->cq_reg_mem_list_lock, flags);
break;
case IW_MEMREG_TYPE_MEM:
@@ -2045,14 +2048,18 @@ static void i40iw_del_memlist(struct i40iw_mr *iwmr,
switch (iwmr->type) {
case IW_MEMREG_TYPE_CQ:
spin_lock_irqsave(&ucontext->cq_reg_mem_list_lock, flags);
- if (!list_empty(&ucontext->cq_reg_mem_list))
+ if (iwpbl->on_list) {
+ iwpbl->on_list = false;
list_del(&iwpbl->list);
+ }
spin_unlock_irqrestore(&ucontext->cq_reg_mem_list_lock, flags);
break;
case IW_MEMREG_TYPE_QP:
spin_lock_irqsave(&ucontext->qp_reg_mem_list_lock, flags);
- if (!list_empty(&ucontext->qp_reg_mem_list))
+ if (iwpbl->on_list) {
+ iwpbl->on_list = false;
list_del(&iwpbl->list);
+ }
spin_unlock_irqrestore(&ucontext->qp_reg_mem_list_lock, flags);
break;
default:
diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.h b/drivers/infiniband/hw/i40iw/i40iw_verbs.h
index 9067443cd311..76cf173377ab 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_verbs.h
+++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.h
@@ -78,6 +78,7 @@ struct i40iw_pbl {
};
bool pbl_allocated;
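+ /* true while linked on a ucontext reg_mem list; guards list_del() */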
+ bool on_list;
u64 user_base;
struct i40iw_pble_alloc pble_alloc;
struct i40iw_mr *iwmr;
diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c
index 17f4f151a97f..61d8b06375bb 100644
--- a/drivers/infiniband/hw/mlx4/mr.c
+++ b/drivers/infiniband/hw/mlx4/mr.c
@@ -346,7 +346,7 @@ int mlx4_ib_umem_calc_optimal_mtt_size(struct ib_umem *umem, u64 start_va,
/* Add to the first block the misalignment that it suffers from. */
total_len += (first_block_start & ((1ULL << block_shift) - 1ULL));
last_block_end = current_block_start + current_block_len;
- last_block_aligned_end = round_up(last_block_end, 1 << block_shift);
+ last_block_aligned_end = round_up(last_block_end, 1ULL << block_shift);
total_len += (last_block_aligned_end - last_block_end);
if (total_len & ((1ULL << block_shift) - 1ULL))
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 50af8915e7ec..199648adac74 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -673,7 +673,8 @@ static int set_qp_rss(struct mlx4_ib_dev *dev, struct mlx4_ib_rss *rss_ctx,
MLX4_IB_RX_HASH_SRC_PORT_TCP |
MLX4_IB_RX_HASH_DST_PORT_TCP |
MLX4_IB_RX_HASH_SRC_PORT_UDP |
- MLX4_IB_RX_HASH_DST_PORT_UDP)) {
+ MLX4_IB_RX_HASH_DST_PORT_UDP |
+ MLX4_IB_RX_HASH_INNER)) {
pr_debug("RX Hash fields_mask has unsupported mask (0x%llx)\n",
ucmd->rx_hash_fields_mask);
return (-EOPNOTSUPP);
diff --git a/drivers/infiniband/hw/mlx5/Kconfig b/drivers/infiniband/hw/mlx5/Kconfig
index bce263b92821..fb4d77be019b 100644
--- a/drivers/infiniband/hw/mlx5/Kconfig
+++ b/drivers/infiniband/hw/mlx5/Kconfig
@@ -1,6 +1,7 @@
config MLX5_INFINIBAND
tristate "Mellanox Connect-IB HCA support"
depends on NETDEVICES && ETHERNET && PCI && MLX5_CORE
+ depends on INFINIBAND_USER_ACCESS || INFINIBAND_USER_ACCESS=n
---help---
This driver provides low-level InfiniBand support for
Mellanox Connect-IB PCI Express host channel adapters (HCAs).
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index daa919e5a442..69716a7ea993 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -52,7 +52,6 @@
#include <linux/mlx5/port.h>
#include <linux/mlx5/vport.h>
#include <linux/mlx5/fs.h>
-#include <linux/mlx5/fs_helpers.h>
#include <linux/list.h>
#include <rdma/ib_smi.h>
#include <rdma/ib_umem.h>
@@ -180,7 +179,7 @@ static int mlx5_netdev_event(struct notifier_block *this,
if (rep_ndev == ndev)
roce->netdev = (event == NETDEV_UNREGISTER) ?
NULL : ndev;
- } else if (ndev->dev.parent == &ibdev->mdev->pdev->dev) {
+ } else if (ndev->dev.parent == &mdev->pdev->dev) {
roce->netdev = (event == NETDEV_UNREGISTER) ?
NULL : ndev;
}
@@ -2417,7 +2416,7 @@ static void set_proto(void *outer_c, void *outer_v, u8 mask, u8 val)
MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_protocol, val);
}
-static void set_flow_label(void *misc_c, void *misc_v, u8 mask, u8 val,
+static void set_flow_label(void *misc_c, void *misc_v, u32 mask, u32 val,
bool inner)
{
if (inner) {
@@ -4757,7 +4756,7 @@ mlx5_ib_get_vector_affinity(struct ib_device *ibdev, int comp_vector)
{
struct mlx5_ib_dev *dev = to_mdev(ibdev);
- return mlx5_get_vector_affinity(dev->mdev, comp_vector);
+ return mlx5_get_vector_affinity_hint(dev->mdev, comp_vector);
}
/* The mlx5_ib_multiport_mutex should be held when calling this function */
@@ -5427,9 +5426,7 @@ static void mlx5_ib_stage_cong_debugfs_cleanup(struct mlx5_ib_dev *dev)
static int mlx5_ib_stage_uar_init(struct mlx5_ib_dev *dev)
{
dev->mdev->priv.uar = mlx5_get_uars_page(dev->mdev);
- if (!dev->mdev->priv.uar)
- return -ENOMEM;
- return 0;
+ return PTR_ERR_OR_ZERO(dev->mdev->priv.uar);
}
static void mlx5_ib_stage_uar_cleanup(struct mlx5_ib_dev *dev)
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 1520a2f20f98..90a9c461cedc 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -866,25 +866,28 @@ static int mr_umem_get(struct ib_pd *pd, u64 start, u64 length,
int *order)
{
struct mlx5_ib_dev *dev = to_mdev(pd->device);
+ struct ib_umem *u;
int err;
- *umem = ib_umem_get(pd->uobject->context, start, length,
- access_flags, 0);
- err = PTR_ERR_OR_ZERO(*umem);
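+ /* Clear the caller's pointer up front so no error path can
+ * leave a stale umem behind.
+ */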
+ *umem = NULL;
+
+ u = ib_umem_get(pd->uobject->context, start, length, access_flags, 0);
+ err = PTR_ERR_OR_ZERO(u);
if (err) {
- *umem = NULL;
- mlx5_ib_err(dev, "umem get failed (%d)\n", err);
+ mlx5_ib_dbg(dev, "umem get failed (%d)\n", err);
return err;
}
- mlx5_ib_cont_pages(*umem, start, MLX5_MKEY_PAGE_SHIFT_MASK, npages,
+ mlx5_ib_cont_pages(u, start, MLX5_MKEY_PAGE_SHIFT_MASK, npages,
page_shift, ncont, order);
if (!*npages) {
mlx5_ib_warn(dev, "avoid zero region\n");
- ib_umem_release(*umem);
+ ib_umem_release(u);
return -EINVAL;
}
+ *umem = u;
+
mlx5_ib_dbg(dev, "npages %d, ncont %d, order %d, page_shift %d\n",
*npages, *ncont, *order, *page_shift);
@@ -1458,13 +1461,12 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
int access_flags = flags & IB_MR_REREG_ACCESS ?
new_access_flags :
mr->access_flags;
- u64 addr = (flags & IB_MR_REREG_TRANS) ? virt_addr : mr->umem->address;
- u64 len = (flags & IB_MR_REREG_TRANS) ? length : mr->umem->length;
int page_shift = 0;
int upd_flags = 0;
int npages = 0;
int ncont = 0;
int order = 0;
+ u64 addr, len;
int err;
mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n",
@@ -1472,6 +1474,17 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
atomic_sub(mr->npages, &dev->mdev->priv.reg_pages);
+ if (!mr->umem)
+ return -EINVAL;
+
+ if (flags & IB_MR_REREG_TRANS) {
+ addr = virt_addr;
+ len = length;
+ } else {
+ addr = mr->umem->address;
+ len = mr->umem->length;
+ }
+
if (flags != IB_MR_REREG_PD) {
/*
* Replace umem. This needs to be done whether or not UMR is
@@ -1479,6 +1492,7 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
*/
flags |= IB_MR_REREG_TRANS;
ib_umem_release(mr->umem);
+ mr->umem = NULL;
err = mr_umem_get(pd, addr, len, access_flags, &mr->umem,
&npages, &page_shift, &ncont, &order);
if (err)
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index 7ed4b70f6447..2193dc1765fb 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -259,7 +259,11 @@ static int set_rq_size(struct mlx5_ib_dev *dev, struct ib_qp_cap *cap,
} else {
if (ucmd) {
qp->rq.wqe_cnt = ucmd->rq_wqe_count;
+ if (ucmd->rq_wqe_shift > BITS_PER_BYTE * sizeof(ucmd->rq_wqe_shift))
+ return -EINVAL;
qp->rq.wqe_shift = ucmd->rq_wqe_shift;
+ if ((1 << qp->rq.wqe_shift) / sizeof(struct mlx5_wqe_data_seg) < qp->wq_sig)
+ return -EINVAL;
qp->rq.max_gs = (1 << qp->rq.wqe_shift) / sizeof(struct mlx5_wqe_data_seg) - qp->wq_sig;
qp->rq.max_post = qp->rq.wqe_cnt;
} else {
@@ -480,11 +484,6 @@ static int qp_has_rq(struct ib_qp_init_attr *attr)
return 1;
}
-static int first_med_bfreg(void)
-{
- return 1;
-}
-
enum {
/* this is the first blue flame register in the array of bfregs assigned
* to a processes. Since we do not use it for blue flame but rather
@@ -510,6 +509,12 @@ static int num_med_bfreg(struct mlx5_ib_dev *dev,
return n >= 0 ? n : 0;
}
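+/* Index of the first medium-class bfreg, or -ENOMEM when the medium
+ * class is empty.
+ */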
+static int first_med_bfreg(struct mlx5_ib_dev *dev,
+ struct mlx5_bfreg_info *bfregi)
+{
+ return num_med_bfreg(dev, bfregi) ? 1 : -ENOMEM;
+}
+
static int first_hi_bfreg(struct mlx5_ib_dev *dev,
struct mlx5_bfreg_info *bfregi)
{
@@ -537,10 +542,13 @@ static int alloc_high_class_bfreg(struct mlx5_ib_dev *dev,
static int alloc_med_class_bfreg(struct mlx5_ib_dev *dev,
struct mlx5_bfreg_info *bfregi)
{
- int minidx = first_med_bfreg();
+ int minidx = first_med_bfreg(dev, bfregi);
int i;
- for (i = first_med_bfreg(); i < first_hi_bfreg(dev, bfregi); i++) {
+ if (minidx < 0)
+ return minidx;
+
+ for (i = minidx; i < first_hi_bfreg(dev, bfregi); i++) {
if (bfregi->count[i] < bfregi->count[minidx])
minidx = i;
if (!bfregi->count[minidx])
@@ -2451,18 +2459,18 @@ enum {
static int ib_rate_to_mlx5(struct mlx5_ib_dev *dev, u8 rate)
{
- if (rate == IB_RATE_PORT_CURRENT) {
+ if (rate == IB_RATE_PORT_CURRENT)
return 0;
- } else if (rate < IB_RATE_2_5_GBPS || rate > IB_RATE_300_GBPS) {
+
+ if (rate < IB_RATE_2_5_GBPS || rate > IB_RATE_300_GBPS)
return -EINVAL;
- } else {
- while (rate != IB_RATE_2_5_GBPS &&
- !(1 << (rate + MLX5_STAT_RATE_OFFSET) &
- MLX5_CAP_GEN(dev->mdev, stat_rate_support)))
- --rate;
- }
- return rate + MLX5_STAT_RATE_OFFSET;
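+ /* Step down to the nearest rate the device supports;
+ * IB_RATE_PORT_CURRENT (0) means "no fixed rate".
+ */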
+ while (rate != IB_RATE_PORT_CURRENT &&
+ !(1 << (rate + MLX5_STAT_RATE_OFFSET) &
+ MLX5_CAP_GEN(dev->mdev, stat_rate_support)))
+ --rate;
+
+ return rate ? rate + MLX5_STAT_RATE_OFFSET : rate;
}
static int modify_raw_packet_eth_prio(struct mlx5_core_dev *dev,
diff --git a/drivers/infiniband/hw/nes/nes_nic.c b/drivers/infiniband/hw/nes/nes_nic.c
index 0a75164cedea..007d5e8a0121 100644
--- a/drivers/infiniband/hw/nes/nes_nic.c
+++ b/drivers/infiniband/hw/nes/nes_nic.c
@@ -461,7 +461,7 @@ static bool nes_nic_send(struct sk_buff *skb, struct net_device *netdev)
/**
* nes_netdev_start_xmit
*/
-static int nes_netdev_start_xmit(struct sk_buff *skb, struct net_device *netdev)
+static netdev_tx_t nes_netdev_start_xmit(struct sk_buff *skb, struct net_device *netdev)
{
struct nes_vnic *nesvnic = netdev_priv(netdev);
struct nes_device *nesdev = nesvnic->nesdev;
diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c
index 7d3763b2e01c..3f9afc02d166 100644
--- a/drivers/infiniband/hw/qedr/verbs.c
+++ b/drivers/infiniband/hw/qedr/verbs.c
@@ -401,49 +401,47 @@ int qedr_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
{
struct qedr_ucontext *ucontext = get_qedr_ucontext(context);
struct qedr_dev *dev = get_qedr_dev(context->device);
- unsigned long vm_page = vma->vm_pgoff << PAGE_SHIFT;
- u64 unmapped_db = dev->db_phys_addr;
+ unsigned long phys_addr = vma->vm_pgoff << PAGE_SHIFT;
unsigned long len = (vma->vm_end - vma->vm_start);
- int rc = 0;
- bool found;
+ unsigned long dpi_start;
+
+ dpi_start = dev->db_phys_addr + (ucontext->dpi * ucontext->dpi_size);
DP_DEBUG(dev, QEDR_MSG_INIT,
- "qedr_mmap called vm_page=0x%lx vm_pgoff=0x%lx unmapped_db=0x%llx db_size=%x, len=%lx\n",
- vm_page, vma->vm_pgoff, unmapped_db, dev->db_size, len);
- if (vma->vm_start & (PAGE_SIZE - 1)) {
- DP_ERR(dev, "Vma_start not page aligned = %ld\n",
- vma->vm_start);
+		 "mmap invoked with vm_start=0x%pK, vm_end=0x%pK, vm_pgoff=0x%pK; dpi_start=0x%pK dpi_size=0x%x\n",
+ (void *)vma->vm_start, (void *)vma->vm_end,
+ (void *)vma->vm_pgoff, (void *)dpi_start, ucontext->dpi_size);
+
+ if ((vma->vm_start & (PAGE_SIZE - 1)) || (len & (PAGE_SIZE - 1))) {
+ DP_ERR(dev,
+		"failed mmap, addresses must be page aligned: start=0x%pK, end=0x%pK\n",
+ (void *)vma->vm_start, (void *)vma->vm_end);
return -EINVAL;
}
- found = qedr_search_mmap(ucontext, vm_page, len);
- if (!found) {
- DP_ERR(dev, "Vma_pgoff not found in mapped array = %ld\n",
+ if (!qedr_search_mmap(ucontext, phys_addr, len)) {
+ DP_ERR(dev, "failed mmap, vm_pgoff=0x%lx is not authorized\n",
vma->vm_pgoff);
return -EINVAL;
}
- DP_DEBUG(dev, QEDR_MSG_INIT, "Mapping doorbell bar\n");
-
- if ((vm_page >= unmapped_db) && (vm_page <= (unmapped_db +
- dev->db_size))) {
- DP_DEBUG(dev, QEDR_MSG_INIT, "Mapping doorbell bar\n");
- if (vma->vm_flags & VM_READ) {
- DP_ERR(dev, "Trying to map doorbell bar for read\n");
- return -EPERM;
- }
-
- vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
+ if (phys_addr < dpi_start ||
+ ((phys_addr + len) > (dpi_start + ucontext->dpi_size))) {
+ DP_ERR(dev,
+ "failed mmap, pages are outside of dpi; page address=0x%pK, dpi_start=0x%pK, dpi_size=0x%x\n",
+ (void *)phys_addr, (void *)dpi_start,
+ ucontext->dpi_size);
+ return -EINVAL;
+ }
- rc = io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
- PAGE_SIZE, vma->vm_page_prot);
- } else {
- DP_DEBUG(dev, QEDR_MSG_INIT, "Mapping chains\n");
- rc = remap_pfn_range(vma, vma->vm_start,
- vma->vm_pgoff, len, vma->vm_page_prot);
+ if (vma->vm_flags & VM_READ) {
+ DP_ERR(dev, "failed mmap, cannot map doorbell bar for read\n");
+ return -EINVAL;
}
- DP_DEBUG(dev, QEDR_MSG_INIT, "qedr_mmap return code: %d\n", rc);
- return rc;
+
+ vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
+ return io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, len,
+ vma->vm_page_prot);
}
struct ib_pd *qedr_alloc_pd(struct ib_device *ibdev,
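The qedr_mmap() rewrite above reduces the handler to three checks before the write-combined remap: page alignment, authorization via qedr_search_mmap(), and containment inside the context's doorbell window. A userspace sketch of the alignment and containment checks, with invented addresses:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE 4096UL

static bool mmap_args_ok(uint64_t phys, uint64_t len,
			 uint64_t dpi_start, uint64_t dpi_size)
{
	if ((phys & (PAGE_SIZE - 1)) || (len & (PAGE_SIZE - 1)))
		return false;			/* not page aligned */
	if (phys < dpi_start || phys + len > dpi_start + dpi_size)
		return false;			/* outside the dpi window */
	return true;
}

int main(void)
{
	printf("%d\n", mmap_args_ok(0x10000, 0x1000, 0x10000, 0x2000)); /* 1 */
	printf("%d\n", mmap_args_ok(0x10800, 0x1000, 0x10000, 0x2000)); /* 0 */
	return 0;
}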
diff --git a/drivers/infiniband/sw/rxe/rxe_opcode.c b/drivers/infiniband/sw/rxe/rxe_opcode.c
index 61927c165b59..4cf11063e0b5 100644
--- a/drivers/infiniband/sw/rxe/rxe_opcode.c
+++ b/drivers/infiniband/sw/rxe/rxe_opcode.c
@@ -390,7 +390,7 @@ struct rxe_opcode_info rxe_opcode[RXE_NUM_OPCODE] = {
.name = "IB_OPCODE_RC_SEND_ONLY_INV",
.mask = RXE_IETH_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK
| RXE_COMP_MASK | RXE_RWR_MASK | RXE_SEND_MASK
- | RXE_END_MASK,
+ | RXE_END_MASK | RXE_START_MASK,
.length = RXE_BTH_BYTES + RXE_IETH_BYTES,
.offset = {
[RXE_BTH] = 0,
diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c
index 7bdaf71b8221..785199990457 100644
--- a/drivers/infiniband/sw/rxe/rxe_req.c
+++ b/drivers/infiniband/sw/rxe/rxe_req.c
@@ -728,7 +728,6 @@ next_wqe:
rollback_state(wqe, qp, &rollback_wqe, rollback_psn);
if (ret == -EAGAIN) {
- kfree_skb(skb);
rxe_run_task(&qp->req.task, 1);
goto exit;
}
diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c
index a65c9969f7fc..955ff3b6da9c 100644
--- a/drivers/infiniband/sw/rxe/rxe_resp.c
+++ b/drivers/infiniband/sw/rxe/rxe_resp.c
@@ -742,7 +742,6 @@ static enum resp_states read_reply(struct rxe_qp *qp,
err = rxe_xmit_packet(rxe, qp, &ack_pkt, skb);
if (err) {
pr_err("Failed sending RDMA reply.\n");
- kfree_skb(skb);
return RESPST_ERR_RNR;
}
@@ -954,10 +953,8 @@ static int send_ack(struct rxe_qp *qp, struct rxe_pkt_info *pkt,
}
err = rxe_xmit_packet(rxe, qp, &ack_pkt, skb);
- if (err) {
+ if (err)
pr_err_ratelimited("Failed sending ack\n");
- kfree_skb(skb);
- }
err1:
return err;
@@ -1141,7 +1138,6 @@ static enum resp_states duplicate_request(struct rxe_qp *qp,
if (rc) {
pr_err("Failed resending result. This flow is not handled - skb ignored\n");
rxe_drop_ref(qp);
- kfree_skb(skb_copy);
rc = RESPST_CLEANUP;
goto out;
}
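The kfree_skb() deletions in rxe_req.c and rxe_resp.c above all follow from one ownership rule: the transmit path now frees the skb itself on failure, so freeing it again in the caller would be a double free. A kernel-side sketch of the convention, with a hypothetical low_level_xmit() standing in for the real transmit step:

#include <linux/skbuff.h>

int low_level_xmit(struct sk_buff *skb);	/* hypothetical */

static int xmit_owns_skb(struct sk_buff *skb)
{
	int err = low_level_xmit(skb);

	if (err)
		kfree_skb(skb);		/* freed here, exactly once */
	return err;			/* callers only look at err */
}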
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
index 2cb52fd48cf1..73a00a1c06f6 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.c
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
@@ -761,7 +761,6 @@ static int rxe_post_send_kernel(struct rxe_qp *qp, struct ib_send_wr *wr,
unsigned int mask;
unsigned int length = 0;
int i;
- int must_sched;
while (wr) {
mask = wr_opcode_mask(wr->opcode, qp);
@@ -791,14 +790,7 @@ static int rxe_post_send_kernel(struct rxe_qp *qp, struct ib_send_wr *wr,
wr = wr->next;
}
- /*
- * Must sched in case of GSI QP because ib_send_mad() hold irq lock,
- * and the requester call ip_local_out_sk() that takes spin_lock_bh.
- */
- must_sched = (qp_type(qp) == IB_QPT_GSI) ||
- (queue_count(qp->sq.queue) > 1);
-
- rxe_run_task(&qp->req.task, must_sched);
+ rxe_run_task(&qp->req.task, 1);
if (unlikely(qp->req.state == QP_STATE_ERROR))
rxe_run_task(&qp->comp.task, 1);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 161ba8c76285..cf291f90b58f 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -1094,7 +1094,7 @@ drop_and_unlock:
spin_unlock_irqrestore(&priv->lock, flags);
}
-static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev)
+static netdev_tx_t ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct rdma_netdev *rn = netdev_priv(dev);
diff --git a/drivers/infiniband/ulp/srp/Kconfig b/drivers/infiniband/ulp/srp/Kconfig
index c74ee9633041..99db8fe5173a 100644
--- a/drivers/infiniband/ulp/srp/Kconfig
+++ b/drivers/infiniband/ulp/srp/Kconfig
@@ -1,6 +1,6 @@
config INFINIBAND_SRP
tristate "InfiniBand SCSI RDMA Protocol"
- depends on SCSI
+ depends on SCSI && INFINIBAND_ADDR_TRANS
select SCSI_SRP_ATTRS
---help---
Support for the SCSI RDMA Protocol over InfiniBand. This
diff --git a/drivers/infiniband/ulp/srpt/Kconfig b/drivers/infiniband/ulp/srpt/Kconfig
index 31ee83d528d9..fb8b7182f05e 100644
--- a/drivers/infiniband/ulp/srpt/Kconfig
+++ b/drivers/infiniband/ulp/srpt/Kconfig
@@ -1,6 +1,6 @@
config INFINIBAND_SRPT
tristate "InfiniBand SCSI RDMA Protocol target support"
- depends on INFINIBAND && TARGET_CORE
+ depends on INFINIBAND && INFINIBAND_ADDR_TRANS && TARGET_CORE
---help---
Support for the SCSI RDMA Protocol (SRP) Target driver. The
diff --git a/drivers/input/input-leds.c b/drivers/input/input-leds.c
index 766bf2660116..5f04b2d94635 100644
--- a/drivers/input/input-leds.c
+++ b/drivers/input/input-leds.c
@@ -88,6 +88,7 @@ static int input_leds_connect(struct input_handler *handler,
const struct input_device_id *id)
{
struct input_leds *leds;
+ struct input_led *led;
unsigned int num_leds;
unsigned int led_code;
int led_no;
@@ -119,14 +120,13 @@ static int input_leds_connect(struct input_handler *handler,
led_no = 0;
for_each_set_bit(led_code, dev->ledbit, LED_CNT) {
- struct input_led *led = &leds->leds[led_no];
+ if (!input_led_info[led_code].name)
+ continue;
+ led = &leds->leds[led_no];
led->handle = &leds->handle;
led->code = led_code;
- if (!input_led_info[led_code].name)
- continue;
-
led->cdev.name = kasprintf(GFP_KERNEL, "%s::%s",
dev_name(&dev->dev),
input_led_info[led_code].name);
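The reordering above moves the name check ahead of the slot assignment, so LEDs without a name are filtered out before a leds->leds[] entry is claimed. A userspace sketch of the filter-then-claim pattern, with made-up data:

#include <stdio.h>

int main(void)
{
	const char *names[] = { "capslock", NULL, "numlock", NULL, "scrolllock" };
	const char *out[5];
	int used = 0;

	for (int i = 0; i < 5; i++) {
		if (!names[i])
			continue;		/* filter first... */
		out[used++] = names[i];		/* ...then claim a slot */
	}

	for (int i = 0; i < used; i++)
		printf("led %d: %s\n", i, out[i]);
	return 0;
}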
diff --git a/drivers/input/misc/hp_sdc_rtc.c b/drivers/input/misc/hp_sdc_rtc.c
index 49b34de0aed4..47eb8ca729fe 100644
--- a/drivers/input/misc/hp_sdc_rtc.c
+++ b/drivers/input/misc/hp_sdc_rtc.c
@@ -509,18 +509,6 @@ static int hp_sdc_rtc_proc_show(struct seq_file *m, void *v)
#undef NY
}
-static int hp_sdc_rtc_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, hp_sdc_rtc_proc_show, NULL);
-}
-
-static const struct file_operations hp_sdc_rtc_proc_fops = {
- .open = hp_sdc_rtc_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
static int hp_sdc_rtc_ioctl(struct file *file,
unsigned int cmd, unsigned long arg)
{
@@ -713,7 +701,7 @@ static int __init hp_sdc_rtc_init(void)
if (misc_register(&hp_sdc_rtc_dev) != 0)
printk(KERN_INFO "Could not register misc. dev for i8042 rtc\n");
- proc_create("driver/rtc", 0, NULL, &hp_sdc_rtc_proc_fops);
+ proc_create_single("driver/rtc", 0, NULL, hp_sdc_rtc_proc_show);
printk(KERN_INFO "HP i8042 SDC + MSM-58321 RTC support loaded "
"(RTC v " RTC_VERSION ")\n");
diff --git a/drivers/input/mouse/alps.c b/drivers/input/mouse/alps.c
index 0a67f235ba88..38f9501acdf0 100644
--- a/drivers/input/mouse/alps.c
+++ b/drivers/input/mouse/alps.c
@@ -583,7 +583,7 @@ static void alps_process_trackstick_packet_v3(struct psmouse *psmouse)
x = (s8)(((packet[0] & 0x20) << 2) | (packet[1] & 0x7f));
y = (s8)(((packet[0] & 0x10) << 3) | (packet[2] & 0x7f));
- z = packet[4] & 0x7c;
+ z = packet[4] & 0x7f;
/*
* The x and y values tend to be quite large, and when used
diff --git a/drivers/input/mouse/elan_i2c_smbus.c b/drivers/input/mouse/elan_i2c_smbus.c
index 29f99529b187..cfcb32559925 100644
--- a/drivers/input/mouse/elan_i2c_smbus.c
+++ b/drivers/input/mouse/elan_i2c_smbus.c
@@ -130,7 +130,7 @@ static int elan_smbus_get_baseline_data(struct i2c_client *client,
bool max_baseline, u8 *value)
{
int error;
- u8 val[3];
+ u8 val[I2C_SMBUS_BLOCK_MAX] = {0};
error = i2c_smbus_read_block_data(client,
max_baseline ?
@@ -149,7 +149,7 @@ static int elan_smbus_get_version(struct i2c_client *client,
bool iap, u8 *version)
{
int error;
- u8 val[3];
+ u8 val[I2C_SMBUS_BLOCK_MAX] = {0};
error = i2c_smbus_read_block_data(client,
iap ? ETP_SMBUS_IAP_VERSION_CMD :
@@ -170,7 +170,7 @@ static int elan_smbus_get_sm_version(struct i2c_client *client,
u8 *clickpad)
{
int error;
- u8 val[3];
+ u8 val[I2C_SMBUS_BLOCK_MAX] = {0};
error = i2c_smbus_read_block_data(client,
ETP_SMBUS_SM_VERSION_CMD, val);
@@ -188,7 +188,7 @@ static int elan_smbus_get_sm_version(struct i2c_client *client,
static int elan_smbus_get_product_id(struct i2c_client *client, u16 *id)
{
int error;
- u8 val[3];
+ u8 val[I2C_SMBUS_BLOCK_MAX] = {0};
error = i2c_smbus_read_block_data(client,
ETP_SMBUS_UNIQUEID_CMD, val);
@@ -205,7 +205,7 @@ static int elan_smbus_get_checksum(struct i2c_client *client,
bool iap, u16 *csum)
{
int error;
- u8 val[3];
+ u8 val[I2C_SMBUS_BLOCK_MAX] = {0};
error = i2c_smbus_read_block_data(client,
iap ? ETP_SMBUS_FW_CHECKSUM_CMD :
@@ -226,7 +226,7 @@ static int elan_smbus_get_max(struct i2c_client *client,
{
int ret;
int error;
- u8 val[3];
+ u8 val[I2C_SMBUS_BLOCK_MAX] = {0};
ret = i2c_smbus_read_block_data(client, ETP_SMBUS_RANGE_CMD, val);
if (ret != 3) {
@@ -246,7 +246,7 @@ static int elan_smbus_get_resolution(struct i2c_client *client,
{
int ret;
int error;
- u8 val[3];
+ u8 val[I2C_SMBUS_BLOCK_MAX] = {0};
ret = i2c_smbus_read_block_data(client, ETP_SMBUS_RESOLUTION_CMD, val);
if (ret != 3) {
@@ -267,7 +267,7 @@ static int elan_smbus_get_num_traces(struct i2c_client *client,
{
int ret;
int error;
- u8 val[3];
+ u8 val[I2C_SMBUS_BLOCK_MAX] = {0};
ret = i2c_smbus_read_block_data(client, ETP_SMBUS_XY_TRACENUM_CMD, val);
if (ret != 3) {
@@ -294,7 +294,7 @@ static int elan_smbus_iap_get_mode(struct i2c_client *client,
{
int error;
u16 constant;
- u8 val[3];
+ u8 val[I2C_SMBUS_BLOCK_MAX] = {0};
error = i2c_smbus_read_block_data(client, ETP_SMBUS_IAP_CTRL_CMD, val);
if (error < 0) {
@@ -345,7 +345,7 @@ static int elan_smbus_prepare_fw_update(struct i2c_client *client)
int len;
int error;
enum tp_mode mode;
- u8 val[3];
+ u8 val[I2C_SMBUS_BLOCK_MAX] = {0};
u8 cmd[4] = {0x0F, 0x78, 0x00, 0x06};
u16 password;
@@ -419,7 +419,7 @@ static int elan_smbus_write_fw_block(struct i2c_client *client,
struct device *dev = &client->dev;
int error;
u16 result;
- u8 val[3];
+ u8 val[I2C_SMBUS_BLOCK_MAX] = {0};
/*
* Due to the limitation of smbus protocol limiting
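Every val[3] to val[I2C_SMBUS_BLOCK_MAX] change above fixes the same stack overflow: i2c_smbus_read_block_data() stores however many bytes the device reports, up to I2C_SMBUS_BLOCK_MAX (32), so the caller's buffer must always be that large even when only three bytes are consumed. A kernel-side sketch, assuming a valid struct i2c_client:

#include <linux/errno.h>
#include <linux/i2c.h>
#include <linux/string.h>

static int read_three_safely(struct i2c_client *client, u8 cmd, u8 out[3])
{
	u8 val[I2C_SMBUS_BLOCK_MAX] = { 0 };	/* device picks the length */
	int len = i2c_smbus_read_block_data(client, cmd, val);

	if (len < 3)
		return len < 0 ? len : -EIO;	/* error or short read */
	memcpy(out, val, 3);			/* consume only what we need */
	return 0;
}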
diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c
index 60f2c463d1cc..a9591d278145 100644
--- a/drivers/input/mouse/synaptics.c
+++ b/drivers/input/mouse/synaptics.c
@@ -172,6 +172,12 @@ static const char * const smbus_pnp_ids[] = {
"LEN0048", /* X1 Carbon 3 */
"LEN0046", /* X250 */
"LEN004a", /* W541 */
+ "LEN0071", /* T480 */
+ "LEN0072", /* X1 Carbon Gen 5 (2017) - Elan/ALPS trackpoint */
+ "LEN0073", /* X1 Carbon G5 (Elantech) */
+ "LEN0092", /* X1 Carbon 6 */
+ "LEN0096", /* X280 */
+ "LEN0097", /* X280 -> ALPS trackpoint */
"LEN200f", /* T450s */
NULL
};
diff --git a/drivers/input/rmi4/rmi_spi.c b/drivers/input/rmi4/rmi_spi.c
index 76edbf2c1bce..082defc329a8 100644
--- a/drivers/input/rmi4/rmi_spi.c
+++ b/drivers/input/rmi4/rmi_spi.c
@@ -147,8 +147,11 @@ static int rmi_spi_xfer(struct rmi_spi_xport *rmi_spi,
if (len > RMI_SPI_XFER_SIZE_LIMIT)
return -EINVAL;
- if (rmi_spi->xfer_buf_size < len)
- rmi_spi_manage_pools(rmi_spi, len);
+ if (rmi_spi->xfer_buf_size < len) {
+ ret = rmi_spi_manage_pools(rmi_spi, len);
+ if (ret < 0)
+ return ret;
+ }
if (addr == 0)
/*
diff --git a/drivers/input/touchscreen/Kconfig b/drivers/input/touchscreen/Kconfig
index 4f15496fec8b..3e613afa10b4 100644
--- a/drivers/input/touchscreen/Kconfig
+++ b/drivers/input/touchscreen/Kconfig
@@ -362,7 +362,7 @@ config TOUCHSCREEN_HIDEEP
If unsure, say N.
- To compile this driver as a moudle, choose M here : the
+ To compile this driver as a module, choose M here : the
module will be called hideep_ts.
config TOUCHSCREEN_ILI210X
diff --git a/drivers/input/touchscreen/atmel_mxt_ts.c b/drivers/input/touchscreen/atmel_mxt_ts.c
index 5d9699fe1b55..09194721aed2 100644
--- a/drivers/input/touchscreen/atmel_mxt_ts.c
+++ b/drivers/input/touchscreen/atmel_mxt_ts.c
@@ -280,7 +280,8 @@ struct mxt_data {
struct input_dev *input_dev;
char phys[64]; /* device physical location */
struct mxt_object *object_table;
- struct mxt_info info;
+ struct mxt_info *info;
+ void *raw_info_block;
unsigned int irq;
unsigned int max_x;
unsigned int max_y;
@@ -460,12 +461,13 @@ static int mxt_lookup_bootloader_address(struct mxt_data *data, bool retry)
{
u8 appmode = data->client->addr;
u8 bootloader;
+ u8 family_id = data->info ? data->info->family_id : 0;
switch (appmode) {
case 0x4a:
case 0x4b:
/* Chips after 1664S use different scheme */
- if (retry || data->info.family_id >= 0xa2) {
+ if (retry || family_id >= 0xa2) {
bootloader = appmode - 0x24;
break;
}
@@ -692,7 +694,7 @@ mxt_get_object(struct mxt_data *data, u8 type)
struct mxt_object *object;
int i;
- for (i = 0; i < data->info.object_num; i++) {
+ for (i = 0; i < data->info->object_num; i++) {
object = data->object_table + i;
if (object->type == type)
return object;
@@ -1462,12 +1464,12 @@ static int mxt_update_cfg(struct mxt_data *data, const struct firmware *cfg)
data_pos += offset;
}
- if (cfg_info.family_id != data->info.family_id) {
+ if (cfg_info.family_id != data->info->family_id) {
dev_err(dev, "Family ID mismatch!\n");
return -EINVAL;
}
- if (cfg_info.variant_id != data->info.variant_id) {
+ if (cfg_info.variant_id != data->info->variant_id) {
dev_err(dev, "Variant ID mismatch!\n");
return -EINVAL;
}
@@ -1512,7 +1514,7 @@ static int mxt_update_cfg(struct mxt_data *data, const struct firmware *cfg)
/* Malloc memory to store configuration */
cfg_start_ofs = MXT_OBJECT_START +
- data->info.object_num * sizeof(struct mxt_object) +
+ data->info->object_num * sizeof(struct mxt_object) +
MXT_INFO_CHECKSUM_SIZE;
config_mem_size = data->mem_size - cfg_start_ofs;
config_mem = kzalloc(config_mem_size, GFP_KERNEL);
@@ -1563,20 +1565,6 @@ release_mem:
return ret;
}
-static int mxt_get_info(struct mxt_data *data)
-{
- struct i2c_client *client = data->client;
- struct mxt_info *info = &data->info;
- int error;
-
- /* Read 7-byte info block starting at address 0 */
- error = __mxt_read_reg(client, 0, sizeof(*info), info);
- if (error)
- return error;
-
- return 0;
-}
-
static void mxt_free_input_device(struct mxt_data *data)
{
if (data->input_dev) {
@@ -1591,9 +1579,10 @@ static void mxt_free_object_table(struct mxt_data *data)
video_unregister_device(&data->dbg.vdev);
v4l2_device_unregister(&data->dbg.v4l2);
#endif
-
- kfree(data->object_table);
data->object_table = NULL;
+ data->info = NULL;
+ kfree(data->raw_info_block);
+ data->raw_info_block = NULL;
kfree(data->msg_buf);
data->msg_buf = NULL;
data->T5_address = 0;
@@ -1609,34 +1598,18 @@ static void mxt_free_object_table(struct mxt_data *data)
data->max_reportid = 0;
}
-static int mxt_get_object_table(struct mxt_data *data)
+static int mxt_parse_object_table(struct mxt_data *data,
+ struct mxt_object *object_table)
{
struct i2c_client *client = data->client;
- size_t table_size;
- struct mxt_object *object_table;
- int error;
int i;
u8 reportid;
u16 end_address;
- table_size = data->info.object_num * sizeof(struct mxt_object);
- object_table = kzalloc(table_size, GFP_KERNEL);
- if (!object_table) {
- dev_err(&data->client->dev, "Failed to allocate memory\n");
- return -ENOMEM;
- }
-
- error = __mxt_read_reg(client, MXT_OBJECT_START, table_size,
- object_table);
- if (error) {
- kfree(object_table);
- return error;
- }
-
/* Valid Report IDs start counting from 1 */
reportid = 1;
data->mem_size = 0;
- for (i = 0; i < data->info.object_num; i++) {
+ for (i = 0; i < data->info->object_num; i++) {
struct mxt_object *object = object_table + i;
u8 min_id, max_id;
@@ -1660,8 +1633,8 @@ static int mxt_get_object_table(struct mxt_data *data)
switch (object->type) {
case MXT_GEN_MESSAGE_T5:
- if (data->info.family_id == 0x80 &&
- data->info.version < 0x20) {
+ if (data->info->family_id == 0x80 &&
+ data->info->version < 0x20) {
/*
* On mXT224 firmware versions prior to V2.0
* read and discard unused CRC byte otherwise
@@ -1716,24 +1689,102 @@ static int mxt_get_object_table(struct mxt_data *data)
/* If T44 exists, T5 position has to be directly after */
if (data->T44_address && (data->T5_address != data->T44_address + 1)) {
dev_err(&client->dev, "Invalid T44 position\n");
- error = -EINVAL;
- goto free_object_table;
+ return -EINVAL;
}
data->msg_buf = kcalloc(data->max_reportid,
data->T5_msg_size, GFP_KERNEL);
- if (!data->msg_buf) {
- dev_err(&client->dev, "Failed to allocate message buffer\n");
+ if (!data->msg_buf)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static int mxt_read_info_block(struct mxt_data *data)
+{
+ struct i2c_client *client = data->client;
+ int error;
+ size_t size;
+ void *id_buf, *buf;
+ uint8_t num_objects;
+ u32 calculated_crc;
+ u8 *crc_ptr;
+
+ /* If info block already allocated, free it */
+ if (data->raw_info_block)
+ mxt_free_object_table(data);
+
+ /* Read 7-byte ID information block starting at address 0 */
+ size = sizeof(struct mxt_info);
+ id_buf = kzalloc(size, GFP_KERNEL);
+ if (!id_buf)
+ return -ENOMEM;
+
+ error = __mxt_read_reg(client, 0, size, id_buf);
+ if (error)
+ goto err_free_mem;
+
+ /* Resize buffer to give space for rest of info block */
+ num_objects = ((struct mxt_info *)id_buf)->object_num;
+ size += (num_objects * sizeof(struct mxt_object))
+ + MXT_INFO_CHECKSUM_SIZE;
+
+ buf = krealloc(id_buf, size, GFP_KERNEL);
+ if (!buf) {
error = -ENOMEM;
- goto free_object_table;
+ goto err_free_mem;
+ }
+ id_buf = buf;
+
+ /* Read rest of info block */
+ error = __mxt_read_reg(client, MXT_OBJECT_START,
+ size - MXT_OBJECT_START,
+ id_buf + MXT_OBJECT_START);
+ if (error)
+ goto err_free_mem;
+
+ /* Extract & calculate checksum */
+ crc_ptr = id_buf + size - MXT_INFO_CHECKSUM_SIZE;
+ data->info_crc = crc_ptr[0] | (crc_ptr[1] << 8) | (crc_ptr[2] << 16);
+
+ calculated_crc = mxt_calculate_crc(id_buf, 0,
+ size - MXT_INFO_CHECKSUM_SIZE);
+
+ /*
+	 * A CRC mismatch can be caused by data corruption due to an I2C
+	 * comms issue, or by the device not using the Object Based
+	 * Protocol (e.g. i2c-hid).
+ */
+ if ((data->info_crc == 0) || (data->info_crc != calculated_crc)) {
+ dev_err(&client->dev,
+ "Info Block CRC error calculated=0x%06X read=0x%06X\n",
+ calculated_crc, data->info_crc);
+ error = -EIO;
+ goto err_free_mem;
+ }
+
+ data->raw_info_block = id_buf;
+ data->info = (struct mxt_info *)id_buf;
+
+ dev_info(&client->dev,
+ "Family: %u Variant: %u Firmware V%u.%u.%02X Objects: %u\n",
+ data->info->family_id, data->info->variant_id,
+ data->info->version >> 4, data->info->version & 0xf,
+ data->info->build, data->info->object_num);
+
+ /* Parse object table information */
+ error = mxt_parse_object_table(data, id_buf + MXT_OBJECT_START);
+ if (error) {
+ dev_err(&client->dev, "Error %d parsing object table\n", error);
+ mxt_free_object_table(data);
+ goto err_free_mem;
}
- data->object_table = object_table;
+ data->object_table = (struct mxt_object *)(id_buf + MXT_OBJECT_START);
return 0;
-free_object_table:
- mxt_free_object_table(data);
+err_free_mem:
+ kfree(id_buf);
return error;
}
@@ -2046,7 +2097,7 @@ static int mxt_initialize(struct mxt_data *data)
int error;
while (1) {
- error = mxt_get_info(data);
+ error = mxt_read_info_block(data);
if (!error)
break;
@@ -2077,16 +2128,9 @@ static int mxt_initialize(struct mxt_data *data)
msleep(MXT_FW_RESET_TIME);
}
- /* Get object table information */
- error = mxt_get_object_table(data);
- if (error) {
- dev_err(&client->dev, "Error %d reading object table\n", error);
- return error;
- }
-
error = mxt_acquire_irq(data);
if (error)
- goto err_free_object_table;
+ return error;
error = request_firmware_nowait(THIS_MODULE, true, MXT_CFG_NAME,
&client->dev, GFP_KERNEL, data,
@@ -2094,14 +2138,10 @@ static int mxt_initialize(struct mxt_data *data)
if (error) {
dev_err(&client->dev, "Failed to invoke firmware loader: %d\n",
error);
- goto err_free_object_table;
+ return error;
}
return 0;
-
-err_free_object_table:
- mxt_free_object_table(data);
- return error;
}
static int mxt_set_t7_power_cfg(struct mxt_data *data, u8 sleep)
@@ -2162,7 +2202,7 @@ recheck:
static u16 mxt_get_debug_value(struct mxt_data *data, unsigned int x,
unsigned int y)
{
- struct mxt_info *info = &data->info;
+ struct mxt_info *info = data->info;
struct mxt_dbg *dbg = &data->dbg;
unsigned int ofs, page;
unsigned int col = 0;
@@ -2490,7 +2530,7 @@ static const struct video_device mxt_video_device = {
static void mxt_debug_init(struct mxt_data *data)
{
- struct mxt_info *info = &data->info;
+ struct mxt_info *info = data->info;
struct mxt_dbg *dbg = &data->dbg;
struct mxt_object *object;
int error;
@@ -2576,7 +2616,6 @@ static int mxt_configure_objects(struct mxt_data *data,
const struct firmware *cfg)
{
struct device *dev = &data->client->dev;
- struct mxt_info *info = &data->info;
int error;
error = mxt_init_t7_power_cfg(data);
@@ -2601,11 +2640,6 @@ static int mxt_configure_objects(struct mxt_data *data,
mxt_debug_init(data);
- dev_info(dev,
- "Family: %u Variant: %u Firmware V%u.%u.%02X Objects: %u\n",
- info->family_id, info->variant_id, info->version >> 4,
- info->version & 0xf, info->build, info->object_num);
-
return 0;
}
@@ -2614,7 +2648,7 @@ static ssize_t mxt_fw_version_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct mxt_data *data = dev_get_drvdata(dev);
- struct mxt_info *info = &data->info;
+ struct mxt_info *info = data->info;
return scnprintf(buf, PAGE_SIZE, "%u.%u.%02X\n",
info->version >> 4, info->version & 0xf, info->build);
}
@@ -2624,7 +2658,7 @@ static ssize_t mxt_hw_version_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct mxt_data *data = dev_get_drvdata(dev);
- struct mxt_info *info = &data->info;
+ struct mxt_info *info = data->info;
return scnprintf(buf, PAGE_SIZE, "%u.%u\n",
info->family_id, info->variant_id);
}
@@ -2663,7 +2697,7 @@ static ssize_t mxt_object_show(struct device *dev,
return -ENOMEM;
error = 0;
- for (i = 0; i < data->info.object_num; i++) {
+ for (i = 0; i < data->info->object_num; i++) {
object = data->object_table + i;
if (!mxt_object_readable(object->type))
@@ -3035,6 +3069,15 @@ static const struct dmi_system_id mxt_dmi_table[] = {
.driver_data = samus_platform_data,
},
{
+ /* Samsung Chromebook Pro */
+ .ident = "Samsung Chromebook Pro",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Google"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "Caroline"),
+ },
+ .driver_data = samus_platform_data,
+ },
+ {
/* Other Google Chromebooks */
.ident = "Chromebook",
.matches = {
@@ -3254,6 +3297,11 @@ static SIMPLE_DEV_PM_OPS(mxt_pm_ops, mxt_suspend, mxt_resume);
static const struct of_device_id mxt_of_match[] = {
{ .compatible = "atmel,maxtouch", },
+ /* Compatibles listed below are deprecated */
+ { .compatible = "atmel,qt602240_ts", },
+ { .compatible = "atmel,atmel_mxt_ts", },
+ { .compatible = "atmel,atmel_mxt_tp", },
+ { .compatible = "atmel,mXT224", },
{},
};
MODULE_DEVICE_TABLE(of, mxt_of_match);
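mxt_read_info_block() above validates the whole info block against a 24-bit checksum stored little-endian in its last three bytes. A userspace sketch of just the extraction step, with illustrative bytes:

#include <stdint.h>
#include <stdio.h>

static uint32_t get_le24(const uint8_t *p)
{
	return p[0] | (p[1] << 8) | ((uint32_t)p[2] << 16);
}

int main(void)
{
	const uint8_t crc_bytes[3] = { 0x78, 0x56, 0x34 };

	printf("info CRC = 0x%06X\n", get_le24(crc_bytes));	/* 0x345678 */
	return 0;
}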
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index df171cb85822..5b714a062fa7 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -146,6 +146,7 @@ config INTEL_IOMMU
select DMA_DIRECT_OPS
select IOMMU_API
select IOMMU_IOVA
+ select NEED_DMA_MAP_STATE
select DMAR_TABLE
help
DMA remapping (DMAR) devices support enables independent address
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 2a99f0f14795..8fb8c737fffe 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -83,7 +83,6 @@
static DEFINE_SPINLOCK(amd_iommu_devtable_lock);
static DEFINE_SPINLOCK(pd_bitmap_lock);
-static DEFINE_SPINLOCK(iommu_table_lock);
/* List of all available dev_data structures */
static LLIST_HEAD(dev_data_list);
@@ -3562,6 +3561,7 @@ EXPORT_SYMBOL(amd_iommu_device_info);
*****************************************************************************/
static struct irq_chip amd_ir_chip;
+static DEFINE_SPINLOCK(iommu_table_lock);
static void set_dte_irq_entry(u16 devid, struct irq_remap_table *table)
{
diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index f05f3cf90756..ddcbbdb5d658 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -167,40 +167,16 @@ EXPORT_SYMBOL(iommu_put_dma_cookie);
* @list: Reserved region list from iommu_get_resv_regions()
*
* IOMMU drivers can use this to implement their .get_resv_regions callback
- * for general non-IOMMU-specific reservations. Currently, this covers host
- * bridge windows for PCI devices and GICv3 ITS region reservation on ACPI
- * based ARM platforms that may require HW MSI reservation.
+ * for general non-IOMMU-specific reservations. Currently, this covers GICv3
+ * ITS region reservation on ACPI based ARM platforms that may require HW MSI
+ * reservation.
*/
void iommu_dma_get_resv_regions(struct device *dev, struct list_head *list)
{
- struct pci_host_bridge *bridge;
- struct resource_entry *window;
-
- if (!is_of_node(dev->iommu_fwspec->iommu_fwnode) &&
- iort_iommu_msi_get_resv_regions(dev, list) < 0)
- return;
-
- if (!dev_is_pci(dev))
- return;
-
- bridge = pci_find_host_bridge(to_pci_dev(dev)->bus);
- resource_list_for_each_entry(window, &bridge->windows) {
- struct iommu_resv_region *region;
- phys_addr_t start;
- size_t length;
-
- if (resource_type(window->res) != IORESOURCE_MEM)
- continue;
- start = window->res->start - window->offset;
- length = window->res->end - window->res->start + 1;
- region = iommu_alloc_resv_region(start, length, 0,
- IOMMU_RESV_RESERVED);
- if (!region)
- return;
+ if (!is_of_node(dev->iommu_fwspec->iommu_fwnode))
+ iort_iommu_msi_get_resv_regions(dev, list);
- list_add_tail(&region->list, list);
- }
}
EXPORT_SYMBOL(iommu_dma_get_resv_regions);
@@ -229,6 +205,23 @@ static int cookie_init_hw_msi_region(struct iommu_dma_cookie *cookie,
return 0;
}
+static void iova_reserve_pci_windows(struct pci_dev *dev,
+ struct iova_domain *iovad)
+{
+ struct pci_host_bridge *bridge = pci_find_host_bridge(dev->bus);
+ struct resource_entry *window;
+ unsigned long lo, hi;
+
+ resource_list_for_each_entry(window, &bridge->windows) {
+ if (resource_type(window->res) != IORESOURCE_MEM)
+ continue;
+
+ lo = iova_pfn(iovad, window->res->start - window->offset);
+ hi = iova_pfn(iovad, window->res->end - window->offset);
+ reserve_iova(iovad, lo, hi);
+ }
+}
+
static int iova_reserve_iommu_regions(struct device *dev,
struct iommu_domain *domain)
{
@@ -238,6 +231,9 @@ static int iova_reserve_iommu_regions(struct device *dev,
LIST_HEAD(resv_regions);
int ret = 0;
+ if (dev_is_pci(dev))
+ iova_reserve_pci_windows(to_pci_dev(dev), iovad);
+
iommu_get_resv_regions(dev, &resv_regions);
list_for_each_entry(region, &resv_regions, list) {
unsigned long lo, hi;
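iova_reserve_pci_windows() above punches the bridge windows straight out of the IOVA allocator instead of routing them through the reserved-region list. A userspace sketch of the address arithmetic: subtract the CPU-to-bus offset, then shift down to IOVA page frames (numbers invented, 4K granule assumed):

#include <stdint.h>
#include <stdio.h>

#define IOVA_SHIFT 12	/* assume a 4K IOVA granule */

int main(void)
{
	uint64_t res_start = 0xe0000000, res_end = 0xe7ffffff;
	uint64_t offset = 0x20000000;	/* window->offset: CPU-to-bus delta */

	uint64_t lo = (res_start - offset) >> IOVA_SHIFT;
	uint64_t hi = (res_end - offset) >> IOVA_SHIFT;

	printf("reserve IOVA PFNs [0x%llx, 0x%llx]\n",
	       (unsigned long long)lo, (unsigned long long)hi);
	return 0;
}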
diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c
index accf58388bdb..460bed4fc5b1 100644
--- a/drivers/iommu/dmar.c
+++ b/drivers/iommu/dmar.c
@@ -1345,7 +1345,7 @@ void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 qdep,
struct qi_desc desc;
if (mask) {
- BUG_ON(addr & ((1 << (VTD_PAGE_SHIFT + mask)) - 1));
+ WARN_ON_ONCE(addr & ((1ULL << (VTD_PAGE_SHIFT + mask)) - 1));
addr |= (1ULL << (VTD_PAGE_SHIFT + mask - 1)) - 1;
desc.high = QI_DEV_IOTLB_ADDR(addr) | QI_DEV_IOTLB_SIZE;
} else
diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c
index 66f69af2c219..3062a154a9fb 100644
--- a/drivers/iommu/intel_irq_remapping.c
+++ b/drivers/iommu/intel_irq_remapping.c
@@ -1136,7 +1136,7 @@ static void intel_ir_reconfigure_irte(struct irq_data *irqd, bool force)
irte->dest_id = IRTE_DEST(cfg->dest_apicid);
/* Update the hardware only if the interrupt is in remapped mode. */
- if (!force || ir_data->irq_2_iommu.mode == IRQ_REMAPPING)
+ if (force || ir_data->irq_2_iommu.mode == IRQ_REMAPPING)
modify_irte(&ir_data->irq_2_iommu, irte);
}
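The one-character fix above inverts a botched predicate: the IRTE must be written to hardware when the update is forced or the entry is already in remapped mode. A truth-table sketch:

#include <stdbool.h>
#include <stdio.h>

enum ir_mode { POSTED, REMAPPED };

static bool should_write_irte(bool force, enum ir_mode mode)
{
	return force || mode == REMAPPED;	/* the corrected condition */
}

int main(void)
{
	printf("%d %d %d\n",
	       should_write_irte(true, POSTED),		/* 1: forced */
	       should_write_irte(false, REMAPPED),	/* 1: remapped */
	       should_write_irte(false, POSTED));	/* 0: skip */
	return 0;
}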
diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c
index 5fc8656c60f9..0468acfa131f 100644
--- a/drivers/iommu/rockchip-iommu.c
+++ b/drivers/iommu/rockchip-iommu.c
@@ -1098,7 +1098,7 @@ static int rk_iommu_of_xlate(struct device *dev,
data->iommu = platform_get_drvdata(iommu_dev);
dev->archdata.iommu = data;
- of_dev_put(iommu_dev);
+ platform_device_put(iommu_dev);
return 0;
}
@@ -1175,8 +1175,15 @@ static int rk_iommu_probe(struct platform_device *pdev)
for (i = 0; i < iommu->num_clocks; ++i)
iommu->clocks[i].id = rk_iommu_clocks[i];
+ /*
+ * iommu clocks should be present for all new devices and devicetrees
+ * but there are older devicetrees without clocks out in the wild.
+	 * So treat clocks as optional for the time being.
+ */
err = devm_clk_bulk_get(iommu->dev, iommu->num_clocks, iommu->clocks);
- if (err)
+ if (err == -ENOENT)
+ iommu->num_clocks = 0;
+ else if (err)
return err;
err = clk_bulk_prepare(iommu->num_clocks, iommu->clocks);
diff --git a/drivers/irqchip/Makefile b/drivers/irqchip/Makefile
index 5ed465ab1c76..15f268f646bf 100644
--- a/drivers/irqchip/Makefile
+++ b/drivers/irqchip/Makefile
@@ -27,7 +27,7 @@ obj-$(CONFIG_ARM_GIC) += irq-gic.o irq-gic-common.o
obj-$(CONFIG_ARM_GIC_PM) += irq-gic-pm.o
obj-$(CONFIG_ARCH_REALVIEW) += irq-gic-realview.o
obj-$(CONFIG_ARM_GIC_V2M) += irq-gic-v2m.o
-obj-$(CONFIG_ARM_GIC_V3) += irq-gic-v3.o irq-gic-common.o
+obj-$(CONFIG_ARM_GIC_V3) += irq-gic-v3.o irq-gic-v3-mbi.o irq-gic-common.o
obj-$(CONFIG_ARM_GIC_V3_ITS) += irq-gic-v3-its.o irq-gic-v3-its-platform-msi.o irq-gic-v4.o
obj-$(CONFIG_ARM_GIC_V3_ITS_PCI) += irq-gic-v3-its-pci-msi.o
obj-$(CONFIG_ARM_GIC_V3_ITS_FSL_MC) += irq-gic-v3-its-fsl-mc-msi.o
diff --git a/drivers/irqchip/irq-gic-v3-mbi.c b/drivers/irqchip/irq-gic-v3-mbi.c
new file mode 100644
index 000000000000..ad70e7c416e3
--- /dev/null
+++ b/drivers/irqchip/irq-gic-v3-mbi.c
@@ -0,0 +1,331 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2018 ARM Limited, All Rights Reserved.
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ */
+
+#define pr_fmt(fmt) "GICv3: " fmt
+
+#include <linux/dma-iommu.h>
+#include <linux/irq.h>
+#include <linux/irqdomain.h>
+#include <linux/kernel.h>
+#include <linux/msi.h>
+#include <linux/of_address.h>
+#include <linux/of_pci.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+
+#include <linux/irqchip/arm-gic-v3.h>
+
+struct mbi_range {
+ u32 spi_start;
+ u32 nr_spis;
+ unsigned long *bm;
+};
+
+static struct mutex mbi_lock;
+static phys_addr_t mbi_phys_base;
+static struct mbi_range *mbi_ranges;
+static unsigned int mbi_range_nr;
+
+static struct irq_chip mbi_irq_chip = {
+ .name = "MBI",
+ .irq_mask = irq_chip_mask_parent,
+ .irq_unmask = irq_chip_unmask_parent,
+ .irq_eoi = irq_chip_eoi_parent,
+ .irq_set_type = irq_chip_set_type_parent,
+ .irq_set_affinity = irq_chip_set_affinity_parent,
+};
+
+static int mbi_irq_gic_domain_alloc(struct irq_domain *domain,
+ unsigned int virq,
+ irq_hw_number_t hwirq)
+{
+ struct irq_fwspec fwspec;
+ struct irq_data *d;
+ int err;
+
+ /*
+ * Using ACPI? There is no MBI support in the spec, you
+ * shouldn't even be here.
+ */
+ if (!is_of_node(domain->parent->fwnode))
+ return -EINVAL;
+
+ /*
+ * Let's default to edge. This is consistent with traditional
+ * MSIs, and systems requiring level signaling will just
+ * enforce the trigger on their own.
+ */
+ fwspec.fwnode = domain->parent->fwnode;
+ fwspec.param_count = 3;
+ fwspec.param[0] = 0;
+ fwspec.param[1] = hwirq - 32;
+ fwspec.param[2] = IRQ_TYPE_EDGE_RISING;
+
+ err = irq_domain_alloc_irqs_parent(domain, virq, 1, &fwspec);
+ if (err)
+ return err;
+
+ d = irq_domain_get_irq_data(domain->parent, virq);
+ return d->chip->irq_set_type(d, IRQ_TYPE_EDGE_RISING);
+}
+
+static void mbi_free_msi(struct mbi_range *mbi, unsigned int hwirq,
+ int nr_irqs)
+{
+ mutex_lock(&mbi_lock);
+ bitmap_release_region(mbi->bm, hwirq - mbi->spi_start,
+ get_count_order(nr_irqs));
+ mutex_unlock(&mbi_lock);
+}
+
+static int mbi_irq_domain_alloc(struct irq_domain *domain, unsigned int virq,
+ unsigned int nr_irqs, void *args)
+{
+ struct mbi_range *mbi = NULL;
+ int hwirq, offset, i, err = 0;
+
+ mutex_lock(&mbi_lock);
+ for (i = 0; i < mbi_range_nr; i++) {
+ offset = bitmap_find_free_region(mbi_ranges[i].bm,
+ mbi_ranges[i].nr_spis,
+ get_count_order(nr_irqs));
+ if (offset >= 0) {
+ mbi = &mbi_ranges[i];
+ break;
+ }
+ }
+ mutex_unlock(&mbi_lock);
+
+ if (!mbi)
+ return -ENOSPC;
+
+ hwirq = mbi->spi_start + offset;
+
+ for (i = 0; i < nr_irqs; i++) {
+ err = mbi_irq_gic_domain_alloc(domain, virq + i, hwirq + i);
+ if (err)
+ goto fail;
+
+ irq_domain_set_hwirq_and_chip(domain, virq + i, hwirq + i,
+ &mbi_irq_chip, mbi);
+ }
+
+ return 0;
+
+fail:
+ irq_domain_free_irqs_parent(domain, virq, nr_irqs);
+ mbi_free_msi(mbi, hwirq, nr_irqs);
+ return err;
+}
+
+static void mbi_irq_domain_free(struct irq_domain *domain,
+ unsigned int virq, unsigned int nr_irqs)
+{
+ struct irq_data *d = irq_domain_get_irq_data(domain, virq);
+ struct mbi_range *mbi = irq_data_get_irq_chip_data(d);
+
+ mbi_free_msi(mbi, d->hwirq, nr_irqs);
+ irq_domain_free_irqs_parent(domain, virq, nr_irqs);
+}
+
+static const struct irq_domain_ops mbi_domain_ops = {
+ .alloc = mbi_irq_domain_alloc,
+ .free = mbi_irq_domain_free,
+};
+
+static void mbi_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
+{
+ msg[0].address_hi = upper_32_bits(mbi_phys_base + GICD_SETSPI_NSR);
+ msg[0].address_lo = lower_32_bits(mbi_phys_base + GICD_SETSPI_NSR);
+ msg[0].data = data->parent_data->hwirq;
+
+ iommu_dma_map_msi_msg(data->irq, msg);
+}
+
+#ifdef CONFIG_PCI_MSI
+/* PCI-specific irqchip */
+static void mbi_mask_msi_irq(struct irq_data *d)
+{
+ pci_msi_mask_irq(d);
+ irq_chip_mask_parent(d);
+}
+
+static void mbi_unmask_msi_irq(struct irq_data *d)
+{
+ pci_msi_unmask_irq(d);
+ irq_chip_unmask_parent(d);
+}
+
+static struct irq_chip mbi_msi_irq_chip = {
+ .name = "MSI",
+ .irq_mask = mbi_mask_msi_irq,
+ .irq_unmask = mbi_unmask_msi_irq,
+ .irq_eoi = irq_chip_eoi_parent,
+ .irq_compose_msi_msg = mbi_compose_msi_msg,
+ .irq_write_msi_msg = pci_msi_domain_write_msg,
+};
+
+static struct msi_domain_info mbi_msi_domain_info = {
+ .flags = (MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS |
+ MSI_FLAG_PCI_MSIX | MSI_FLAG_MULTI_PCI_MSI),
+ .chip = &mbi_msi_irq_chip,
+};
+
+static int mbi_allocate_pci_domain(struct irq_domain *nexus_domain,
+ struct irq_domain **pci_domain)
+{
+ *pci_domain = pci_msi_create_irq_domain(nexus_domain->parent->fwnode,
+ &mbi_msi_domain_info,
+ nexus_domain);
+ if (!*pci_domain)
+ return -ENOMEM;
+
+ return 0;
+}
+#else
+static int mbi_allocate_pci_domain(struct irq_domain *nexus_domain,
+ struct irq_domain **pci_domain)
+{
+ *pci_domain = NULL;
+ return 0;
+}
+#endif
+
+static void mbi_compose_mbi_msg(struct irq_data *data, struct msi_msg *msg)
+{
+ mbi_compose_msi_msg(data, msg);
+
+ msg[1].address_hi = upper_32_bits(mbi_phys_base + GICD_CLRSPI_NSR);
+ msg[1].address_lo = lower_32_bits(mbi_phys_base + GICD_CLRSPI_NSR);
+ msg[1].data = data->parent_data->hwirq;
+
+ iommu_dma_map_msi_msg(data->irq, &msg[1]);
+}
+
+/* Platform-MSI specific irqchip */
+static struct irq_chip mbi_pmsi_irq_chip = {
+ .name = "pMSI",
+ .irq_set_type = irq_chip_set_type_parent,
+ .irq_compose_msi_msg = mbi_compose_mbi_msg,
+ .flags = IRQCHIP_SUPPORTS_LEVEL_MSI,
+};
+
+static struct msi_domain_ops mbi_pmsi_ops = {
+};
+
+static struct msi_domain_info mbi_pmsi_domain_info = {
+ .flags = (MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS |
+ MSI_FLAG_LEVEL_CAPABLE),
+ .ops = &mbi_pmsi_ops,
+ .chip = &mbi_pmsi_irq_chip,
+};
+
+static int mbi_allocate_domains(struct irq_domain *parent)
+{
+ struct irq_domain *nexus_domain, *pci_domain, *plat_domain;
+ int err;
+
+ nexus_domain = irq_domain_create_tree(parent->fwnode,
+ &mbi_domain_ops, NULL);
+ if (!nexus_domain)
+ return -ENOMEM;
+
+ irq_domain_update_bus_token(nexus_domain, DOMAIN_BUS_NEXUS);
+ nexus_domain->parent = parent;
+
+ err = mbi_allocate_pci_domain(nexus_domain, &pci_domain);
+
+ plat_domain = platform_msi_create_irq_domain(parent->fwnode,
+ &mbi_pmsi_domain_info,
+ nexus_domain);
+
+ if (err || !plat_domain) {
+ if (plat_domain)
+ irq_domain_remove(plat_domain);
+ if (pci_domain)
+ irq_domain_remove(pci_domain);
+ irq_domain_remove(nexus_domain);
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+int __init mbi_init(struct fwnode_handle *fwnode, struct irq_domain *parent)
+{
+ struct device_node *np;
+ const __be32 *reg;
+ int ret, n;
+
+ np = to_of_node(fwnode);
+
+ if (!of_property_read_bool(np, "msi-controller"))
+ return 0;
+
+ n = of_property_count_elems_of_size(np, "mbi-ranges", sizeof(u32));
+ if (n <= 0 || n % 2)
+ return -EINVAL;
+
+ mbi_range_nr = n / 2;
+ mbi_ranges = kcalloc(mbi_range_nr, sizeof(*mbi_ranges), GFP_KERNEL);
+ if (!mbi_ranges)
+ return -ENOMEM;
+
+ for (n = 0; n < mbi_range_nr; n++) {
+ ret = of_property_read_u32_index(np, "mbi-ranges", n * 2,
+ &mbi_ranges[n].spi_start);
+ if (ret)
+ goto err_free_mbi;
+ ret = of_property_read_u32_index(np, "mbi-ranges", n * 2 + 1,
+ &mbi_ranges[n].nr_spis);
+ if (ret)
+ goto err_free_mbi;
+
+ mbi_ranges[n].bm = kcalloc(BITS_TO_LONGS(mbi_ranges[n].nr_spis),
+ sizeof(long), GFP_KERNEL);
+ if (!mbi_ranges[n].bm) {
+ ret = -ENOMEM;
+ goto err_free_mbi;
+ }
+ pr_info("MBI range [%d:%d]\n", mbi_ranges[n].spi_start,
+ mbi_ranges[n].spi_start + mbi_ranges[n].nr_spis - 1);
+ }
+
+ reg = of_get_property(np, "mbi-alias", NULL);
+ if (reg) {
+ mbi_phys_base = of_translate_address(np, reg);
+ if (mbi_phys_base == OF_BAD_ADDR) {
+ ret = -ENXIO;
+ goto err_free_mbi;
+ }
+ } else {
+ struct resource res;
+
+ if (of_address_to_resource(np, 0, &res)) {
+ ret = -ENXIO;
+ goto err_free_mbi;
+ }
+
+ mbi_phys_base = res.start;
+ }
+
+ pr_info("Using MBI frame %pa\n", &mbi_phys_base);
+
+ ret = mbi_allocate_domains(parent);
+ if (ret)
+ goto err_free_mbi;
+
+ return 0;
+
+err_free_mbi:
+ if (mbi_ranges) {
+ for (n = 0; n < mbi_range_nr; n++)
+ kfree(mbi_ranges[n].bm);
+ kfree(mbi_ranges);
+ }
+
+ return ret;
+}
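mbi_irq_domain_alloc() above hands out power-of-two blocks of SPIs by carving regions out of a per-range bitmap. A kernel-side sketch of the allocation step over one hypothetical 64-SPI range:

#include <linux/bitmap.h>
#include <linux/bitops.h>
#include <linux/errno.h>

static DECLARE_BITMAP(demo_bm, 64);	/* one made-up 64-SPI range */

static int demo_alloc_spis(unsigned int nr_irqs, unsigned int spi_start)
{
	int offset = bitmap_find_free_region(demo_bm, 64,
					     get_count_order(nr_irqs));

	if (offset < 0)
		return -ENOSPC;		/* range exhausted */
	return spi_start + offset;	/* first hardware SPI of the block */
}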
diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c
index e5d101418390..5a67ec084588 100644
--- a/drivers/irqchip/irq-gic-v3.c
+++ b/drivers/irqchip/irq-gic-v3.c
@@ -1099,6 +1099,7 @@ static int __init gic_init_bases(void __iomem *dist_base,
gic_data.domain = irq_domain_create_tree(handle, &gic_irq_domain_ops,
&gic_data);
+ irq_domain_update_bus_token(gic_data.domain, DOMAIN_BUS_WIRED);
gic_data.rdists.rdist = alloc_percpu(typeof(*gic_data.rdists.rdist));
gic_data.rdists.has_vlpis = true;
gic_data.rdists.has_direct_lpi = true;
@@ -1112,6 +1113,12 @@ static int __init gic_init_bases(void __iomem *dist_base,
pr_info("Distributor has %sRange Selector support\n",
gic_data.has_rss ? "" : "no ");
+ if (typer & GICD_TYPER_MBIS) {
+ err = mbi_init(handle, gic_data.domain);
+ if (err)
+ pr_err("Failed to initialize MBIs\n");
+ }
+
set_handle_irq(gic_handle_irq);
gic_update_vlpi_properties();
diff --git a/drivers/irqchip/irq-meson-gpio.c b/drivers/irqchip/irq-meson-gpio.c
index a59bdbc0b9bb..7b531fd075b8 100644
--- a/drivers/irqchip/irq-meson-gpio.c
+++ b/drivers/irqchip/irq-meson-gpio.c
@@ -63,11 +63,16 @@ static const struct meson_gpio_irq_params gxl_params = {
.nr_hwirq = 110,
};
+static const struct meson_gpio_irq_params axg_params = {
+ .nr_hwirq = 100,
+};
+
static const struct of_device_id meson_irq_gpio_matches[] = {
{ .compatible = "amlogic,meson8-gpio-intc", .data = &meson8_params },
{ .compatible = "amlogic,meson8b-gpio-intc", .data = &meson8b_params },
{ .compatible = "amlogic,meson-gxbb-gpio-intc", .data = &gxbb_params },
{ .compatible = "amlogic,meson-gxl-gpio-intc", .data = &gxl_params },
+ { .compatible = "amlogic,meson-axg-gpio-intc", .data = &axg_params },
{ }
};
diff --git a/drivers/irqchip/irq-mvebu-gicp.c b/drivers/irqchip/irq-mvebu-gicp.c
index 17a4a7b6cdbb..4e17f7081efc 100644
--- a/drivers/irqchip/irq-mvebu-gicp.c
+++ b/drivers/irqchip/irq-mvebu-gicp.c
@@ -19,8 +19,6 @@
#include <dt-bindings/interrupt-controller/arm-gic.h>
-#include "irq-mvebu-gicp.h"
-
#define GICP_SETSPI_NSR_OFFSET 0x0
#define GICP_CLRSPI_NSR_OFFSET 0x8
@@ -55,34 +53,18 @@ static int gicp_idx_to_spi(struct mvebu_gicp *gicp, int idx)
return -EINVAL;
}
-int mvebu_gicp_get_doorbells(struct device_node *dn, phys_addr_t *setspi,
- phys_addr_t *clrspi)
-{
- struct platform_device *pdev;
- struct mvebu_gicp *gicp;
-
- pdev = of_find_device_by_node(dn);
- if (!pdev)
- return -ENODEV;
-
- gicp = platform_get_drvdata(pdev);
- if (!gicp)
- return -ENODEV;
-
- *setspi = gicp->res->start + GICP_SETSPI_NSR_OFFSET;
- *clrspi = gicp->res->start + GICP_CLRSPI_NSR_OFFSET;
-
- return 0;
-}
-
static void gicp_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
{
struct mvebu_gicp *gicp = data->chip_data;
phys_addr_t setspi = gicp->res->start + GICP_SETSPI_NSR_OFFSET;
-
- msg->data = data->hwirq;
- msg->address_lo = lower_32_bits(setspi);
- msg->address_hi = upper_32_bits(setspi);
+ phys_addr_t clrspi = gicp->res->start + GICP_CLRSPI_NSR_OFFSET;
+
+ msg[0].data = data->hwirq;
+ msg[0].address_lo = lower_32_bits(setspi);
+ msg[0].address_hi = upper_32_bits(setspi);
+ msg[1].data = data->hwirq;
+ msg[1].address_lo = lower_32_bits(clrspi);
+ msg[1].address_hi = upper_32_bits(clrspi);
}
static struct irq_chip gicp_irq_chip = {
@@ -170,13 +152,15 @@ static const struct irq_domain_ops gicp_domain_ops = {
static struct irq_chip gicp_msi_irq_chip = {
.name = "GICP",
.irq_set_type = irq_chip_set_type_parent,
+ .flags = IRQCHIP_SUPPORTS_LEVEL_MSI,
};
static struct msi_domain_ops gicp_msi_ops = {
};
static struct msi_domain_info gicp_msi_domain_info = {
- .flags = (MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS),
+ .flags = (MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS |
+ MSI_FLAG_LEVEL_CAPABLE),
.ops = &gicp_msi_ops,
.chip = &gicp_msi_irq_chip,
};
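With IRQCHIP_SUPPORTS_LEVEL_MSI, gicp_compose_msi_msg() above fills two messages instead of one: msg[0] targets the SETSPI doorbell to assert the interrupt and msg[1] the CLRSPI doorbell to deassert it, both carrying the same hwirq payload. A userspace sketch with invented doorbell addresses:

#include <stdint.h>
#include <stdio.h>

struct demo_msi_msg {
	uint32_t address_lo, address_hi, data;
};

int main(void)
{
	uint64_t setspi = 0xf0210040, clrspi = 0xf0210048;
	uint32_t hwirq = 42;
	struct demo_msi_msg msg[2];

	msg[0] = (struct demo_msi_msg){ (uint32_t)setspi,
					(uint32_t)(setspi >> 32), hwirq };
	msg[1] = (struct demo_msi_msg){ (uint32_t)clrspi,
					(uint32_t)(clrspi >> 32), hwirq };

	printf("assert:   data %u -> doorbell %#llx\n", msg[0].data,
	       ((unsigned long long)msg[0].address_hi << 32) | msg[0].address_lo);
	printf("deassert: data %u -> doorbell %#llx\n", msg[1].data,
	       ((unsigned long long)msg[1].address_hi << 32) | msg[1].address_lo);
	return 0;
}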
diff --git a/drivers/irqchip/irq-mvebu-gicp.h b/drivers/irqchip/irq-mvebu-gicp.h
deleted file mode 100644
index eaa12fb72102..000000000000
--- a/drivers/irqchip/irq-mvebu-gicp.h
+++ /dev/null
@@ -1,12 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __MVEBU_GICP_H__
-#define __MVEBU_GICP_H__
-
-#include <linux/types.h>
-
-struct device_node;
-
-int mvebu_gicp_get_doorbells(struct device_node *dn, phys_addr_t *setspi,
- phys_addr_t *clrspi);
-
-#endif /* __MVEBU_GICP_H__ */
diff --git a/drivers/irqchip/irq-mvebu-icu.c b/drivers/irqchip/irq-mvebu-icu.c
index e18c48d3a92e..13063339b416 100644
--- a/drivers/irqchip/irq-mvebu-icu.c
+++ b/drivers/irqchip/irq-mvebu-icu.c
@@ -21,8 +21,6 @@
#include <dt-bindings/interrupt-controller/mvebu-icu.h>
-#include "irq-mvebu-gicp.h"
-
/* ICU registers */
#define ICU_SETSPI_NSR_AL 0x10
#define ICU_SETSPI_NSR_AH 0x14
@@ -43,6 +41,7 @@ struct mvebu_icu {
void __iomem *base;
struct irq_domain *domain;
struct device *dev;
+ atomic_t initialized;
};
struct mvebu_icu_irq_data {
@@ -51,6 +50,18 @@ struct mvebu_icu_irq_data {
unsigned int type;
};
+static void mvebu_icu_init(struct mvebu_icu *icu, struct msi_msg *msg)
+{
+ if (atomic_cmpxchg(&icu->initialized, false, true))
+ return;
+
+ /* Set Clear/Set ICU SPI message address in AP */
+ writel_relaxed(msg[0].address_hi, icu->base + ICU_SETSPI_NSR_AH);
+ writel_relaxed(msg[0].address_lo, icu->base + ICU_SETSPI_NSR_AL);
+ writel_relaxed(msg[1].address_hi, icu->base + ICU_CLRSPI_NSR_AH);
+ writel_relaxed(msg[1].address_lo, icu->base + ICU_CLRSPI_NSR_AL);
+}
+
static void mvebu_icu_write_msg(struct msi_desc *desc, struct msi_msg *msg)
{
struct irq_data *d = irq_get_irq_data(desc->irq);
@@ -59,6 +70,8 @@ static void mvebu_icu_write_msg(struct msi_desc *desc, struct msi_msg *msg)
unsigned int icu_int;
if (msg->address_lo || msg->address_hi) {
+		/* One-off initialization */
+ mvebu_icu_init(icu, msg);
/* Configure the ICU with irq number & type */
icu_int = msg->data | ICU_INT_ENABLE;
if (icu_irqd->type & IRQ_TYPE_EDGE_RISING)
@@ -197,9 +210,7 @@ static int mvebu_icu_probe(struct platform_device *pdev)
struct device_node *node = pdev->dev.of_node;
struct device_node *gicp_dn;
struct resource *res;
- phys_addr_t setspi, clrspi;
- u32 i, icu_int;
- int ret;
+ int i;
icu = devm_kzalloc(&pdev->dev, sizeof(struct mvebu_icu),
GFP_KERNEL);
@@ -242,22 +253,12 @@ static int mvebu_icu_probe(struct platform_device *pdev)
if (!gicp_dn)
return -ENODEV;
- ret = mvebu_gicp_get_doorbells(gicp_dn, &setspi, &clrspi);
- if (ret)
- return ret;
-
- /* Set Clear/Set ICU SPI message address in AP */
- writel_relaxed(upper_32_bits(setspi), icu->base + ICU_SETSPI_NSR_AH);
- writel_relaxed(lower_32_bits(setspi), icu->base + ICU_SETSPI_NSR_AL);
- writel_relaxed(upper_32_bits(clrspi), icu->base + ICU_CLRSPI_NSR_AH);
- writel_relaxed(lower_32_bits(clrspi), icu->base + ICU_CLRSPI_NSR_AL);
-
/*
* Clean all ICU interrupts with type SPI_NSR, required to
* avoid unpredictable SPI assignments done by firmware.
*/
for (i = 0 ; i < ICU_MAX_IRQS ; i++) {
- icu_int = readl(icu->base + ICU_INT_CFG(i));
+ u32 icu_int = readl_relaxed(icu->base + ICU_INT_CFG(i));
if ((icu_int >> ICU_GROUP_SHIFT) == ICU_GRP_NSR)
writel_relaxed(0x0, icu->base + ICU_INT_CFG(i));
}
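mvebu_icu_init() above uses atomic_cmpxchg() so that only the first MSI message to arrive programs the SET/CLR doorbell addresses into the ICU. A userspace C11 sketch of the same one-off initialization pattern:

#include <stdatomic.h>
#include <stdio.h>

static atomic_bool initialized;

static void one_off_init(void)
{
	_Bool expected = false;

	if (!atomic_compare_exchange_strong(&initialized, &expected, true))
		return;			/* setup already done (or in progress) */
	printf("running setup exactly once\n");
}

int main(void)
{
	one_off_init();	/* prints */
	one_off_init();	/* silent */
	return 0;
}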
diff --git a/drivers/irqchip/irq-stm32-exti.c b/drivers/irqchip/irq-stm32-exti.c
index 36f0fbe36c35..5089c1e2838d 100644
--- a/drivers/irqchip/irq-stm32-exti.c
+++ b/drivers/irqchip/irq-stm32-exti.c
@@ -14,6 +14,9 @@
#include <linux/irqdomain.h>
#include <linux/of_address.h>
#include <linux/of_irq.h>
+#include <linux/syscore_ops.h>
+
+#include <dt-bindings/interrupt-controller/arm-gic.h>
#define IRQS_PER_BANK 32
@@ -23,29 +26,69 @@ struct stm32_exti_bank {
u32 rtsr_ofst;
u32 ftsr_ofst;
u32 swier_ofst;
- u32 pr_ofst;
+ u32 rpr_ofst;
+ u32 fpr_ofst;
+};
+
+#define UNDEF_REG ~0
+
+struct stm32_desc_irq {
+ u32 exti;
+ u32 irq_parent;
+};
+
+struct stm32_exti_drv_data {
+ const struct stm32_exti_bank **exti_banks;
+ const struct stm32_desc_irq *desc_irqs;
+ u32 bank_nr;
+ u32 irq_nr;
+};
+
+struct stm32_exti_chip_data {
+ struct stm32_exti_host_data *host_data;
+ const struct stm32_exti_bank *reg_bank;
+ struct raw_spinlock rlock;
+ u32 wake_active;
+ u32 mask_cache;
+ u32 rtsr_cache;
+ u32 ftsr_cache;
};
+struct stm32_exti_host_data {
+ void __iomem *base;
+ struct stm32_exti_chip_data *chips_data;
+ const struct stm32_exti_drv_data *drv_data;
+};
+
+static struct stm32_exti_host_data *stm32_host_data;
+
static const struct stm32_exti_bank stm32f4xx_exti_b1 = {
.imr_ofst = 0x00,
.emr_ofst = 0x04,
.rtsr_ofst = 0x08,
.ftsr_ofst = 0x0C,
.swier_ofst = 0x10,
- .pr_ofst = 0x14,
+ .rpr_ofst = 0x14,
+ .fpr_ofst = UNDEF_REG,
};
static const struct stm32_exti_bank *stm32f4xx_exti_banks[] = {
&stm32f4xx_exti_b1,
};
+static const struct stm32_exti_drv_data stm32f4xx_drv_data = {
+ .exti_banks = stm32f4xx_exti_banks,
+ .bank_nr = ARRAY_SIZE(stm32f4xx_exti_banks),
+};
+
static const struct stm32_exti_bank stm32h7xx_exti_b1 = {
.imr_ofst = 0x80,
.emr_ofst = 0x84,
.rtsr_ofst = 0x00,
.ftsr_ofst = 0x04,
.swier_ofst = 0x08,
- .pr_ofst = 0x88,
+ .rpr_ofst = 0x88,
+ .fpr_ofst = UNDEF_REG,
};
static const struct stm32_exti_bank stm32h7xx_exti_b2 = {
@@ -54,7 +97,8 @@ static const struct stm32_exti_bank stm32h7xx_exti_b2 = {
.rtsr_ofst = 0x20,
.ftsr_ofst = 0x24,
.swier_ofst = 0x28,
- .pr_ofst = 0x98,
+ .rpr_ofst = 0x98,
+ .fpr_ofst = UNDEF_REG,
};
static const struct stm32_exti_bank stm32h7xx_exti_b3 = {
@@ -63,7 +107,8 @@ static const struct stm32_exti_bank stm32h7xx_exti_b3 = {
.rtsr_ofst = 0x40,
.ftsr_ofst = 0x44,
.swier_ofst = 0x48,
- .pr_ofst = 0xA8,
+ .rpr_ofst = 0xA8,
+ .fpr_ofst = UNDEF_REG,
};
static const struct stm32_exti_bank *stm32h7xx_exti_banks[] = {
@@ -72,18 +117,105 @@ static const struct stm32_exti_bank *stm32h7xx_exti_banks[] = {
&stm32h7xx_exti_b3,
};
-static unsigned long stm32_exti_pending(struct irq_chip_generic *gc)
+static const struct stm32_exti_drv_data stm32h7xx_drv_data = {
+ .exti_banks = stm32h7xx_exti_banks,
+ .bank_nr = ARRAY_SIZE(stm32h7xx_exti_banks),
+};
+
+static const struct stm32_exti_bank stm32mp1_exti_b1 = {
+ .imr_ofst = 0x80,
+ .emr_ofst = 0x84,
+ .rtsr_ofst = 0x00,
+ .ftsr_ofst = 0x04,
+ .swier_ofst = 0x08,
+ .rpr_ofst = 0x0C,
+ .fpr_ofst = 0x10,
+};
+
+static const struct stm32_exti_bank stm32mp1_exti_b2 = {
+ .imr_ofst = 0x90,
+ .emr_ofst = 0x94,
+ .rtsr_ofst = 0x20,
+ .ftsr_ofst = 0x24,
+ .swier_ofst = 0x28,
+ .rpr_ofst = 0x2C,
+ .fpr_ofst = 0x30,
+};
+
+static const struct stm32_exti_bank stm32mp1_exti_b3 = {
+ .imr_ofst = 0xA0,
+ .emr_ofst = 0xA4,
+ .rtsr_ofst = 0x40,
+ .ftsr_ofst = 0x44,
+ .swier_ofst = 0x48,
+ .rpr_ofst = 0x4C,
+ .fpr_ofst = 0x50,
+};
+
+static const struct stm32_exti_bank *stm32mp1_exti_banks[] = {
+ &stm32mp1_exti_b1,
+ &stm32mp1_exti_b2,
+ &stm32mp1_exti_b3,
+};
+
+static const struct stm32_desc_irq stm32mp1_desc_irq[] = {
+ { .exti = 1, .irq_parent = 7 },
+ { .exti = 2, .irq_parent = 8 },
+ { .exti = 3, .irq_parent = 9 },
+ { .exti = 4, .irq_parent = 10 },
+ { .exti = 5, .irq_parent = 23 },
+ { .exti = 6, .irq_parent = 64 },
+ { .exti = 7, .irq_parent = 65 },
+ { .exti = 8, .irq_parent = 66 },
+ { .exti = 9, .irq_parent = 67 },
+ { .exti = 10, .irq_parent = 40 },
+ { .exti = 11, .irq_parent = 42 },
+ { .exti = 12, .irq_parent = 76 },
+ { .exti = 13, .irq_parent = 77 },
+ { .exti = 14, .irq_parent = 121 },
+ { .exti = 15, .irq_parent = 127 },
+ { .exti = 16, .irq_parent = 1 },
+ { .exti = 65, .irq_parent = 144 },
+ { .exti = 68, .irq_parent = 143 },
+ { .exti = 73, .irq_parent = 129 },
+};
+
+static const struct stm32_exti_drv_data stm32mp1_drv_data = {
+ .exti_banks = stm32mp1_exti_banks,
+ .bank_nr = ARRAY_SIZE(stm32mp1_exti_banks),
+ .desc_irqs = stm32mp1_desc_irq,
+ .irq_nr = ARRAY_SIZE(stm32mp1_desc_irq),
+};
+
+static int stm32_exti_to_irq(const struct stm32_exti_drv_data *drv_data,
+ irq_hw_number_t hwirq)
{
- const struct stm32_exti_bank *stm32_bank = gc->private;
+ const struct stm32_desc_irq *desc_irq;
+ int i;
- return irq_reg_readl(gc, stm32_bank->pr_ofst);
+ if (!drv_data->desc_irqs)
+ return -EINVAL;
+
+ for (i = 0; i < drv_data->irq_nr; i++) {
+ desc_irq = &drv_data->desc_irqs[i];
+ if (desc_irq->exti == hwirq)
+ return desc_irq->irq_parent;
+ }
+
+ return -EINVAL;
}
-static void stm32_exti_irq_ack(struct irq_chip_generic *gc, u32 mask)
+static unsigned long stm32_exti_pending(struct irq_chip_generic *gc)
{
- const struct stm32_exti_bank *stm32_bank = gc->private;
+ struct stm32_exti_chip_data *chip_data = gc->private;
+ const struct stm32_exti_bank *stm32_bank = chip_data->reg_bank;
+ unsigned long pending;
+
+ pending = irq_reg_readl(gc, stm32_bank->rpr_ofst);
+ if (stm32_bank->fpr_ofst != UNDEF_REG)
+ pending |= irq_reg_readl(gc, stm32_bank->fpr_ofst);
- irq_reg_writel(gc, mask, stm32_bank->pr_ofst);
+ return pending;
}
static void stm32_irq_handler(struct irq_desc *desc)
@@ -92,7 +224,6 @@ static void stm32_irq_handler(struct irq_desc *desc)
struct irq_chip *chip = irq_desc_get_chip(desc);
unsigned int virq, nbanks = domain->gc->num_chips;
struct irq_chip_generic *gc;
- const struct stm32_exti_bank *stm32_bank;
unsigned long pending;
int n, i, irq_base = 0;
@@ -100,13 +231,11 @@ static void stm32_irq_handler(struct irq_desc *desc)
for (i = 0; i < nbanks; i++, irq_base += IRQS_PER_BANK) {
gc = irq_get_domain_generic_chip(domain, irq_base);
- stm32_bank = gc->private;
while ((pending = stm32_exti_pending(gc))) {
for_each_set_bit(n, &pending, IRQS_PER_BANK) {
virq = irq_find_mapping(domain, irq_base + n);
generic_handle_irq(virq);
- stm32_exti_irq_ack(gc, BIT(n));
}
}
}
@@ -114,36 +243,50 @@ static void stm32_irq_handler(struct irq_desc *desc)
chained_irq_exit(chip, desc);
}
-static int stm32_irq_set_type(struct irq_data *data, unsigned int type)
+static int stm32_exti_set_type(struct irq_data *d,
+ unsigned int type, u32 *rtsr, u32 *ftsr)
{
- struct irq_chip_generic *gc = irq_data_get_irq_chip_data(data);
- const struct stm32_exti_bank *stm32_bank = gc->private;
- int pin = data->hwirq % IRQS_PER_BANK;
- u32 rtsr, ftsr;
-
- irq_gc_lock(gc);
-
- rtsr = irq_reg_readl(gc, stm32_bank->rtsr_ofst);
- ftsr = irq_reg_readl(gc, stm32_bank->ftsr_ofst);
+ u32 mask = BIT(d->hwirq % IRQS_PER_BANK);
switch (type) {
case IRQ_TYPE_EDGE_RISING:
- rtsr |= BIT(pin);
- ftsr &= ~BIT(pin);
+ *rtsr |= mask;
+ *ftsr &= ~mask;
break;
case IRQ_TYPE_EDGE_FALLING:
- rtsr &= ~BIT(pin);
- ftsr |= BIT(pin);
+ *rtsr &= ~mask;
+ *ftsr |= mask;
break;
case IRQ_TYPE_EDGE_BOTH:
- rtsr |= BIT(pin);
- ftsr |= BIT(pin);
+ *rtsr |= mask;
+ *ftsr |= mask;
break;
default:
- irq_gc_unlock(gc);
return -EINVAL;
}
+ return 0;
+}
+
+static int stm32_irq_set_type(struct irq_data *d, unsigned int type)
+{
+ struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d);
+ struct stm32_exti_chip_data *chip_data = gc->private;
+ const struct stm32_exti_bank *stm32_bank = chip_data->reg_bank;
+ u32 rtsr, ftsr;
+ int err;
+
+ irq_gc_lock(gc);
+
+ rtsr = irq_reg_readl(gc, stm32_bank->rtsr_ofst);
+ ftsr = irq_reg_readl(gc, stm32_bank->ftsr_ofst);
+
+ err = stm32_exti_set_type(d, type, &rtsr, &ftsr);
+ if (err) {
+ irq_gc_unlock(gc);
+ return err;
+ }
+
irq_reg_writel(gc, rtsr, stm32_bank->rtsr_ofst);
irq_reg_writel(gc, ftsr, stm32_bank->ftsr_ofst);
@@ -152,40 +295,59 @@ static int stm32_irq_set_type(struct irq_data *data, unsigned int type)
return 0;
}
-static int stm32_irq_set_wake(struct irq_data *data, unsigned int on)
+static void stm32_chip_suspend(struct stm32_exti_chip_data *chip_data,
+ u32 wake_active)
{
- struct irq_chip_generic *gc = irq_data_get_irq_chip_data(data);
- const struct stm32_exti_bank *stm32_bank = gc->private;
- int pin = data->hwirq % IRQS_PER_BANK;
- u32 imr;
+ const struct stm32_exti_bank *stm32_bank = chip_data->reg_bank;
+ void __iomem *base = chip_data->host_data->base;
- irq_gc_lock(gc);
+ /* save rtsr, ftsr registers */
+ chip_data->rtsr_cache = readl_relaxed(base + stm32_bank->rtsr_ofst);
+ chip_data->ftsr_cache = readl_relaxed(base + stm32_bank->ftsr_ofst);
- imr = irq_reg_readl(gc, stm32_bank->imr_ofst);
- if (on)
- imr |= BIT(pin);
- else
- imr &= ~BIT(pin);
- irq_reg_writel(gc, imr, stm32_bank->imr_ofst);
+ writel_relaxed(wake_active, base + stm32_bank->imr_ofst);
+}
+
+static void stm32_chip_resume(struct stm32_exti_chip_data *chip_data,
+ u32 mask_cache)
+{
+ const struct stm32_exti_bank *stm32_bank = chip_data->reg_bank;
+ void __iomem *base = chip_data->host_data->base;
+
+ /* restore rtsr, ftsr, registers */
+ writel_relaxed(chip_data->rtsr_cache, base + stm32_bank->rtsr_ofst);
+ writel_relaxed(chip_data->ftsr_cache, base + stm32_bank->ftsr_ofst);
+ writel_relaxed(mask_cache, base + stm32_bank->imr_ofst);
+}
+
+static void stm32_irq_suspend(struct irq_chip_generic *gc)
+{
+ struct stm32_exti_chip_data *chip_data = gc->private;
+
+ irq_gc_lock(gc);
+ stm32_chip_suspend(chip_data, gc->wake_active);
irq_gc_unlock(gc);
+}
- return 0;
+static void stm32_irq_resume(struct irq_chip_generic *gc)
+{
+ struct stm32_exti_chip_data *chip_data = gc->private;
+
+ irq_gc_lock(gc);
+ stm32_chip_resume(chip_data, gc->mask_cache);
+ irq_gc_unlock(gc);
}
static int stm32_exti_alloc(struct irq_domain *d, unsigned int virq,
unsigned int nr_irqs, void *data)
{
- struct irq_chip_generic *gc;
struct irq_fwspec *fwspec = data;
irq_hw_number_t hwirq;
hwirq = fwspec->param[0];
- gc = irq_get_domain_generic_chip(d, hwirq);
irq_map_generic_chip(d, virq, hwirq);
- irq_domain_set_info(d, virq, hwirq, &gc->chip_types->chip, gc,
- handle_simple_irq, NULL, NULL);
return 0;
}
@@ -198,30 +360,318 @@ static void stm32_exti_free(struct irq_domain *d, unsigned int virq,
irq_domain_reset_irq_data(data);
}
-struct irq_domain_ops irq_exti_domain_ops = {
+static const struct irq_domain_ops irq_exti_domain_ops = {
.map = irq_map_generic_chip,
- .xlate = irq_domain_xlate_onetwocell,
.alloc = stm32_exti_alloc,
.free = stm32_exti_free,
};
-static int
-__init stm32_exti_init(const struct stm32_exti_bank **stm32_exti_banks,
- int bank_nr, struct device_node *node)
+static void stm32_irq_ack(struct irq_data *d)
+{
+ struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d);
+ struct stm32_exti_chip_data *chip_data = gc->private;
+ const struct stm32_exti_bank *stm32_bank = chip_data->reg_bank;
+
+ irq_gc_lock(gc);
+
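+ /* pending flags are write-1-to-clear: write the line's mask to rpr/fpr */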
+ irq_reg_writel(gc, d->mask, stm32_bank->rpr_ofst);
+ if (stm32_bank->fpr_ofst != UNDEF_REG)
+ irq_reg_writel(gc, d->mask, stm32_bank->fpr_ofst);
+
+ irq_gc_unlock(gc);
+}
+
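+/* read-modify-write helpers for one line's bit in @reg; callers hold chip_data->rlock */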
+static inline u32 stm32_exti_set_bit(struct irq_data *d, u32 reg)
{
+ struct stm32_exti_chip_data *chip_data = irq_data_get_irq_chip_data(d);
+ void __iomem *base = chip_data->host_data->base;
+ u32 val;
+
+ val = readl_relaxed(base + reg);
+ val |= BIT(d->hwirq % IRQS_PER_BANK);
+ writel_relaxed(val, base + reg);
+
+ return val;
+}
+
+static inline u32 stm32_exti_clr_bit(struct irq_data *d, u32 reg)
+{
+ struct stm32_exti_chip_data *chip_data = irq_data_get_irq_chip_data(d);
+ void __iomem *base = chip_data->host_data->base;
+ u32 val;
+
+ val = readl_relaxed(base + reg);
+ val &= ~BIT(d->hwirq % IRQS_PER_BANK);
+ writel_relaxed(val, base + reg);
+
+ return val;
+}
+
+static void stm32_exti_h_eoi(struct irq_data *d)
+{
+ struct stm32_exti_chip_data *chip_data = irq_data_get_irq_chip_data(d);
+ const struct stm32_exti_bank *stm32_bank = chip_data->reg_bank;
+
+ raw_spin_lock(&chip_data->rlock);
+
+ stm32_exti_set_bit(d, stm32_bank->rpr_ofst);
+ if (stm32_bank->fpr_ofst != UNDEF_REG)
+ stm32_exti_set_bit(d, stm32_bank->fpr_ofst);
+
+ raw_spin_unlock(&chip_data->rlock);
+
+ if (d->parent_data->chip)
+ irq_chip_eoi_parent(d);
+}
+
+static void stm32_exti_h_mask(struct irq_data *d)
+{
+ struct stm32_exti_chip_data *chip_data = irq_data_get_irq_chip_data(d);
+ const struct stm32_exti_bank *stm32_bank = chip_data->reg_bank;
+
+ raw_spin_lock(&chip_data->rlock);
+ chip_data->mask_cache = stm32_exti_clr_bit(d, stm32_bank->imr_ofst);
+ raw_spin_unlock(&chip_data->rlock);
+
+ if (d->parent_data->chip)
+ irq_chip_mask_parent(d);
+}
+
+static void stm32_exti_h_unmask(struct irq_data *d)
+{
+ struct stm32_exti_chip_data *chip_data = irq_data_get_irq_chip_data(d);
+ const struct stm32_exti_bank *stm32_bank = chip_data->reg_bank;
+
+ raw_spin_lock(&chip_data->rlock);
+ chip_data->mask_cache = stm32_exti_set_bit(d, stm32_bank->imr_ofst);
+ raw_spin_unlock(&chip_data->rlock);
+
+ if (d->parent_data->chip)
+ irq_chip_unmask_parent(d);
+}
+
+static int stm32_exti_h_set_type(struct irq_data *d, unsigned int type)
+{
+ struct stm32_exti_chip_data *chip_data = irq_data_get_irq_chip_data(d);
+ const struct stm32_exti_bank *stm32_bank = chip_data->reg_bank;
+ void __iomem *base = chip_data->host_data->base;
+ u32 rtsr, ftsr;
+ int err;
+
+ raw_spin_lock(&chip_data->rlock);
+ rtsr = readl_relaxed(base + stm32_bank->rtsr_ofst);
+ ftsr = readl_relaxed(base + stm32_bank->ftsr_ofst);
+
+ err = stm32_exti_set_type(d, type, &rtsr, &ftsr);
+ if (err) {
+ raw_spin_unlock(&chip_data->rlock);
+ return err;
+ }
+
+ writel_relaxed(rtsr, base + stm32_bank->rtsr_ofst);
+ writel_relaxed(ftsr, base + stm32_bank->ftsr_ofst);
+ raw_spin_unlock(&chip_data->rlock);
+
+ return 0;
+}
+
+static int stm32_exti_h_set_wake(struct irq_data *d, unsigned int on)
+{
+ struct stm32_exti_chip_data *chip_data = irq_data_get_irq_chip_data(d);
+ u32 mask = BIT(d->hwirq % IRQS_PER_BANK);
+
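+ /* only record the wake mask here; it is written to imr at suspend time */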
+ raw_spin_lock(&chip_data->rlock);
+
+ if (on)
+ chip_data->wake_active |= mask;
+ else
+ chip_data->wake_active &= ~mask;
+
+ raw_spin_unlock(&chip_data->rlock);
+
+ return 0;
+}
+
+static int stm32_exti_h_set_affinity(struct irq_data *d,
+ const struct cpumask *dest, bool force)
+{
+ if (d->parent_data->chip)
+ return irq_chip_set_affinity_parent(d, dest, force);
+
+ return -EINVAL;
+}
+
+#ifdef CONFIG_PM
+static int stm32_exti_h_suspend(void)
+{
+ struct stm32_exti_chip_data *chip_data;
+ int i;
+
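+ /* per bank: cache rtsr/ftsr and mask every line except the wake sources */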
+ for (i = 0; i < stm32_host_data->drv_data->bank_nr; i++) {
+ chip_data = &stm32_host_data->chips_data[i];
+ raw_spin_lock(&chip_data->rlock);
+ stm32_chip_suspend(chip_data, chip_data->wake_active);
+ raw_spin_unlock(&chip_data->rlock);
+ }
+
+ return 0;
+}
+
+static void stm32_exti_h_resume(void)
+{
+ struct stm32_exti_chip_data *chip_data;
+ int i;
+
+ for (i = 0; i < stm32_host_data->drv_data->bank_nr; i++) {
+ chip_data = &stm32_host_data->chips_data[i];
+ raw_spin_lock(&chip_data->rlock);
+ stm32_chip_resume(chip_data, chip_data->mask_cache);
+ raw_spin_unlock(&chip_data->rlock);
+ }
+}
+
+static struct syscore_ops stm32_exti_h_syscore_ops = {
+ .suspend = stm32_exti_h_suspend,
+ .resume = stm32_exti_h_resume,
+};
+
+static void stm32_exti_h_syscore_init(void)
+{
+ register_syscore_ops(&stm32_exti_h_syscore_ops);
+}
+#else
+static inline void stm32_exti_h_syscore_init(void) {}
+#endif
+
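+/* irq_chip for the hierarchical (parent domain) flavour of the driver */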
+static struct irq_chip stm32_exti_h_chip = {
+ .name = "stm32-exti-h",
+ .irq_eoi = stm32_exti_h_eoi,
+ .irq_mask = stm32_exti_h_mask,
+ .irq_unmask = stm32_exti_h_unmask,
+ .irq_retrigger = irq_chip_retrigger_hierarchy,
+ .irq_set_type = stm32_exti_h_set_type,
+ .irq_set_wake = stm32_exti_h_set_wake,
+ .flags = IRQCHIP_MASK_ON_SUSPEND,
+#ifdef CONFIG_SMP
+ .irq_set_affinity = stm32_exti_h_set_affinity,
+#endif
+};
+
+static int stm32_exti_h_domain_alloc(struct irq_domain *dm,
+ unsigned int virq,
+ unsigned int nr_irqs, void *data)
+{
+ struct stm32_exti_host_data *host_data = dm->host_data;
+ struct stm32_exti_chip_data *chip_data;
+ struct irq_fwspec *fwspec = data;
+ struct irq_fwspec p_fwspec;
+ irq_hw_number_t hwirq;
+ int p_irq, bank;
+
+ hwirq = fwspec->param[0];
+ bank = hwirq / IRQS_PER_BANK;
+ chip_data = &host_data->chips_data[bank];
+
+ irq_domain_set_hwirq_and_chip(dm, virq, hwirq,
+ &stm32_exti_h_chip, chip_data);
+
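+ /* chain the line to its dedicated parent GIC SPI, when one exists */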
+ p_irq = stm32_exti_to_irq(host_data->drv_data, hwirq);
+ if (p_irq >= 0) {
+ p_fwspec.fwnode = dm->parent->fwnode;
+ p_fwspec.param_count = 3;
+ p_fwspec.param[0] = GIC_SPI;
+ p_fwspec.param[1] = p_irq;
+ p_fwspec.param[2] = IRQ_TYPE_LEVEL_HIGH;
+
+ return irq_domain_alloc_irqs_parent(dm, virq, 1, &p_fwspec);
+ }
+
+ return 0;
+}
+
+static struct
+stm32_exti_host_data *stm32_exti_host_init(const struct stm32_exti_drv_data *dd,
+ struct device_node *node)
+{
+ struct stm32_exti_host_data *host_data;
+
+ host_data = kzalloc(sizeof(*host_data), GFP_KERNEL);
+ if (!host_data)
+ return NULL;
+
+ host_data->drv_data = dd;
+ host_data->chips_data = kcalloc(dd->bank_nr,
+ sizeof(struct stm32_exti_chip_data),
+ GFP_KERNEL);
+ if (!host_data->chips_data)
+ goto free_host_data;
+
+ host_data->base = of_iomap(node, 0);
+ if (!host_data->base) {
+ pr_err("%pOF: Unable to map registers\n", node);
+ goto free_chips_data;
+ }
+
+ stm32_host_data = host_data;
+
+ return host_data;
+
+ /* on failure, free everything here so callers can simply bail out */
+free_chips_data:
+ kfree(host_data->chips_data);
+free_host_data:
+ kfree(host_data);
+
+ return NULL;
+}
+
+static struct
+stm32_exti_chip_data *stm32_exti_chip_init(struct stm32_exti_host_data *h_data,
+ u32 bank_idx,
+ struct device_node *node)
+{
+ const struct stm32_exti_bank *stm32_bank;
+ struct stm32_exti_chip_data *chip_data;
+ void __iomem *base = h_data->base;
+ u32 irqs_mask;
+
+ stm32_bank = h_data->drv_data->exti_banks[bank_idx];
+ chip_data = &h_data->chips_data[bank_idx];
+ chip_data->host_data = h_data;
+ chip_data->reg_bank = stm32_bank;
+
+ raw_spin_lock_init(&chip_data->rlock);
+
+ /* Determine number of irqs supported */
+ writel_relaxed(~0UL, base + stm32_bank->rtsr_ofst);
+ irqs_mask = readl_relaxed(base + stm32_bank->rtsr_ofst);
+
+ /*
+ * This IP has no reset, so after a hot reboot we should
+ * clear the registers to avoid residue from the previous boot
+ */
+ writel_relaxed(0, base + stm32_bank->imr_ofst);
+ writel_relaxed(0, base + stm32_bank->emr_ofst);
+ writel_relaxed(0, base + stm32_bank->rtsr_ofst);
+ writel_relaxed(0, base + stm32_bank->ftsr_ofst);
+ writel_relaxed(~0UL, base + stm32_bank->rpr_ofst);
+ if (stm32_bank->fpr_ofst != UNDEF_REG)
+ writel_relaxed(~0UL, base + stm32_bank->fpr_ofst);
+
+ pr_info("%s: bank%d, External IRQs available:%#x\n",
+ node->full_name, bank_idx, irqs_mask);
+
+ return chip_data;
+}
+
+static int __init stm32_exti_init(const struct stm32_exti_drv_data *drv_data,
+ struct device_node *node)
+{
+ struct stm32_exti_host_data *host_data;
unsigned int clr = IRQ_NOREQUEST | IRQ_NOPROBE | IRQ_NOAUTOEN;
- int nr_irqs, nr_exti, ret, i;
+ int nr_irqs, ret, i;
struct irq_chip_generic *gc;
struct irq_domain *domain;
- void *base;
- base = of_iomap(node, 0);
- if (!base) {
- pr_err("%pOF: Unable to map registers\n", node);
- return -ENOMEM;
+ host_data = stm32_exti_host_init(drv_data, node);
+ if (!host_data) {
+ return -ENOMEM;
}
- domain = irq_domain_add_linear(node, bank_nr * IRQS_PER_BANK,
+ domain = irq_domain_add_linear(node, drv_data->bank_nr * IRQS_PER_BANK,
&irq_exti_domain_ops, NULL);
if (!domain) {
pr_err("%s: Could not register interrupt domain.\n",
@@ -234,44 +684,32 @@ __init stm32_exti_init(const struct stm32_exti_bank **stm32_exti_banks,
handle_edge_irq, clr, 0, 0);
if (ret) {
pr_err("%pOF: Could not allocate generic interrupt chip.\n",
- node);
+ node);
goto out_free_domain;
}
- for (i = 0; i < bank_nr; i++) {
- const struct stm32_exti_bank *stm32_bank = stm32_exti_banks[i];
- u32 irqs_mask;
+ for (i = 0; i < drv_data->bank_nr; i++) {
+ const struct stm32_exti_bank *stm32_bank;
+ struct stm32_exti_chip_data *chip_data;
+
+ stm32_bank = drv_data->exti_banks[i];
+ chip_data = stm32_exti_chip_init(host_data, i, node);
gc = irq_get_domain_generic_chip(domain, i * IRQS_PER_BANK);
- gc->reg_base = base;
+ gc->reg_base = host_data->base;
gc->chip_types->type = IRQ_TYPE_EDGE_BOTH;
- gc->chip_types->chip.irq_ack = irq_gc_ack_set_bit;
+ gc->chip_types->chip.irq_ack = stm32_irq_ack;
gc->chip_types->chip.irq_mask = irq_gc_mask_clr_bit;
gc->chip_types->chip.irq_unmask = irq_gc_mask_set_bit;
gc->chip_types->chip.irq_set_type = stm32_irq_set_type;
- gc->chip_types->chip.irq_set_wake = stm32_irq_set_wake;
- gc->chip_types->regs.ack = stm32_bank->pr_ofst;
+ gc->chip_types->chip.irq_set_wake = irq_gc_set_wake;
+ gc->suspend = stm32_irq_suspend;
+ gc->resume = stm32_irq_resume;
+ gc->wake_enabled = IRQ_MSK(IRQS_PER_BANK);
+
gc->chip_types->regs.mask = stm32_bank->imr_ofst;
- gc->private = (void *)stm32_bank;
-
- /* Determine number of irqs supported */
- writel_relaxed(~0UL, base + stm32_bank->rtsr_ofst);
- irqs_mask = readl_relaxed(base + stm32_bank->rtsr_ofst);
- nr_exti = fls(readl_relaxed(base + stm32_bank->rtsr_ofst));
-
- /*
- * This IP has no reset, so after hot reboot we should
- * clear registers to avoid residue
- */
- writel_relaxed(0, base + stm32_bank->imr_ofst);
- writel_relaxed(0, base + stm32_bank->emr_ofst);
- writel_relaxed(0, base + stm32_bank->rtsr_ofst);
- writel_relaxed(0, base + stm32_bank->ftsr_ofst);
- writel_relaxed(~0UL, base + stm32_bank->pr_ofst);
-
- pr_info("%s: bank%d, External IRQs available:%#x\n",
- node->full_name, i, irqs_mask);
+ gc->private = (void *)chip_data;
}
nr_irqs = of_irq_count(node);
@@ -287,15 +725,69 @@ __init stm32_exti_init(const struct stm32_exti_bank **stm32_exti_banks,
out_free_domain:
irq_domain_remove(domain);
out_unmap:
- iounmap(base);
+ iounmap(host_data->base);
+ kfree(host_data->chips_data);
+ kfree(host_data);
+ return ret;
+}
+
+static const struct irq_domain_ops stm32_exti_h_domain_ops = {
+ .alloc = stm32_exti_h_domain_alloc,
+ .free = irq_domain_free_irqs_common,
+};
+
+static int
+__init stm32_exti_hierarchy_init(const struct stm32_exti_drv_data *drv_data,
+ struct device_node *node,
+ struct device_node *parent)
+{
+ struct irq_domain *parent_domain, *domain;
+ struct stm32_exti_host_data *host_data;
+ int ret, i;
+
+ parent_domain = irq_find_host(parent);
+ if (!parent_domain) {
+ pr_err("interrupt-parent not found\n");
+ return -EINVAL;
+ }
+
+ host_data = stm32_exti_host_init(drv_data, node);
+ if (!host_data)
+ return -ENOMEM;
+
+ for (i = 0; i < drv_data->bank_nr; i++)
+ stm32_exti_chip_init(host_data, i, node);
+
+ domain = irq_domain_add_hierarchy(parent_domain, 0,
+ drv_data->bank_nr * IRQS_PER_BANK,
+ node, &stm32_exti_h_domain_ops,
+ host_data);
+
+ if (!domain) {
+ pr_err("%s: Could not register exti domain.\n", node->name);
+ ret = -ENOMEM;
+ goto out_unmap;
+ }
+
+ stm32_exti_h_syscore_init();
+
+ return 0;
+
+out_unmap:
+ iounmap(host_data->base);
+ kfree(host_data->chips_data);
+ kfree(host_data);
return ret;
}
static int __init stm32f4_exti_of_init(struct device_node *np,
struct device_node *parent)
{
- return stm32_exti_init(stm32f4xx_exti_banks,
- ARRAY_SIZE(stm32f4xx_exti_banks), np);
+ return stm32_exti_init(&stm32f4xx_drv_data, np);
}
IRQCHIP_DECLARE(stm32f4_exti, "st,stm32-exti", stm32f4_exti_of_init);
@@ -303,8 +795,15 @@ IRQCHIP_DECLARE(stm32f4_exti, "st,stm32-exti", stm32f4_exti_of_init);
static int __init stm32h7_exti_of_init(struct device_node *np,
struct device_node *parent)
{
- return stm32_exti_init(stm32h7xx_exti_banks,
- ARRAY_SIZE(stm32h7xx_exti_banks), np);
+ return stm32_exti_init(&stm32h7xx_drv_data, np);
}
IRQCHIP_DECLARE(stm32h7_exti, "st,stm32h7-exti", stm32h7_exti_of_init);
+
+static int __init stm32mp1_exti_of_init(struct device_node *np,
+ struct device_node *parent)
+{
+ return stm32_exti_hierarchy_init(&stm32mp1_drv_data, np, parent);
+}
+
+IRQCHIP_DECLARE(stm32mp1_exti, "st,stm32mp1-exti", stm32mp1_exti_of_init);
diff --git a/drivers/irqchip/qcom-irq-combiner.c b/drivers/irqchip/qcom-irq-combiner.c
index f31265937439..7f0c0be322e0 100644
--- a/drivers/irqchip/qcom-irq-combiner.c
+++ b/drivers/irqchip/qcom-irq-combiner.c
@@ -1,4 +1,4 @@
-/* Copyright (c) 2015-2016, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2015-2018, The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -68,7 +68,7 @@ static void combiner_handle_irq(struct irq_desc *desc)
bit = readl_relaxed(combiner->regs[reg].addr);
status = bit & combiner->regs[reg].enabled;
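+ /* warn only when bits are pending but none of them is enabled */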
- if (!status)
+ if (bit && !status)
pr_warn_ratelimited("Unexpected IRQ on CPU%d: (%08x %08lx %p)\n",
smp_processor_id(), bit,
combiner->regs[reg].enabled,
diff --git a/drivers/isdn/capi/capi.c b/drivers/isdn/capi/capi.c
index 19cd93783c87..baa1ee2bc2ac 100644
--- a/drivers/isdn/capi/capi.c
+++ b/drivers/isdn/capi/capi.c
@@ -1340,19 +1340,6 @@ static int capi20_proc_show(struct seq_file *m, void *v)
return 0;
}
-static int capi20_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, capi20_proc_show, NULL);
-}
-
-static const struct file_operations capi20_proc_fops = {
- .owner = THIS_MODULE,
- .open = capi20_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
/*
* /proc/capi/capi20ncci:
* applid ncci
@@ -1373,23 +1360,10 @@ static int capi20ncci_proc_show(struct seq_file *m, void *v)
return 0;
}
-static int capi20ncci_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, capi20ncci_proc_show, NULL);
-}
-
-static const struct file_operations capi20ncci_proc_fops = {
- .owner = THIS_MODULE,
- .open = capi20ncci_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
static void __init proc_init(void)
{
- proc_create("capi/capi20", 0, NULL, &capi20_proc_fops);
- proc_create("capi/capi20ncci", 0, NULL, &capi20ncci_proc_fops);
+ proc_create_single("capi/capi20", 0, NULL, capi20_proc_show);
+ proc_create_single("capi/capi20ncci", 0, NULL, capi20ncci_proc_show);
}
static void __exit proc_exit(void)
diff --git a/drivers/isdn/capi/capidrv.c b/drivers/isdn/capi/capidrv.c
index 49fef08858c5..7ac51798949d 100644
--- a/drivers/isdn/capi/capidrv.c
+++ b/drivers/isdn/capi/capidrv.c
@@ -2460,22 +2460,9 @@ static int capidrv_proc_show(struct seq_file *m, void *v)
return 0;
}
-static int capidrv_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, capidrv_proc_show, NULL);
-}
-
-static const struct file_operations capidrv_proc_fops = {
- .owner = THIS_MODULE,
- .open = capidrv_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
static void __init proc_init(void)
{
- proc_create("capi/capidrv", 0, NULL, &capidrv_proc_fops);
+ proc_create_single("capi/capidrv", 0, NULL, capidrv_proc_show);
}
static void __exit proc_exit(void)
diff --git a/drivers/isdn/capi/kcapi.c b/drivers/isdn/capi/kcapi.c
index 46c189ad8d94..0ff517d3c98f 100644
--- a/drivers/isdn/capi/kcapi.c
+++ b/drivers/isdn/capi/kcapi.c
@@ -534,7 +534,8 @@ int attach_capi_ctr(struct capi_ctr *ctr)
init_waitqueue_head(&ctr->state_wait_queue);
sprintf(ctr->procfn, "capi/controllers/%d", ctr->cnr);
- ctr->procent = proc_create_data(ctr->procfn, 0, NULL, ctr->proc_fops, ctr);
+ ctr->procent = proc_create_single_data(ctr->procfn, 0, NULL,
+ ctr->proc_show, ctr);
ncontrollers++;
diff --git a/drivers/isdn/capi/kcapi_proc.c b/drivers/isdn/capi/kcapi_proc.c
index 68db3c5a1063..c94bd12c0f7c 100644
--- a/drivers/isdn/capi/kcapi_proc.c
+++ b/drivers/isdn/capi/kcapi_proc.c
@@ -108,32 +108,6 @@ static const struct seq_operations seq_contrstats_ops = {
.show = contrstats_show,
};
-static int seq_controller_open(struct inode *inode, struct file *file)
-{
- return seq_open(file, &seq_controller_ops);
-}
-
-static int seq_contrstats_open(struct inode *inode, struct file *file)
-{
- return seq_open(file, &seq_contrstats_ops);
-}
-
-static const struct file_operations proc_controller_ops = {
- .owner = THIS_MODULE,
- .open = seq_controller_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
-static const struct file_operations proc_contrstats_ops = {
- .owner = THIS_MODULE,
- .open = seq_contrstats_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
// /proc/capi/applications:
// applid l3cnt dblkcnt dblklen #ncci recvqueuelen
// /proc/capi/applstats:
@@ -216,34 +190,6 @@ static const struct seq_operations seq_applstats_ops = {
.show = applstats_show,
};
-static int
-seq_applications_open(struct inode *inode, struct file *file)
-{
- return seq_open(file, &seq_applications_ops);
-}
-
-static int
-seq_applstats_open(struct inode *inode, struct file *file)
-{
- return seq_open(file, &seq_applstats_ops);
-}
-
-static const struct file_operations proc_applications_ops = {
- .owner = THIS_MODULE,
- .open = seq_applications_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
-static const struct file_operations proc_applstats_ops = {
- .owner = THIS_MODULE,
- .open = seq_applstats_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
// ---------------------------------------------------------------------------
static void *capi_driver_start(struct seq_file *seq, loff_t *pos)
@@ -279,22 +225,6 @@ static const struct seq_operations seq_capi_driver_ops = {
.show = capi_driver_show,
};
-static int
-seq_capi_driver_open(struct inode *inode, struct file *file)
-{
- int err;
- err = seq_open(file, &seq_capi_driver_ops);
- return err;
-}
-
-static const struct file_operations proc_driver_ops = {
- .owner = THIS_MODULE,
- .open = seq_capi_driver_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
// ---------------------------------------------------------------------------
void __init
@@ -302,11 +232,11 @@ kcapi_proc_init(void)
{
proc_mkdir("capi", NULL);
proc_mkdir("capi/controllers", NULL);
- proc_create("capi/controller", 0, NULL, &proc_controller_ops);
- proc_create("capi/contrstats", 0, NULL, &proc_contrstats_ops);
- proc_create("capi/applications", 0, NULL, &proc_applications_ops);
- proc_create("capi/applstats", 0, NULL, &proc_applstats_ops);
- proc_create("capi/driver", 0, NULL, &proc_driver_ops);
+ proc_create_seq("capi/controller", 0, NULL, &seq_controller_ops);
+ proc_create_seq("capi/contrstats", 0, NULL, &seq_contrstats_ops);
+ proc_create_seq("capi/applications", 0, NULL, &seq_applications_ops);
+ proc_create_seq("capi/applstats", 0, NULL, &seq_applstats_ops);
+ proc_create_seq("capi/driver", 0, NULL, &seq_capi_driver_ops);
}
void __exit
diff --git a/drivers/isdn/gigaset/capi.c b/drivers/isdn/gigaset/capi.c
index ccec7778cad2..56748af78c04 100644
--- a/drivers/isdn/gigaset/capi.c
+++ b/drivers/isdn/gigaset/capi.c
@@ -2437,19 +2437,6 @@ static int gigaset_proc_show(struct seq_file *m, void *v)
return 0;
}
-static int gigaset_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, gigaset_proc_show, PDE_DATA(inode));
-}
-
-static const struct file_operations gigaset_proc_fops = {
- .owner = THIS_MODULE,
- .open = gigaset_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
/**
* gigaset_isdn_regdev() - register device to LL
* @cs: device descriptor structure.
@@ -2479,7 +2466,7 @@ int gigaset_isdn_regdev(struct cardstate *cs, const char *isdnid)
iif->ctr.release_appl = gigaset_release_appl;
iif->ctr.send_message = gigaset_send_message;
iif->ctr.procinfo = gigaset_procinfo;
- iif->ctr.proc_fops = &gigaset_proc_fops;
+ iif->ctr.proc_show = gigaset_proc_show;
INIT_LIST_HEAD(&iif->appls);
skb_queue_head_init(&iif->sendqueue);
atomic_set(&iif->sendqlen, 0);
diff --git a/drivers/isdn/hardware/avm/avmcard.h b/drivers/isdn/hardware/avm/avmcard.h
index c95712dbfa9f..cdfa89c71997 100644
--- a/drivers/isdn/hardware/avm/avmcard.h
+++ b/drivers/isdn/hardware/avm/avmcard.h
@@ -556,7 +556,7 @@ u16 b1_send_message(struct capi_ctr *ctrl, struct sk_buff *skb);
void b1_parse_version(avmctrl_info *card);
irqreturn_t b1_interrupt(int interrupt, void *devptr);
-extern const struct file_operations b1ctl_proc_fops;
+int b1_proc_show(struct seq_file *m, void *v);
avmcard_dmainfo *avmcard_dma_alloc(char *name, struct pci_dev *,
long rsize, long ssize);
@@ -576,6 +576,6 @@ void b1dma_register_appl(struct capi_ctr *ctrl,
capi_register_params *rp);
void b1dma_release_appl(struct capi_ctr *ctrl, u16 appl);
u16 b1dma_send_message(struct capi_ctr *ctrl, struct sk_buff *skb);
-extern const struct file_operations b1dmactl_proc_fops;
+int b1dma_proc_show(struct seq_file *m, void *v);
#endif /* _AVMCARD_H_ */
diff --git a/drivers/isdn/hardware/avm/b1.c b/drivers/isdn/hardware/avm/b1.c
index b1833d08a5fe..5ee5489d3f15 100644
--- a/drivers/isdn/hardware/avm/b1.c
+++ b/drivers/isdn/hardware/avm/b1.c
@@ -637,7 +637,7 @@ irqreturn_t b1_interrupt(int interrupt, void *devptr)
}
/* ------------------------------------------------------------- */
-static int b1ctl_proc_show(struct seq_file *m, void *v)
+int b1_proc_show(struct seq_file *m, void *v)
{
struct capi_ctr *ctrl = m->private;
avmctrl_info *cinfo = (avmctrl_info *)(ctrl->driverdata);
@@ -699,20 +699,7 @@ static int b1ctl_proc_show(struct seq_file *m, void *v)
return 0;
}
-
-static int b1ctl_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, b1ctl_proc_show, PDE_DATA(inode));
-}
-
-const struct file_operations b1ctl_proc_fops = {
- .owner = THIS_MODULE,
- .open = b1ctl_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-EXPORT_SYMBOL(b1ctl_proc_fops);
+EXPORT_SYMBOL(b1_proc_show);
/* ------------------------------------------------------------- */
diff --git a/drivers/isdn/hardware/avm/b1dma.c b/drivers/isdn/hardware/avm/b1dma.c
index 9538a9e5e1a8..6a3dc9937ce5 100644
--- a/drivers/isdn/hardware/avm/b1dma.c
+++ b/drivers/isdn/hardware/avm/b1dma.c
@@ -858,7 +858,7 @@ u16 b1dma_send_message(struct capi_ctr *ctrl, struct sk_buff *skb)
/* ------------------------------------------------------------- */
-static int b1dmactl_proc_show(struct seq_file *m, void *v)
+int b1dma_proc_show(struct seq_file *m, void *v)
{
struct capi_ctr *ctrl = m->private;
avmctrl_info *cinfo = (avmctrl_info *)(ctrl->driverdata);
@@ -941,20 +941,7 @@ static int b1dmactl_proc_show(struct seq_file *m, void *v)
return 0;
}
-
-static int b1dmactl_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, b1dmactl_proc_show, PDE_DATA(inode));
-}
-
-const struct file_operations b1dmactl_proc_fops = {
- .owner = THIS_MODULE,
- .open = b1dmactl_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-EXPORT_SYMBOL(b1dmactl_proc_fops);
+EXPORT_SYMBOL(b1dma_proc_show);
/* ------------------------------------------------------------- */
diff --git a/drivers/isdn/hardware/avm/b1isa.c b/drivers/isdn/hardware/avm/b1isa.c
index 54e871a47387..cdfea72e0ef6 100644
--- a/drivers/isdn/hardware/avm/b1isa.c
+++ b/drivers/isdn/hardware/avm/b1isa.c
@@ -121,7 +121,7 @@ static int b1isa_probe(struct pci_dev *pdev)
cinfo->capi_ctrl.load_firmware = b1_load_firmware;
cinfo->capi_ctrl.reset_ctr = b1_reset_ctr;
cinfo->capi_ctrl.procinfo = b1isa_procinfo;
- cinfo->capi_ctrl.proc_fops = &b1ctl_proc_fops;
+ cinfo->capi_ctrl.proc_show = b1_proc_show;
strcpy(cinfo->capi_ctrl.name, card->name);
retval = attach_capi_ctr(&cinfo->capi_ctrl);
diff --git a/drivers/isdn/hardware/avm/b1pci.c b/drivers/isdn/hardware/avm/b1pci.c
index ac4863c2ecbc..b76b57a82c02 100644
--- a/drivers/isdn/hardware/avm/b1pci.c
+++ b/drivers/isdn/hardware/avm/b1pci.c
@@ -112,7 +112,7 @@ static int b1pci_probe(struct capicardparams *p, struct pci_dev *pdev)
cinfo->capi_ctrl.load_firmware = b1_load_firmware;
cinfo->capi_ctrl.reset_ctr = b1_reset_ctr;
cinfo->capi_ctrl.procinfo = b1pci_procinfo;
- cinfo->capi_ctrl.proc_fops = &b1ctl_proc_fops;
+ cinfo->capi_ctrl.proc_show = b1_proc_show;
strcpy(cinfo->capi_ctrl.name, card->name);
cinfo->capi_ctrl.owner = THIS_MODULE;
@@ -251,7 +251,7 @@ static int b1pciv4_probe(struct capicardparams *p, struct pci_dev *pdev)
cinfo->capi_ctrl.load_firmware = b1dma_load_firmware;
cinfo->capi_ctrl.reset_ctr = b1dma_reset_ctr;
cinfo->capi_ctrl.procinfo = b1pciv4_procinfo;
- cinfo->capi_ctrl.proc_fops = &b1dmactl_proc_fops;
+ cinfo->capi_ctrl.proc_show = b1dma_proc_show;
strcpy(cinfo->capi_ctrl.name, card->name);
retval = attach_capi_ctr(&cinfo->capi_ctrl);
diff --git a/drivers/isdn/hardware/avm/b1pcmcia.c b/drivers/isdn/hardware/avm/b1pcmcia.c
index 6b0d19d963d5..3aca16e62902 100644
--- a/drivers/isdn/hardware/avm/b1pcmcia.c
+++ b/drivers/isdn/hardware/avm/b1pcmcia.c
@@ -108,7 +108,7 @@ static int b1pcmcia_add_card(unsigned int port, unsigned irq,
cinfo->capi_ctrl.load_firmware = b1_load_firmware;
cinfo->capi_ctrl.reset_ctr = b1_reset_ctr;
cinfo->capi_ctrl.procinfo = b1pcmcia_procinfo;
- cinfo->capi_ctrl.proc_fops = &b1ctl_proc_fops;
+ cinfo->capi_ctrl.proc_show = b1_proc_show;
strcpy(cinfo->capi_ctrl.name, card->name);
retval = attach_capi_ctr(&cinfo->capi_ctrl);
diff --git a/drivers/isdn/hardware/avm/c4.c b/drivers/isdn/hardware/avm/c4.c
index 034cabac699d..ac72cd204c4d 100644
--- a/drivers/isdn/hardware/avm/c4.c
+++ b/drivers/isdn/hardware/avm/c4.c
@@ -1127,19 +1127,6 @@ static int c4_proc_show(struct seq_file *m, void *v)
return 0;
}
-static int c4_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, c4_proc_show, PDE_DATA(inode));
-}
-
-static const struct file_operations c4_proc_fops = {
- .owner = THIS_MODULE,
- .open = c4_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
/* ------------------------------------------------------------- */
static int c4_add_card(struct capicardparams *p, struct pci_dev *dev,
@@ -1211,7 +1198,7 @@ static int c4_add_card(struct capicardparams *p, struct pci_dev *dev,
cinfo->capi_ctrl.load_firmware = c4_load_firmware;
cinfo->capi_ctrl.reset_ctr = c4_reset_ctr;
cinfo->capi_ctrl.procinfo = c4_procinfo;
- cinfo->capi_ctrl.proc_fops = &c4_proc_fops;
+ cinfo->capi_ctrl.proc_show = c4_proc_show;
strcpy(cinfo->capi_ctrl.name, card->name);
retval = attach_capi_ctr(&cinfo->capi_ctrl);
diff --git a/drivers/isdn/hardware/avm/t1isa.c b/drivers/isdn/hardware/avm/t1isa.c
index 9f80d20ced87..2153619c5b31 100644
--- a/drivers/isdn/hardware/avm/t1isa.c
+++ b/drivers/isdn/hardware/avm/t1isa.c
@@ -430,7 +430,7 @@ static int t1isa_probe(struct pci_dev *pdev, int cardnr)
cinfo->capi_ctrl.load_firmware = t1isa_load_firmware;
cinfo->capi_ctrl.reset_ctr = t1isa_reset_ctr;
cinfo->capi_ctrl.procinfo = t1isa_procinfo;
- cinfo->capi_ctrl.proc_fops = &b1ctl_proc_fops;
+ cinfo->capi_ctrl.proc_show = b1_proc_show;
strcpy(cinfo->capi_ctrl.name, card->name);
retval = attach_capi_ctr(&cinfo->capi_ctrl);
diff --git a/drivers/isdn/hardware/avm/t1pci.c b/drivers/isdn/hardware/avm/t1pci.c
index 2180b1685691..f5ed1d5004c9 100644
--- a/drivers/isdn/hardware/avm/t1pci.c
+++ b/drivers/isdn/hardware/avm/t1pci.c
@@ -119,7 +119,7 @@ static int t1pci_add_card(struct capicardparams *p, struct pci_dev *pdev)
cinfo->capi_ctrl.load_firmware = b1dma_load_firmware;
cinfo->capi_ctrl.reset_ctr = b1dma_reset_ctr;
cinfo->capi_ctrl.procinfo = t1pci_procinfo;
- cinfo->capi_ctrl.proc_fops = &b1dmactl_proc_fops;
+ cinfo->capi_ctrl.proc_show = b1dma_proc_show;
strcpy(cinfo->capi_ctrl.name, card->name);
retval = attach_capi_ctr(&cinfo->capi_ctrl);
diff --git a/drivers/isdn/hardware/eicon/capimain.c b/drivers/isdn/hardware/eicon/capimain.c
index be36d82004d6..f9244dc1c3c9 100644
--- a/drivers/isdn/hardware/eicon/capimain.c
+++ b/drivers/isdn/hardware/eicon/capimain.c
@@ -90,19 +90,6 @@ static int diva_ctl_proc_show(struct seq_file *m, void *v)
return 0;
}
-static int diva_ctl_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, diva_ctl_proc_show, NULL);
-}
-
-static const struct file_operations diva_ctl_proc_fops = {
- .owner = THIS_MODULE,
- .open = diva_ctl_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
/*
* set additional os settings in capi_ctr struct
*/
@@ -111,7 +98,7 @@ void diva_os_set_controller_struct(struct capi_ctr *ctrl)
ctrl->driver_name = DRIVERLNAME;
ctrl->load_firmware = NULL;
ctrl->reset_ctr = NULL;
- ctrl->proc_fops = &diva_ctl_proc_fops;
+ ctrl->proc_show = diva_ctl_proc_show;
ctrl->owner = THIS_MODULE;
}
diff --git a/drivers/isdn/hardware/eicon/diva.c b/drivers/isdn/hardware/eicon/diva.c
index 944a7f338099..1b25d8bc153a 100644
--- a/drivers/isdn/hardware/eicon/diva.c
+++ b/drivers/isdn/hardware/eicon/diva.c
@@ -388,10 +388,10 @@ void divasa_xdi_driver_unload(void)
** Receive and process command from user mode utility
*/
void *diva_xdi_open_adapter(void *os_handle, const void __user *src,
- int length,
+ int length, void *mptr,
divas_xdi_copy_from_user_fn_t cp_fn)
{
- diva_xdi_um_cfg_cmd_t msg;
+ diva_xdi_um_cfg_cmd_t *msg = (diva_xdi_um_cfg_cmd_t *)mptr;
diva_os_xdi_adapter_t *a = NULL;
diva_os_spin_lock_magic_t old_irql;
struct list_head *tmp;
@@ -401,21 +401,21 @@ void *diva_xdi_open_adapter(void *os_handle, const void __user *src,
length, sizeof(diva_xdi_um_cfg_cmd_t)))
return NULL;
}
- if ((*cp_fn) (os_handle, &msg, src, sizeof(msg)) <= 0) {
+ if ((*cp_fn) (os_handle, msg, src, sizeof(*msg)) <= 0) {
DBG_ERR(("A: A(?) open, write error"))
return NULL;
}
diva_os_enter_spin_lock(&adapter_lock, &old_irql, "open_adapter");
list_for_each(tmp, &adapter_queue) {
a = list_entry(tmp, diva_os_xdi_adapter_t, link);
- if (a->controller == (int)msg.adapter)
+ if (a->controller == (int)msg->adapter)
break;
a = NULL;
}
diva_os_leave_spin_lock(&adapter_lock, &old_irql, "open_adapter");
if (!a) {
- DBG_ERR(("A: A(%d) open, adapter not found", msg.adapter))
+ DBG_ERR(("A: A(%d) open, adapter not found", msg->adapter))
}
return (a);
@@ -437,8 +437,10 @@ void diva_xdi_close_adapter(void *adapter, void *os_handle)
int
diva_xdi_write(void *adapter, void *os_handle, const void __user *src,
- int length, divas_xdi_copy_from_user_fn_t cp_fn)
+ int length, void *mptr,
+ divas_xdi_copy_from_user_fn_t cp_fn)
{
+ diva_xdi_um_cfg_cmd_t *msg = (diva_xdi_um_cfg_cmd_t *)mptr;
diva_os_xdi_adapter_t *a = (diva_os_xdi_adapter_t *) adapter;
void *data;
@@ -459,7 +461,13 @@ diva_xdi_write(void *adapter, void *os_handle, const void __user *src,
return (-2);
}
- length = (*cp_fn) (os_handle, data, src, length);
+ if (msg) {
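+ /* the header was already fetched by the caller; copy only the remaining payload */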
+ *(diva_xdi_um_cfg_cmd_t *)data = *msg;
+ length = (*cp_fn) (os_handle, (char *)data + sizeof(*msg),
+ src + sizeof(*msg), length - sizeof(*msg));
+ } else {
+ length = (*cp_fn) (os_handle, data, src, length);
+ }
if (length > 0) {
if ((*(a->interface.cmd_proc))
(a, (diva_xdi_um_cfg_cmd_t *) data, length)) {
diff --git a/drivers/isdn/hardware/eicon/diva.h b/drivers/isdn/hardware/eicon/diva.h
index b067032093a8..1ad76650fbf9 100644
--- a/drivers/isdn/hardware/eicon/diva.h
+++ b/drivers/isdn/hardware/eicon/diva.h
@@ -20,10 +20,11 @@ int diva_xdi_read(void *adapter, void *os_handle, void __user *dst,
int max_length, divas_xdi_copy_to_user_fn_t cp_fn);
int diva_xdi_write(void *adapter, void *os_handle, const void __user *src,
- int length, divas_xdi_copy_from_user_fn_t cp_fn);
+ int length, void *msg,
+ divas_xdi_copy_from_user_fn_t cp_fn);
void *diva_xdi_open_adapter(void *os_handle, const void __user *src,
- int length,
+ int length, void *msg,
divas_xdi_copy_from_user_fn_t cp_fn);
void diva_xdi_close_adapter(void *adapter, void *os_handle);
diff --git a/drivers/isdn/hardware/eicon/diva_didd.c b/drivers/isdn/hardware/eicon/diva_didd.c
index fab6ccfb00d5..60e79257dd5f 100644
--- a/drivers/isdn/hardware/eicon/diva_didd.c
+++ b/drivers/isdn/hardware/eicon/diva_didd.c
@@ -78,26 +78,13 @@ static int divadidd_proc_show(struct seq_file *m, void *v)
return 0;
}
-static int divadidd_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, divadidd_proc_show, NULL);
-}
-
-static const struct file_operations divadidd_proc_fops = {
- .owner = THIS_MODULE,
- .open = divadidd_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
static int __init create_proc(void)
{
proc_net_eicon = proc_mkdir("eicon", init_net.proc_net);
if (proc_net_eicon) {
- proc_didd = proc_create(DRIVERLNAME, S_IRUGO, proc_net_eicon,
- &divadidd_proc_fops);
+ proc_didd = proc_create_single(DRIVERLNAME, S_IRUGO,
+ proc_net_eicon, divadidd_proc_show);
return (1);
}
return (0);
diff --git a/drivers/isdn/hardware/eicon/divasi.c b/drivers/isdn/hardware/eicon/divasi.c
index 525518c945fe..e7081e0c0e35 100644
--- a/drivers/isdn/hardware/eicon/divasi.c
+++ b/drivers/isdn/hardware/eicon/divasi.c
@@ -101,23 +101,10 @@ static int um_idi_proc_show(struct seq_file *m, void *v)
return 0;
}
-static int um_idi_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, um_idi_proc_show, NULL);
-}
-
-static const struct file_operations um_idi_proc_fops = {
- .owner = THIS_MODULE,
- .open = um_idi_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
static int __init create_um_idi_proc(void)
{
- um_idi_proc_entry = proc_create(DRIVERLNAME, S_IRUGO, proc_net_eicon,
- &um_idi_proc_fops);
+ um_idi_proc_entry = proc_create_single(DRIVERLNAME, S_IRUGO,
+ proc_net_eicon, um_idi_proc_show);
if (!um_idi_proc_entry)
return (0);
return (1);
diff --git a/drivers/isdn/hardware/eicon/divasmain.c b/drivers/isdn/hardware/eicon/divasmain.c
index b9980e84f9db..b6a3950b2564 100644
--- a/drivers/isdn/hardware/eicon/divasmain.c
+++ b/drivers/isdn/hardware/eicon/divasmain.c
@@ -591,19 +591,22 @@ static int divas_release(struct inode *inode, struct file *file)
static ssize_t divas_write(struct file *file, const char __user *buf,
size_t count, loff_t *ppos)
{
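+ /* fetch the cfg header once into msg and reuse it, so user space cannot change it between reads */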
+ diva_xdi_um_cfg_cmd_t msg;
int ret = -EINVAL;
if (!file->private_data) {
file->private_data = diva_xdi_open_adapter(file, buf,
- count,
+ count, &msg,
xdi_copy_from_user);
- }
- if (!file->private_data) {
- return (-ENODEV);
+ if (!file->private_data)
+ return (-ENODEV);
+ ret = diva_xdi_write(file->private_data, file,
+ buf, count, &msg, xdi_copy_from_user);
+ } else {
+ ret = diva_xdi_write(file->private_data, file,
+ buf, count, NULL, xdi_copy_from_user);
}
- ret = diva_xdi_write(file->private_data, file,
- buf, count, xdi_copy_from_user);
switch (ret) {
case -1: /* Message should be removed from rx mailbox first */
ret = -EBUSY;
@@ -622,11 +625,12 @@ static ssize_t divas_write(struct file *file, const char __user *buf,
static ssize_t divas_read(struct file *file, char __user *buf,
size_t count, loff_t *ppos)
{
+ diva_xdi_um_cfg_cmd_t msg;
int ret = -EINVAL;
if (!file->private_data) {
file->private_data = diva_xdi_open_adapter(file, buf,
- count,
+ count, &msg,
xdi_copy_from_user);
}
if (!file->private_data) {
diff --git a/drivers/isdn/hysdn/hycapi.c b/drivers/isdn/hysdn/hycapi.c
index eac0f51a0f60..a2c15cd7bf67 100644
--- a/drivers/isdn/hysdn/hycapi.c
+++ b/drivers/isdn/hysdn/hycapi.c
@@ -467,19 +467,6 @@ static int hycapi_proc_show(struct seq_file *m, void *v)
return 0;
}
-static int hycapi_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, hycapi_proc_show, PDE_DATA(inode));
-}
-
-static const struct file_operations hycapi_proc_fops = {
- .owner = THIS_MODULE,
- .open = hycapi_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
/**************************************************************
hycapi_load_firmware
@@ -774,7 +761,7 @@ hycapi_capi_create(hysdn_card *card)
ctrl->load_firmware = hycapi_load_firmware;
ctrl->reset_ctr = hycapi_reset_ctr;
ctrl->procinfo = hycapi_procinfo;
- ctrl->proc_fops = &hycapi_proc_fops;
+ ctrl->proc_show = hycapi_proc_show;
strcpy(ctrl->name, cinfo->cardname);
ctrl->owner = THIS_MODULE;
diff --git a/drivers/isdn/mISDN/socket.c b/drivers/isdn/mISDN/socket.c
index 1f8f489b4167..98f90aadd141 100644
--- a/drivers/isdn/mISDN/socket.c
+++ b/drivers/isdn/mISDN/socket.c
@@ -588,7 +588,7 @@ static const struct proto_ops data_sock_ops = {
.getname = data_sock_getname,
.sendmsg = mISDN_sock_sendmsg,
.recvmsg = mISDN_sock_recvmsg,
- .poll = datagram_poll,
+ .poll_mask = datagram_poll_mask,
.listen = sock_no_listen,
.shutdown = sock_no_shutdown,
.setsockopt = data_sock_setsockopt,
@@ -745,7 +745,6 @@ static const struct proto_ops base_sock_ops = {
.getname = sock_no_getname,
.sendmsg = sock_no_sendmsg,
.recvmsg = sock_no_recvmsg,
- .poll = sock_no_poll,
.listen = sock_no_listen,
.shutdown = sock_no_shutdown,
.setsockopt = sock_no_setsockopt,
diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c
index 63171cdce270..60aa7bc5a630 100644
--- a/drivers/lightnvm/core.c
+++ b/drivers/lightnvm/core.c
@@ -431,7 +431,7 @@ static int nvm_create_tgt(struct nvm_dev *dev, struct nvm_ioctl_create *create)
return 0;
err_sysfs:
if (tt->exit)
- tt->exit(targetdata);
+ tt->exit(targetdata, true);
err_init:
blk_cleanup_queue(tqueue);
tdisk->queue = NULL;
@@ -446,7 +446,7 @@ err_reserve:
return ret;
}
-static void __nvm_remove_target(struct nvm_target *t)
+static void __nvm_remove_target(struct nvm_target *t, bool graceful)
{
struct nvm_tgt_type *tt = t->type;
struct gendisk *tdisk = t->disk;
@@ -459,7 +459,7 @@ static void __nvm_remove_target(struct nvm_target *t)
tt->sysfs_exit(tdisk);
if (tt->exit)
- tt->exit(tdisk->private_data);
+ tt->exit(tdisk->private_data, graceful);
nvm_remove_tgt_dev(t->dev, 1);
put_disk(tdisk);
@@ -489,7 +489,7 @@ static int nvm_remove_tgt(struct nvm_dev *dev, struct nvm_ioctl_remove *remove)
mutex_unlock(&dev->mlock);
return 1;
}
- __nvm_remove_target(t);
+ __nvm_remove_target(t, true);
mutex_unlock(&dev->mlock);
return 0;
@@ -963,7 +963,7 @@ void nvm_unregister(struct nvm_dev *dev)
list_for_each_entry_safe(t, tmp, &dev->targets, list) {
if (t->dev->parent != dev)
continue;
- __nvm_remove_target(t);
+ __nvm_remove_target(t, false);
}
mutex_unlock(&dev->mlock);
diff --git a/drivers/lightnvm/pblk-cache.c b/drivers/lightnvm/pblk-cache.c
index 29a23111b31c..b1c6d7eb6115 100644
--- a/drivers/lightnvm/pblk-cache.c
+++ b/drivers/lightnvm/pblk-cache.c
@@ -44,13 +44,15 @@ retry:
goto out;
}
- if (unlikely(!bio_has_data(bio)))
- goto out;
-
pblk_ppa_set_empty(&w_ctx.ppa);
w_ctx.flags = flags;
- if (bio->bi_opf & REQ_PREFLUSH)
+ if (bio->bi_opf & REQ_PREFLUSH) {
w_ctx.flags |= PBLK_FLUSH_ENTRY;
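+ /* kick the writer so the flush entry is submitted promptly */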
+ pblk_write_kick(pblk);
+ }
+
+ if (unlikely(!bio_has_data(bio)))
+ goto out;
for (i = 0; i < nr_entries; i++) {
void *data = bio_data(bio);
diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c
index 94d5d97c9d8a..ed9cc977c8b3 100644
--- a/drivers/lightnvm/pblk-core.c
+++ b/drivers/lightnvm/pblk-core.c
@@ -40,7 +40,7 @@ static void pblk_line_mark_bb(struct work_struct *work)
}
kfree(ppa);
- mempool_free(line_ws, pblk->gen_ws_pool);
+ mempool_free(line_ws, &pblk->gen_ws_pool);
}
static void pblk_mark_bb(struct pblk *pblk, struct pblk_line *line,
@@ -102,7 +102,7 @@ static void pblk_end_io_erase(struct nvm_rq *rqd)
struct pblk *pblk = rqd->private;
__pblk_end_io_erase(pblk, rqd);
- mempool_free(rqd, pblk->e_rq_pool);
+ mempool_free(rqd, &pblk->e_rq_pool);
}
/*
@@ -237,15 +237,15 @@ struct nvm_rq *pblk_alloc_rqd(struct pblk *pblk, int type)
switch (type) {
case PBLK_WRITE:
case PBLK_WRITE_INT:
- pool = pblk->w_rq_pool;
+ pool = &pblk->w_rq_pool;
rq_size = pblk_w_rq_size;
break;
case PBLK_READ:
- pool = pblk->r_rq_pool;
+ pool = &pblk->r_rq_pool;
rq_size = pblk_g_rq_size;
break;
default:
- pool = pblk->e_rq_pool;
+ pool = &pblk->e_rq_pool;
rq_size = pblk_g_rq_size;
}
@@ -265,20 +265,22 @@ void pblk_free_rqd(struct pblk *pblk, struct nvm_rq *rqd, int type)
case PBLK_WRITE:
kfree(((struct pblk_c_ctx *)nvm_rq_to_pdu(rqd))->lun_bitmap);
case PBLK_WRITE_INT:
- pool = pblk->w_rq_pool;
+ pool = &pblk->w_rq_pool;
break;
case PBLK_READ:
- pool = pblk->r_rq_pool;
+ pool = &pblk->r_rq_pool;
break;
case PBLK_ERASE:
- pool = pblk->e_rq_pool;
+ pool = &pblk->e_rq_pool;
break;
default:
pr_err("pblk: trying to free unknown rqd type\n");
return;
}
- nvm_dev_dma_free(dev->parent, rqd->meta_list, rqd->dma_meta_list);
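+ /* not every request has a DMA meta list attached */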
+ if (rqd->meta_list)
+ nvm_dev_dma_free(dev->parent, rqd->meta_list,
+ rqd->dma_meta_list);
mempool_free(rqd, pool);
}
@@ -292,7 +294,7 @@ void pblk_bio_free_pages(struct pblk *pblk, struct bio *bio, int off,
for (i = off; i < nr_pages + off; i++) {
bv = bio->bi_io_vec[i];
- mempool_free(bv.bv_page, pblk->page_bio_pool);
+ mempool_free(bv.bv_page, &pblk->page_bio_pool);
}
}
@@ -304,23 +306,23 @@ int pblk_bio_add_pages(struct pblk *pblk, struct bio *bio, gfp_t flags,
int i, ret;
for (i = 0; i < nr_pages; i++) {
- page = mempool_alloc(pblk->page_bio_pool, flags);
+ page = mempool_alloc(&pblk->page_bio_pool, flags);
ret = bio_add_pc_page(q, bio, page, PBLK_EXPOSED_PAGE_SIZE, 0);
if (ret != PBLK_EXPOSED_PAGE_SIZE) {
pr_err("pblk: could not add page to bio\n");
- mempool_free(page, pblk->page_bio_pool);
+ mempool_free(page, &pblk->page_bio_pool);
goto err;
}
}
return 0;
err:
- pblk_bio_free_pages(pblk, bio, 0, i - 1);
+ pblk_bio_free_pages(pblk, bio, (bio->bi_vcnt - i), i);
return -1;
}
-static void pblk_write_kick(struct pblk *pblk)
+void pblk_write_kick(struct pblk *pblk)
{
wake_up_process(pblk->writer_ts);
mod_timer(&pblk->wtimer, jiffies + msecs_to_jiffies(1000));
@@ -342,13 +344,6 @@ void pblk_write_should_kick(struct pblk *pblk)
pblk_write_kick(pblk);
}
-void pblk_end_io_sync(struct nvm_rq *rqd)
-{
- struct completion *waiting = rqd->private;
-
- complete(waiting);
-}
-
static void pblk_wait_for_meta(struct pblk *pblk)
{
do {
@@ -380,7 +375,13 @@ struct list_head *pblk_line_gc_list(struct pblk *pblk, struct pblk_line *line)
lockdep_assert_held(&line->lock);
- if (!vsc) {
+ if (line->w_err_gc->has_write_err) {
+ if (line->gc_group != PBLK_LINEGC_WERR) {
+ line->gc_group = PBLK_LINEGC_WERR;
+ move_list = &l_mg->gc_werr_list;
+ pblk_rl_werr_line_in(&pblk->rl);
+ }
+ } else if (!vsc) {
if (line->gc_group != PBLK_LINEGC_FULL) {
line->gc_group = PBLK_LINEGC_FULL;
move_list = &l_mg->gc_full_list;
@@ -467,16 +468,13 @@ int pblk_submit_io(struct pblk *pblk, struct nvm_rq *rqd)
{
struct nvm_tgt_dev *dev = pblk->dev;
-#ifdef CONFIG_NVM_DEBUG
- int ret;
+ atomic_inc(&pblk->inflight_io);
- ret = pblk_check_io(pblk, rqd);
- if (ret)
- return ret;
+#ifdef CONFIG_NVM_DEBUG
+ if (pblk_check_io(pblk, rqd))
+ return NVM_IO_ERR;
#endif
- atomic_inc(&pblk->inflight_io);
-
return nvm_submit_io(dev, rqd);
}
@@ -484,16 +482,13 @@ int pblk_submit_io_sync(struct pblk *pblk, struct nvm_rq *rqd)
{
struct nvm_tgt_dev *dev = pblk->dev;
-#ifdef CONFIG_NVM_DEBUG
- int ret;
+ atomic_inc(&pblk->inflight_io);
- ret = pblk_check_io(pblk, rqd);
- if (ret)
- return ret;
+#ifdef CONFIG_NVM_DEBUG
+ if (pblk_check_io(pblk, rqd))
+ return NVM_IO_ERR;
#endif
- atomic_inc(&pblk->inflight_io);
-
return nvm_submit_io_sync(dev, rqd);
}
@@ -856,9 +851,10 @@ static int pblk_line_submit_smeta_io(struct pblk *pblk, struct pblk_line *line,
atomic_dec(&pblk->inflight_io);
if (rqd.error) {
- if (dir == PBLK_WRITE)
+ if (dir == PBLK_WRITE) {
pblk_log_write_err(pblk, &rqd);
- else if (dir == PBLK_READ)
+ ret = 1;
+ } else if (dir == PBLK_READ)
pblk_log_read_err(pblk, &rqd);
}
@@ -1071,6 +1067,25 @@ static int pblk_line_init_metadata(struct pblk *pblk, struct pblk_line *line,
return 1;
}
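+/* allocate line bitmaps outside the line lock so GFP_KERNEL can be used */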
+static int pblk_line_alloc_bitmaps(struct pblk *pblk, struct pblk_line *line)
+{
+ struct pblk_line_meta *lm = &pblk->lm;
+
+ line->map_bitmap = kzalloc(lm->sec_bitmap_len, GFP_KERNEL);
+ if (!line->map_bitmap)
+ return -ENOMEM;
+
+ /* will be initialized using bb info from map_bitmap */
+ line->invalid_bitmap = kmalloc(lm->sec_bitmap_len, GFP_KERNEL);
+ if (!line->invalid_bitmap) {
+ kfree(line->map_bitmap);
+ line->map_bitmap = NULL;
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
/* For now lines are always assumed full lines. Thus, smeta former and current
* lun bitmaps are omitted.
*/
@@ -1108,7 +1123,7 @@ static int pblk_line_init_bb(struct pblk *pblk, struct pblk_line *line,
if (init && pblk_line_submit_smeta_io(pblk, line, off, PBLK_WRITE)) {
pr_debug("pblk: line smeta I/O failed. Retry\n");
- return 1;
+ return 0;
}
bitmap_copy(line->invalid_bitmap, line->map_bitmap, lm->sec_per_line);
@@ -1174,19 +1189,9 @@ static int pblk_prepare_new_line(struct pblk *pblk, struct pblk_line *line)
static int pblk_line_prepare(struct pblk *pblk, struct pblk_line *line)
{
struct pblk_line_meta *lm = &pblk->lm;
+ int blk_in_line = atomic_read(&line->blk_in_line);
int blk_to_erase;
- line->map_bitmap = kzalloc(lm->sec_bitmap_len, GFP_ATOMIC);
- if (!line->map_bitmap)
- return -ENOMEM;
-
- /* will be initialized using bb info from map_bitmap */
- line->invalid_bitmap = kmalloc(lm->sec_bitmap_len, GFP_ATOMIC);
- if (!line->invalid_bitmap) {
- kfree(line->map_bitmap);
- return -ENOMEM;
- }
-
/* Bad blocks do not need to be erased */
bitmap_copy(line->erase_bitmap, line->blk_bitmap, lm->blk_per_line);
@@ -1199,16 +1204,19 @@ static int pblk_line_prepare(struct pblk *pblk, struct pblk_line *line)
blk_to_erase = pblk_prepare_new_line(pblk, line);
line->state = PBLK_LINESTATE_FREE;
} else {
- blk_to_erase = atomic_read(&line->blk_in_line);
+ blk_to_erase = blk_in_line;
}
- if (line->state != PBLK_LINESTATE_FREE) {
- kfree(line->map_bitmap);
- kfree(line->invalid_bitmap);
+ if (blk_in_line < lm->min_blk_line) {
spin_unlock(&line->lock);
+ return -EAGAIN;
+ }
+
+ if (line->state != PBLK_LINESTATE_FREE) {
WARN(1, "pblk: corrupted line %d, state %d\n",
line->id, line->state);
- return -EAGAIN;
+ spin_unlock(&line->lock);
+ return -EINTR;
}
line->state = PBLK_LINESTATE_OPEN;
@@ -1241,13 +1249,16 @@ int pblk_line_recov_alloc(struct pblk *pblk, struct pblk_line *line)
}
spin_unlock(&l_mg->free_lock);
- pblk_rl_free_lines_dec(&pblk->rl, line, true);
+ ret = pblk_line_alloc_bitmaps(pblk, line);
+ if (ret)
+ return ret;
if (!pblk_line_init_bb(pblk, line, 0)) {
list_add(&line->list, &l_mg->free_list);
return -EINTR;
}
+ pblk_rl_free_lines_dec(&pblk->rl, line, true);
return 0;
}
@@ -1259,6 +1270,24 @@ void pblk_line_recov_close(struct pblk *pblk, struct pblk_line *line)
line->emeta = NULL;
}
+static void pblk_line_reinit(struct pblk_line *line)
+{
+ *line->vsc = cpu_to_le32(EMPTY_ENTRY);
+
+ line->map_bitmap = NULL;
+ line->invalid_bitmap = NULL;
+ line->smeta = NULL;
+ line->emeta = NULL;
+}
+
+void pblk_line_free(struct pblk_line *line)
+{
+ kfree(line->map_bitmap);
+ kfree(line->invalid_bitmap);
+
+ pblk_line_reinit(line);
+}
+
struct pblk_line *pblk_line_get(struct pblk *pblk)
{
struct pblk_line_mgmt *l_mg = &pblk->l_mg;
@@ -1292,10 +1321,14 @@ retry:
ret = pblk_line_prepare(pblk, line);
if (ret) {
- if (ret == -EAGAIN) {
+ switch (ret) {
+ case -EAGAIN:
+ list_add(&line->list, &l_mg->bad_list);
+ goto retry;
+ case -EINTR:
list_add(&line->list, &l_mg->corrupt_list);
goto retry;
- } else {
+ default:
pr_err("pblk: failed to prepare line %d\n", line->id);
list_add(&line->list, &l_mg->free_list);
l_mg->nr_free_lines++;
@@ -1321,11 +1354,14 @@ retry:
return NULL;
}
+ retry_line->map_bitmap = line->map_bitmap;
+ retry_line->invalid_bitmap = line->invalid_bitmap;
retry_line->smeta = line->smeta;
retry_line->emeta = line->emeta;
retry_line->meta_line = line->meta_line;
- pblk_line_free(pblk, line);
+ pblk_line_reinit(line);
+
l_mg->data_line = retry_line;
spin_unlock(&l_mg->free_lock);
@@ -1378,6 +1414,9 @@ struct pblk_line *pblk_line_get_first_data(struct pblk *pblk)
}
spin_unlock(&l_mg->free_lock);
+ if (pblk_line_alloc_bitmaps(pblk, line))
+ return NULL;
+
if (pblk_line_erase(pblk, line)) {
line = pblk_line_retry(pblk, line);
if (!line)
@@ -1449,7 +1488,7 @@ static void pblk_line_close_meta_sync(struct pblk *pblk)
flush_workqueue(pblk->close_wq);
}
-void pblk_pipeline_stop(struct pblk *pblk)
+void __pblk_pipeline_flush(struct pblk *pblk)
{
struct pblk_line_mgmt *l_mg = &pblk->l_mg;
int ret;
@@ -1474,6 +1513,11 @@ void pblk_pipeline_stop(struct pblk *pblk)
flush_workqueue(pblk->bb_wq);
pblk_line_close_meta_sync(pblk);
+}
+
+void __pblk_pipeline_stop(struct pblk *pblk)
+{
+ struct pblk_line_mgmt *l_mg = &pblk->l_mg;
spin_lock(&l_mg->free_lock);
pblk->state = PBLK_STATE_STOPPED;
@@ -1482,6 +1526,12 @@ void pblk_pipeline_stop(struct pblk *pblk)
spin_unlock(&l_mg->free_lock);
}
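+/* graceful stop: flush outstanding data before halting the pipeline */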
+void pblk_pipeline_stop(struct pblk *pblk)
+{
+ __pblk_pipeline_flush(pblk);
+ __pblk_pipeline_stop(pblk);
+}
+
struct pblk_line *pblk_line_replace_data(struct pblk *pblk)
{
struct pblk_line_mgmt *l_mg = &pblk->l_mg;
@@ -1511,6 +1561,9 @@ retry_erase:
goto retry_erase;
}
+ if (pblk_line_alloc_bitmaps(pblk, new))
+ return NULL;
+
retry_setup:
if (!pblk_line_init_metadata(pblk, new, cur)) {
new = pblk_line_retry(pblk, new);
@@ -1550,19 +1603,6 @@ out:
return new;
}
-void pblk_line_free(struct pblk *pblk, struct pblk_line *line)
-{
- kfree(line->map_bitmap);
- kfree(line->invalid_bitmap);
-
- *line->vsc = cpu_to_le32(EMPTY_ENTRY);
-
- line->map_bitmap = NULL;
- line->invalid_bitmap = NULL;
- line->smeta = NULL;
- line->emeta = NULL;
-}
-
static void __pblk_line_put(struct pblk *pblk, struct pblk_line *line)
{
struct pblk_line_mgmt *l_mg = &pblk->l_mg;
@@ -1572,9 +1612,14 @@ static void __pblk_line_put(struct pblk *pblk, struct pblk_line *line)
WARN_ON(line->state != PBLK_LINESTATE_GC);
line->state = PBLK_LINESTATE_FREE;
line->gc_group = PBLK_LINEGC_NONE;
- pblk_line_free(pblk, line);
- spin_unlock(&line->lock);
+ pblk_line_free(line);
+
+ if (line->w_err_gc->has_write_err) {
+ pblk_rl_werr_line_out(&pblk->rl);
+ line->w_err_gc->has_write_err = 0;
+ }
+ spin_unlock(&line->lock);
atomic_dec(&gc->pipeline_gc);
spin_lock(&l_mg->free_lock);
@@ -1593,7 +1638,7 @@ static void pblk_line_put_ws(struct work_struct *work)
struct pblk_line *line = line_put_ws->line;
__pblk_line_put(pblk, line);
- mempool_free(line_put_ws, pblk->gen_ws_pool);
+ mempool_free(line_put_ws, &pblk->gen_ws_pool);
}
void pblk_line_put(struct kref *ref)
@@ -1610,7 +1655,7 @@ void pblk_line_put_wq(struct kref *ref)
struct pblk *pblk = line->pblk;
struct pblk_line_ws *line_put_ws;
- line_put_ws = mempool_alloc(pblk->gen_ws_pool, GFP_ATOMIC);
+ line_put_ws = mempool_alloc(&pblk->gen_ws_pool, GFP_ATOMIC);
if (!line_put_ws)
return;
@@ -1737,11 +1782,34 @@ void pblk_line_close_meta(struct pblk *pblk, struct pblk_line *line)
spin_lock(&l_mg->close_lock);
spin_lock(&line->lock);
+
+ /* Update the in-memory start address for emeta, in case it has
+ * shifted due to write errors
+ */
+ if (line->emeta_ssec != line->cur_sec)
+ line->emeta_ssec = line->cur_sec;
+
list_add_tail(&line->list, &l_mg->emeta_list);
spin_unlock(&line->lock);
spin_unlock(&l_mg->close_lock);
pblk_line_should_sync_meta(pblk);
+}
+
+static void pblk_save_lba_list(struct pblk *pblk, struct pblk_line *line)
+{
+ struct pblk_line_meta *lm = &pblk->lm;
+ struct pblk_line_mgmt *l_mg = &pblk->l_mg;
+ unsigned int lba_list_size = lm->emeta_len[2];
+ struct pblk_w_err_gc *w_err_gc = line->w_err_gc;
+ struct pblk_emeta *emeta = line->emeta;
+
+ w_err_gc->lba_list = pblk_malloc(lba_list_size,
+ l_mg->emeta_alloc_type, GFP_KERNEL);
+ memcpy(w_err_gc->lba_list, emeta_to_lbas(pblk, emeta->buf),
+ lba_list_size);
}
void pblk_line_close_ws(struct work_struct *work)
@@ -1750,9 +1818,16 @@ void pblk_line_close_ws(struct work_struct *work)
ws);
struct pblk *pblk = line_ws->pblk;
struct pblk_line *line = line_ws->line;
+ struct pblk_w_err_gc *w_err_gc = line->w_err_gc;
+
+ /* Write errors make the emeta start address stored in smeta invalid,
+ * so keep a copy of the lba list until we've gc'd the line
+ */
+ if (w_err_gc->has_write_err)
+ pblk_save_lba_list(pblk, line);
pblk_line_close(pblk, line);
- mempool_free(line_ws, pblk->gen_ws_pool);
+ mempool_free(line_ws, &pblk->gen_ws_pool);
}
void pblk_gen_run_ws(struct pblk *pblk, struct pblk_line *line, void *priv,
@@ -1761,7 +1836,7 @@ void pblk_gen_run_ws(struct pblk *pblk, struct pblk_line *line, void *priv,
{
struct pblk_line_ws *line_ws;
- line_ws = mempool_alloc(pblk->gen_ws_pool, gfp_mask);
+ line_ws = mempool_alloc(&pblk->gen_ws_pool, gfp_mask);
line_ws->pblk = pblk;
line_ws->line = line;
diff --git a/drivers/lightnvm/pblk-gc.c b/drivers/lightnvm/pblk-gc.c
index 6851a5c67189..df88f1bdd921 100644
--- a/drivers/lightnvm/pblk-gc.c
+++ b/drivers/lightnvm/pblk-gc.c
@@ -129,6 +129,53 @@ out:
kfree(gc_rq_ws);
}
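+/* read emeta from media, validate it and return a copy of its lba list */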
+static __le64 *get_lba_list_from_emeta(struct pblk *pblk,
+ struct pblk_line *line)
+{
+ struct line_emeta *emeta_buf;
+ struct pblk_line_mgmt *l_mg = &pblk->l_mg;
+ struct pblk_line_meta *lm = &pblk->lm;
+ unsigned int lba_list_size = lm->emeta_len[2];
+ __le64 *lba_list;
+ int ret;
+
+ emeta_buf = pblk_malloc(lm->emeta_len[0],
+ l_mg->emeta_alloc_type, GFP_KERNEL);
+ if (!emeta_buf)
+ return NULL;
+
+ ret = pblk_line_read_emeta(pblk, line, emeta_buf);
+ if (ret) {
+ pr_err("pblk: line %d read emeta failed (%d)\n",
+ line->id, ret);
+ pblk_mfree(emeta_buf, l_mg->emeta_alloc_type);
+ return NULL;
+ }
+
+ /* If this read fails, it means that emeta is corrupted.
+ * For now, leave the line untouched.
+ * TODO: Implement a recovery routine that scans and moves
+ * all sectors on the line.
+ */
+
+ ret = pblk_recov_check_emeta(pblk, emeta_buf);
+ if (ret) {
+ pr_err("pblk: inconsistent emeta (line %d)\n",
+ line->id);
+ pblk_mfree(emeta_buf, l_mg->emeta_alloc_type);
+ return NULL;
+ }
+
+ lba_list = pblk_malloc(lba_list_size,
+ l_mg->emeta_alloc_type, GFP_KERNEL);
+ if (lba_list)
+ memcpy(lba_list, emeta_to_lbas(pblk, emeta_buf), lba_list_size);
+
+ pblk_mfree(emeta_buf, l_mg->emeta_alloc_type);
+
+ return lba_list;
+}
+
static void pblk_gc_line_prepare_ws(struct work_struct *work)
{
struct pblk_line_ws *line_ws = container_of(work, struct pblk_line_ws,
@@ -138,46 +185,26 @@ static void pblk_gc_line_prepare_ws(struct work_struct *work)
struct pblk_line_mgmt *l_mg = &pblk->l_mg;
struct pblk_line_meta *lm = &pblk->lm;
struct pblk_gc *gc = &pblk->gc;
- struct line_emeta *emeta_buf;
struct pblk_line_ws *gc_rq_ws;
struct pblk_gc_rq *gc_rq;
__le64 *lba_list;
unsigned long *invalid_bitmap;
int sec_left, nr_secs, bit;
- int ret;
invalid_bitmap = kmalloc(lm->sec_bitmap_len, GFP_KERNEL);
if (!invalid_bitmap)
goto fail_free_ws;
- emeta_buf = pblk_malloc(lm->emeta_len[0], l_mg->emeta_alloc_type,
- GFP_KERNEL);
- if (!emeta_buf) {
- pr_err("pblk: cannot use GC emeta\n");
- goto fail_free_bitmap;
- }
-
- ret = pblk_line_read_emeta(pblk, line, emeta_buf);
- if (ret) {
- pr_err("pblk: line %d read emeta failed (%d)\n", line->id, ret);
- goto fail_free_emeta;
- }
-
- /* If this read fails, it means that emeta is corrupted. For now, leave
- * the line untouched. TODO: Implement a recovery routine that scans and
- * moves all sectors on the line.
- */
-
- ret = pblk_recov_check_emeta(pblk, emeta_buf);
- if (ret) {
- pr_err("pblk: inconsistent emeta (line %d)\n", line->id);
- goto fail_free_emeta;
- }
-
- lba_list = emeta_to_lbas(pblk, emeta_buf);
- if (!lba_list) {
- pr_err("pblk: could not interpret emeta (line %d)\n", line->id);
- goto fail_free_emeta;
+ if (line->w_err_gc->has_write_err) {
+ lba_list = line->w_err_gc->lba_list;
+ line->w_err_gc->lba_list = NULL;
+ } else {
+ lba_list = get_lba_list_from_emeta(pblk, line);
+ if (!lba_list) {
+ pr_err("pblk: could not interpret emeta (line %d)\n",
+ line->id);
+ goto fail_free_ws;
+ }
}
spin_lock(&line->lock);
@@ -187,14 +214,14 @@ static void pblk_gc_line_prepare_ws(struct work_struct *work)
if (sec_left < 0) {
pr_err("pblk: corrupted GC line (%d)\n", line->id);
- goto fail_free_emeta;
+ goto fail_free_lba_list;
}
bit = -1;
next_rq:
gc_rq = kmalloc(sizeof(struct pblk_gc_rq), GFP_KERNEL);
if (!gc_rq)
- goto fail_free_emeta;
+ goto fail_free_lba_list;
nr_secs = 0;
do {
@@ -240,7 +267,7 @@ next_rq:
goto next_rq;
out:
- pblk_mfree(emeta_buf, l_mg->emeta_alloc_type);
+ pblk_mfree(lba_list, l_mg->emeta_alloc_type);
kfree(line_ws);
kfree(invalid_bitmap);
@@ -251,9 +278,8 @@ out:
fail_free_gc_rq:
kfree(gc_rq);
-fail_free_emeta:
- pblk_mfree(emeta_buf, l_mg->emeta_alloc_type);
-fail_free_bitmap:
+fail_free_lba_list:
+ pblk_mfree(lba_list, l_mg->emeta_alloc_type);
kfree(invalid_bitmap);
fail_free_ws:
kfree(line_ws);
@@ -349,12 +375,14 @@ static struct pblk_line *pblk_gc_get_victim_line(struct pblk *pblk,
static bool pblk_gc_should_run(struct pblk_gc *gc, struct pblk_rl *rl)
{
unsigned int nr_blocks_free, nr_blocks_need;
+ unsigned int werr_lines = atomic_read(&rl->werr_lines);
nr_blocks_need = pblk_rl_high_thrs(rl);
nr_blocks_free = pblk_rl_nr_free_blks(rl);
/* This is not critical, no need to take lock here */
- return ((gc->gc_active) && (nr_blocks_need > nr_blocks_free));
+ return ((werr_lines > 0) ||
+ ((gc->gc_active) && (nr_blocks_need > nr_blocks_free)));
}
void pblk_gc_free_full_lines(struct pblk *pblk)
@@ -649,7 +677,7 @@ fail_free_main_kthread:
return ret;
}
-void pblk_gc_exit(struct pblk *pblk)
+void pblk_gc_exit(struct pblk *pblk, bool graceful)
{
struct pblk_gc *gc = &pblk->gc;
@@ -663,10 +691,12 @@ void pblk_gc_exit(struct pblk *pblk)
if (gc->gc_reader_ts)
kthread_stop(gc->gc_reader_ts);
- flush_workqueue(gc->gc_reader_wq);
- destroy_workqueue(gc->gc_reader_wq);
+ if (graceful) {
+ flush_workqueue(gc->gc_reader_wq);
+ flush_workqueue(gc->gc_line_reader_wq);
+ }
- flush_workqueue(gc->gc_line_reader_wq);
+ destroy_workqueue(gc->gc_reader_wq);
destroy_workqueue(gc->gc_line_reader_wq);
if (gc->gc_writer_ts)
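
The pblk_gc_exit() hunk splits teardown into a graceful path, which flushes the GC workqueues so queued work completes, and a forced path, which skips the flush (assumed rationale: on a forced exit the device may already be unreachable, so waiting on in-flight work is avoided). A minimal sketch of that split, kernel context assumed:

    #include <linux/workqueue.h>

    static void wq_teardown(struct workqueue_struct *wq, bool graceful)
    {
            if (graceful)
                    flush_workqueue(wq);    /* drain queued work first */
            destroy_workqueue(wq);          /* always reclaim the queue */
    }
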
diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c
index 91a5bc2556a3..ce561f5d48ce 100644
--- a/drivers/lightnvm/pblk-init.c
+++ b/drivers/lightnvm/pblk-init.c
@@ -20,10 +20,15 @@
#include "pblk.h"
+unsigned int write_buffer_size;
+
+module_param(write_buffer_size, uint, 0644);
+MODULE_PARM_DESC(write_buffer_size, "number of entries in a write buffer");
+
static struct kmem_cache *pblk_ws_cache, *pblk_rec_cache, *pblk_g_rq_cache,
*pblk_w_rq_cache;
static DECLARE_RWSEM(pblk_lock);
-struct bio_set *pblk_bio_set;
+struct bio_set pblk_bio_set;
static int pblk_rw_io(struct request_queue *q, struct pblk *pblk,
struct bio *bio)
@@ -127,10 +132,8 @@ static int pblk_l2p_recover(struct pblk *pblk, bool factory_init)
if (!line) {
/* Configure next line for user data */
line = pblk_line_get_first_data(pblk);
- if (!line) {
- pr_err("pblk: line list corrupted\n");
+ if (!line)
return -EFAULT;
- }
}
return 0;
@@ -141,6 +144,7 @@ static int pblk_l2p_init(struct pblk *pblk, bool factory_init)
sector_t i;
struct ppa_addr ppa;
size_t map_size;
+ int ret = 0;
map_size = pblk_trans_map_size(pblk);
pblk->trans_map = vmalloc(map_size);
@@ -152,7 +156,11 @@ static int pblk_l2p_init(struct pblk *pblk, bool factory_init)
for (i = 0; i < pblk->rl.nr_secs; i++)
pblk_trans_map_set(pblk, i, ppa);
- return pblk_l2p_recover(pblk, factory_init);
+ ret = pblk_l2p_recover(pblk, factory_init);
+ if (ret)
+ vfree(pblk->trans_map);
+
+ return ret;
}
static void pblk_rwb_free(struct pblk *pblk)
@@ -169,10 +177,15 @@ static int pblk_rwb_init(struct pblk *pblk)
struct nvm_tgt_dev *dev = pblk->dev;
struct nvm_geo *geo = &dev->geo;
struct pblk_rb_entry *entries;
- unsigned long nr_entries;
+ unsigned long nr_entries, buffer_size;
unsigned int power_size, power_seg_sz;
- nr_entries = pblk_rb_calculate_size(pblk->pgs_in_buffer);
+ if (write_buffer_size && (write_buffer_size > pblk->pgs_in_buffer))
+ buffer_size = write_buffer_size;
+ else
+ buffer_size = pblk->pgs_in_buffer;
+
+ nr_entries = pblk_rb_calculate_size(buffer_size);
entries = vzalloc(nr_entries * sizeof(struct pblk_rb_entry));
if (!entries)
@@ -341,7 +354,7 @@ static int pblk_core_init(struct pblk *pblk)
{
struct nvm_tgt_dev *dev = pblk->dev;
struct nvm_geo *geo = &dev->geo;
- int max_write_ppas;
+ int ret, max_write_ppas;
atomic64_set(&pblk->user_wa, 0);
atomic64_set(&pblk->pad_wa, 0);
@@ -375,33 +388,33 @@ static int pblk_core_init(struct pblk *pblk)
goto fail_free_pad_dist;
/* Internal bios can be at most the sectors signaled by the device. */
- pblk->page_bio_pool = mempool_create_page_pool(NVM_MAX_VLBA, 0);
- if (!pblk->page_bio_pool)
+ ret = mempool_init_page_pool(&pblk->page_bio_pool, NVM_MAX_VLBA, 0);
+ if (ret)
goto free_global_caches;
- pblk->gen_ws_pool = mempool_create_slab_pool(PBLK_GEN_WS_POOL_SIZE,
- pblk_ws_cache);
- if (!pblk->gen_ws_pool)
+ ret = mempool_init_slab_pool(&pblk->gen_ws_pool, PBLK_GEN_WS_POOL_SIZE,
+ pblk_ws_cache);
+ if (ret)
goto free_page_bio_pool;
- pblk->rec_pool = mempool_create_slab_pool(geo->all_luns,
- pblk_rec_cache);
- if (!pblk->rec_pool)
+ ret = mempool_init_slab_pool(&pblk->rec_pool, geo->all_luns,
+ pblk_rec_cache);
+ if (ret)
goto free_gen_ws_pool;
- pblk->r_rq_pool = mempool_create_slab_pool(geo->all_luns,
- pblk_g_rq_cache);
- if (!pblk->r_rq_pool)
+ ret = mempool_init_slab_pool(&pblk->r_rq_pool, geo->all_luns,
+ pblk_g_rq_cache);
+ if (ret)
goto free_rec_pool;
- pblk->e_rq_pool = mempool_create_slab_pool(geo->all_luns,
- pblk_g_rq_cache);
- if (!pblk->e_rq_pool)
+ ret = mempool_init_slab_pool(&pblk->e_rq_pool, geo->all_luns,
+ pblk_g_rq_cache);
+ if (ret)
goto free_r_rq_pool;
- pblk->w_rq_pool = mempool_create_slab_pool(geo->all_luns,
- pblk_w_rq_cache);
- if (!pblk->w_rq_pool)
+ ret = mempool_init_slab_pool(&pblk->w_rq_pool, geo->all_luns,
+ pblk_w_rq_cache);
+ if (ret)
goto free_e_rq_pool;
pblk->close_wq = alloc_workqueue("pblk-close-wq",
@@ -423,6 +436,7 @@ static int pblk_core_init(struct pblk *pblk)
goto free_r_end_wq;
INIT_LIST_HEAD(&pblk->compl_list);
+ INIT_LIST_HEAD(&pblk->resubmit_list);
return 0;
@@ -433,17 +447,17 @@ free_bb_wq:
free_close_wq:
destroy_workqueue(pblk->close_wq);
free_w_rq_pool:
- mempool_destroy(pblk->w_rq_pool);
+ mempool_exit(&pblk->w_rq_pool);
free_e_rq_pool:
- mempool_destroy(pblk->e_rq_pool);
+ mempool_exit(&pblk->e_rq_pool);
free_r_rq_pool:
- mempool_destroy(pblk->r_rq_pool);
+ mempool_exit(&pblk->r_rq_pool);
free_rec_pool:
- mempool_destroy(pblk->rec_pool);
+ mempool_exit(&pblk->rec_pool);
free_gen_ws_pool:
- mempool_destroy(pblk->gen_ws_pool);
+ mempool_exit(&pblk->gen_ws_pool);
free_page_bio_pool:
- mempool_destroy(pblk->page_bio_pool);
+ mempool_exit(&pblk->page_bio_pool);
free_global_caches:
pblk_free_global_caches(pblk);
fail_free_pad_dist:
@@ -462,12 +476,12 @@ static void pblk_core_free(struct pblk *pblk)
if (pblk->bb_wq)
destroy_workqueue(pblk->bb_wq);
- mempool_destroy(pblk->page_bio_pool);
- mempool_destroy(pblk->gen_ws_pool);
- mempool_destroy(pblk->rec_pool);
- mempool_destroy(pblk->r_rq_pool);
- mempool_destroy(pblk->e_rq_pool);
- mempool_destroy(pblk->w_rq_pool);
+ mempool_exit(&pblk->page_bio_pool);
+ mempool_exit(&pblk->gen_ws_pool);
+ mempool_exit(&pblk->rec_pool);
+ mempool_exit(&pblk->r_rq_pool);
+ mempool_exit(&pblk->e_rq_pool);
+ mempool_exit(&pblk->w_rq_pool);
pblk_free_global_caches(pblk);
kfree(pblk->pad_dist);
@@ -489,11 +503,17 @@ static void pblk_line_mg_free(struct pblk *pblk)
}
}
-static void pblk_line_meta_free(struct pblk_line *line)
+static void pblk_line_meta_free(struct pblk_line_mgmt *l_mg,
+ struct pblk_line *line)
{
+ struct pblk_w_err_gc *w_err_gc = line->w_err_gc;
+
kfree(line->blk_bitmap);
kfree(line->erase_bitmap);
kfree(line->chks);
+
+ pblk_mfree(w_err_gc->lba_list, l_mg->emeta_alloc_type);
+ kfree(w_err_gc);
}
static void pblk_lines_free(struct pblk *pblk)
@@ -506,8 +526,8 @@ static void pblk_lines_free(struct pblk *pblk)
for (i = 0; i < l_mg->nr_lines; i++) {
line = &pblk->lines[i];
- pblk_line_free(pblk, line);
- pblk_line_meta_free(line);
+ pblk_line_free(line);
+ pblk_line_meta_free(l_mg, line);
}
spin_unlock(&l_mg->free_lock);
@@ -748,14 +768,14 @@ static int pblk_setup_line_meta_20(struct pblk *pblk, struct pblk_line *line,
chunk->cnlb = chunk_meta->cnlb;
chunk->wp = chunk_meta->wp;
- if (!(chunk->state & NVM_CHK_ST_OFFLINE))
- continue;
-
if (chunk->type & NVM_CHK_TP_SZ_SPEC) {
WARN_ONCE(1, "pblk: custom-sized chunks unsupported\n");
continue;
}
+ if (!(chunk->state & NVM_CHK_ST_OFFLINE))
+ continue;
+
set_bit(pos, line->blk_bitmap);
nr_bad_chks++;
}
@@ -809,20 +829,28 @@ static int pblk_alloc_line_meta(struct pblk *pblk, struct pblk_line *line)
return -ENOMEM;
line->erase_bitmap = kzalloc(lm->blk_bitmap_len, GFP_KERNEL);
- if (!line->erase_bitmap) {
- kfree(line->blk_bitmap);
- return -ENOMEM;
- }
+ if (!line->erase_bitmap)
+ goto free_blk_bitmap;
+
line->chks = kmalloc(lm->blk_per_line * sizeof(struct nvm_chk_meta),
GFP_KERNEL);
- if (!line->chks) {
- kfree(line->erase_bitmap);
- kfree(line->blk_bitmap);
- return -ENOMEM;
- }
+ if (!line->chks)
+ goto free_erase_bitmap;
+
+ line->w_err_gc = kzalloc(sizeof(struct pblk_w_err_gc), GFP_KERNEL);
+ if (!line->w_err_gc)
+ goto free_chks;
return 0;
+
+free_chks:
+ kfree(line->chks);
+free_erase_bitmap:
+ kfree(line->erase_bitmap);
+free_blk_bitmap:
+ kfree(line->blk_bitmap);
+ return -ENOMEM;
}
static int pblk_line_mg_init(struct pblk *pblk)
@@ -847,12 +875,14 @@ static int pblk_line_mg_init(struct pblk *pblk)
INIT_LIST_HEAD(&l_mg->gc_mid_list);
INIT_LIST_HEAD(&l_mg->gc_low_list);
INIT_LIST_HEAD(&l_mg->gc_empty_list);
+ INIT_LIST_HEAD(&l_mg->gc_werr_list);
INIT_LIST_HEAD(&l_mg->emeta_list);
- l_mg->gc_lists[0] = &l_mg->gc_high_list;
- l_mg->gc_lists[1] = &l_mg->gc_mid_list;
- l_mg->gc_lists[2] = &l_mg->gc_low_list;
+ l_mg->gc_lists[0] = &l_mg->gc_werr_list;
+ l_mg->gc_lists[1] = &l_mg->gc_high_list;
+ l_mg->gc_lists[2] = &l_mg->gc_mid_list;
+ l_mg->gc_lists[3] = &l_mg->gc_low_list;
spin_lock_init(&l_mg->free_lock);
spin_lock_init(&l_mg->close_lock);
@@ -1047,6 +1077,11 @@ static int pblk_lines_init(struct pblk *pblk)
nr_free_chks += pblk_setup_line_meta(pblk, line, chunk_meta, i);
}
+ if (!nr_free_chks) {
+ pr_err("pblk: too many bad blocks prevent for sane instance\n");
+ return -EINTR;
+ }
+
pblk_set_provision(pblk, nr_free_chks);
kfree(chunk_meta);
@@ -1054,7 +1089,7 @@ static int pblk_lines_init(struct pblk *pblk)
fail_free_lines:
while (--i >= 0)
- pblk_line_meta_free(&pblk->lines[i]);
+ pblk_line_meta_free(l_mg, &pblk->lines[i]);
kfree(pblk->lines);
fail_free_chunk_meta:
kfree(chunk_meta);
@@ -1110,23 +1145,25 @@ static void pblk_free(struct pblk *pblk)
kfree(pblk);
}
-static void pblk_tear_down(struct pblk *pblk)
+static void pblk_tear_down(struct pblk *pblk, bool graceful)
{
- pblk_pipeline_stop(pblk);
+ if (graceful)
+ __pblk_pipeline_flush(pblk);
+ __pblk_pipeline_stop(pblk);
pblk_writer_stop(pblk);
pblk_rb_sync_l2p(&pblk->rwb);
pblk_rl_free(&pblk->rl);
- pr_debug("pblk: consistent tear down\n");
+ pr_debug("pblk: consistent tear down (graceful:%d)\n", graceful);
}
-static void pblk_exit(void *private)
+static void pblk_exit(void *private, bool graceful)
{
struct pblk *pblk = private;
down_write(&pblk_lock);
- pblk_gc_exit(pblk);
- pblk_tear_down(pblk);
+ pblk_gc_exit(pblk, graceful);
+ pblk_tear_down(pblk, graceful);
#ifdef CONFIG_NVM_DEBUG
pr_info("pblk exit: L2P CRC: %x\n", pblk_l2p_crc(pblk));
@@ -1175,6 +1212,7 @@ static void *pblk_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk,
pblk->state = PBLK_STATE_RUNNING;
pblk->gc.gc_enabled = 0;
+ spin_lock_init(&pblk->resubmit_lock);
spin_lock_init(&pblk->trans_lock);
spin_lock_init(&pblk->lock);
@@ -1297,18 +1335,18 @@ static int __init pblk_module_init(void)
{
int ret;
- pblk_bio_set = bioset_create(BIO_POOL_SIZE, 0, 0);
- if (!pblk_bio_set)
- return -ENOMEM;
+ ret = bioset_init(&pblk_bio_set, BIO_POOL_SIZE, 0, 0);
+ if (ret)
+ return ret;
ret = nvm_register_tgt_type(&tt_pblk);
if (ret)
- bioset_free(pblk_bio_set);
+ bioset_exit(&pblk_bio_set);
return ret;
}
static void pblk_module_exit(void)
{
- bioset_free(pblk_bio_set);
+ bioset_exit(&pblk_bio_set);
nvm_unregister_tgt_type(&tt_pblk);
}
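
pblk-init.c also migrates from the heap-allocated bioset API (bioset_create()/bioset_free()) to the caller-embedded one (bioset_init()/bioset_exit()), trading a pointer dereference per allocation for explicit init/exit calls. A minimal sketch of the embedded pattern, kernel context assumed:

    #include <linux/bio.h>

    static struct bio_set demo_bio_set;     /* embedded, not a pointer */

    static int __init demo_init(void)
    {
            /* returns 0 on success or a negative errno */
            return bioset_init(&demo_bio_set, BIO_POOL_SIZE, 0, 0);
    }

    static void __exit demo_exit(void)
    {
            bioset_exit(&demo_bio_set);     /* safe on a zeroed, never-inited set */
    }
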
diff --git a/drivers/lightnvm/pblk-map.c b/drivers/lightnvm/pblk-map.c
index 20dbaa89c9df..953ca31dda68 100644
--- a/drivers/lightnvm/pblk-map.c
+++ b/drivers/lightnvm/pblk-map.c
@@ -18,11 +18,11 @@
#include "pblk.h"
-static void pblk_map_page_data(struct pblk *pblk, unsigned int sentry,
- struct ppa_addr *ppa_list,
- unsigned long *lun_bitmap,
- struct pblk_sec_meta *meta_list,
- unsigned int valid_secs)
+static int pblk_map_page_data(struct pblk *pblk, unsigned int sentry,
+ struct ppa_addr *ppa_list,
+ unsigned long *lun_bitmap,
+ struct pblk_sec_meta *meta_list,
+ unsigned int valid_secs)
{
struct pblk_line *line = pblk_line_get_data(pblk);
struct pblk_emeta *emeta;
@@ -35,8 +35,14 @@ static void pblk_map_page_data(struct pblk *pblk, unsigned int sentry,
if (pblk_line_is_full(line)) {
struct pblk_line *prev_line = line;
+ /* If we cannot allocate a new line, make sure to store metadata
+ * on the current line and then fail
+ */
line = pblk_line_replace_data(pblk);
pblk_line_close_meta(pblk, prev_line);
+
+ if (!line)
+ return -EINTR;
}
emeta = line->emeta;
@@ -74,6 +80,7 @@ static void pblk_map_page_data(struct pblk *pblk, unsigned int sentry,
}
pblk_down_rq(pblk, ppa_list, nr_secs, lun_bitmap);
+ return 0;
}
void pblk_map_rq(struct pblk *pblk, struct nvm_rq *rqd, unsigned int sentry,
@@ -87,8 +94,12 @@ void pblk_map_rq(struct pblk *pblk, struct nvm_rq *rqd, unsigned int sentry,
for (i = off; i < rqd->nr_ppas; i += min) {
map_secs = (i + min > valid_secs) ? (valid_secs % min) : min;
- pblk_map_page_data(pblk, sentry + i, &rqd->ppa_list[i],
- lun_bitmap, &meta_list[i], map_secs);
+ if (pblk_map_page_data(pblk, sentry + i, &rqd->ppa_list[i],
+ lun_bitmap, &meta_list[i], map_secs)) {
+ bio_put(rqd->bio);
+ pblk_free_rqd(pblk, rqd, PBLK_WRITE);
+ pblk_pipeline_stop(pblk);
+ }
}
}
@@ -108,8 +119,12 @@ void pblk_map_erase_rq(struct pblk *pblk, struct nvm_rq *rqd,
for (i = 0; i < rqd->nr_ppas; i += min) {
map_secs = (i + min > valid_secs) ? (valid_secs % min) : min;
- pblk_map_page_data(pblk, sentry + i, &rqd->ppa_list[i],
- lun_bitmap, &meta_list[i], map_secs);
+ if (pblk_map_page_data(pblk, sentry + i, &rqd->ppa_list[i],
+ lun_bitmap, &meta_list[i], map_secs)) {
+ bio_put(rqd->bio);
+ pblk_free_rqd(pblk, rqd, PBLK_WRITE);
+ pblk_pipeline_stop(pblk);
+ }
erase_lun = pblk_ppa_to_pos(geo, rqd->ppa_list[i]);
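
The map_secs expression in both loops above peels whole min-sized chunks and then a single remainder (the driver pads requests so at most the final chunk is partial). A worked example with illustrative numbers (valid_secs = 13, min = 4), runnable as plain C:

    #include <stdio.h>

    int main(void)
    {
            unsigned int valid_secs = 13, min = 4, nr_ppas = 16, i;

            for (i = 0; i < nr_ppas; i += min) {
                    unsigned int map_secs =
                            (i + min > valid_secs) ? (valid_secs % min) : min;
                    printf("offset %2u -> map %u sectors\n", i, map_secs);
            }
            /* prints 4, 4, 4 for offsets 0..8, then the 13 % 4 = 1 tail */
            return 0;
    }
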
diff --git a/drivers/lightnvm/pblk-rb.c b/drivers/lightnvm/pblk-rb.c
index 52fdd85dbc97..00cd1f20a196 100644
--- a/drivers/lightnvm/pblk-rb.c
+++ b/drivers/lightnvm/pblk-rb.c
@@ -142,10 +142,9 @@ static void clean_wctx(struct pblk_w_ctx *w_ctx)
{
int flags;
-try:
flags = READ_ONCE(w_ctx->flags);
- if (!(flags & PBLK_SUBMITTED_ENTRY))
- goto try;
+ WARN_ONCE(!(flags & PBLK_SUBMITTED_ENTRY),
+ "pblk: overwriting unsubmitted data\n");
/* Release flags on context. Protect from writes and reads */
smp_store_release(&w_ctx->flags, PBLK_WRITABLE_ENTRY);
@@ -350,7 +349,7 @@ void pblk_rb_write_entry_gc(struct pblk_rb *rb, void *data,
}
static int pblk_rb_flush_point_set(struct pblk_rb *rb, struct bio *bio,
- unsigned int pos)
+ unsigned int pos)
{
struct pblk_rb_entry *entry;
unsigned int sync, flush_point;
@@ -420,7 +419,7 @@ void pblk_rb_flush(struct pblk_rb *rb)
if (pblk_rb_flush_point_set(rb, NULL, mem))
return;
- pblk_write_should_kick(pblk);
+ pblk_write_kick(pblk);
}
static int pblk_rb_may_write_flush(struct pblk_rb *rb, unsigned int nr_entries,
@@ -504,45 +503,6 @@ int pblk_rb_may_write_gc(struct pblk_rb *rb, unsigned int nr_entries,
}
/*
- * The caller of this function must ensure that the backpointer will not
- * overwrite the entries passed on the list.
- */
-unsigned int pblk_rb_read_to_bio_list(struct pblk_rb *rb, struct bio *bio,
- struct list_head *list,
- unsigned int max)
-{
- struct pblk_rb_entry *entry, *tentry;
- struct page *page;
- unsigned int read = 0;
- int ret;
-
- list_for_each_entry_safe(entry, tentry, list, index) {
- if (read > max) {
- pr_err("pblk: too many entries on list\n");
- goto out;
- }
-
- page = virt_to_page(entry->data);
- if (!page) {
- pr_err("pblk: could not allocate write bio page\n");
- goto out;
- }
-
- ret = bio_add_page(bio, page, rb->seg_size, 0);
- if (ret != rb->seg_size) {
- pr_err("pblk: could not add page to write bio\n");
- goto out;
- }
-
- list_del(&entry->index);
- read++;
- }
-
-out:
- return read;
-}
-
-/*
* Read available entries on rb and add them to the given bio. To avoid a memory
* copy, a page reference to the write buffer is used to be added to the bio.
*
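
The clean_wctx() hunk above replaces a busy-wait (the old goto try loop) with a one-time warning: by the time an entry is reclaimed, its writer must already have published PBLK_SUBMITTED_ENTRY, so spinning only hid a logic bug. A hedged sketch of the flag protocol, with stand-in names:

    #include <linux/compiler.h>     /* READ_ONCE() */
    #include <linux/bug.h>          /* WARN_ONCE() */
    #include <linux/atomic.h>       /* smp_store_release() */

    #define ENTRY_WRITABLE  (1 << 0)
    #define ENTRY_SUBMITTED (1 << 1)

    static void reclaim_entry(int *flags)
    {
            int f = READ_ONCE(*flags);

            WARN_ONCE(!(f & ENTRY_SUBMITTED),
                      "reclaiming unsubmitted entry\n");

            /* release store pairs with the submitter's acquire of *flags */
            smp_store_release(flags, ENTRY_WRITABLE);
    }
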
diff --git a/drivers/lightnvm/pblk-read.c b/drivers/lightnvm/pblk-read.c
index 9eee10f69df0..18694694e5f0 100644
--- a/drivers/lightnvm/pblk-read.c
+++ b/drivers/lightnvm/pblk-read.c
@@ -39,10 +39,10 @@ static int pblk_read_from_cache(struct pblk *pblk, struct bio *bio,
}
static void pblk_read_ppalist_rq(struct pblk *pblk, struct nvm_rq *rqd,
- sector_t blba, unsigned long *read_bitmap)
+ struct bio *bio, sector_t blba,
+ unsigned long *read_bitmap)
{
struct pblk_sec_meta *meta_list = rqd->meta_list;
- struct bio *bio = rqd->bio;
struct ppa_addr ppas[PBLK_MAX_REQ_ADDRS];
int nr_secs = rqd->nr_ppas;
bool advanced_bio = false;
@@ -102,32 +102,69 @@ next:
#endif
}
-static int pblk_submit_read_io(struct pblk *pblk, struct nvm_rq *rqd)
+
+static void pblk_read_check_seq(struct pblk *pblk, struct nvm_rq *rqd,
+ sector_t blba)
{
- int err;
+ struct pblk_sec_meta *meta_lba_list = rqd->meta_list;
+ int nr_lbas = rqd->nr_ppas;
+ int i;
- err = pblk_submit_io(pblk, rqd);
- if (err)
- return NVM_IO_ERR;
+ for (i = 0; i < nr_lbas; i++) {
+ u64 lba = le64_to_cpu(meta_lba_list[i].lba);
+
+ if (lba == ADDR_EMPTY)
+ continue;
+
+ if (lba != blba + i) {
+#ifdef CONFIG_NVM_DEBUG
+ struct ppa_addr *p;
- return NVM_IO_OK;
+ p = (nr_lbas == 1) ? &rqd->ppa_addr : &rqd->ppa_list[i];
+ print_ppa(&pblk->dev->geo, p, "seq", i);
+#endif
+ pr_err("pblk: corrupted read LBA (%llu/%llu)\n",
+ lba, (u64)blba + i);
+ WARN_ON(1);
+ }
+ }
}
-static void pblk_read_check(struct pblk *pblk, struct nvm_rq *rqd,
- sector_t blba)
+/*
+ * There can be holes in the lba list.
+ */
+static void pblk_read_check_rand(struct pblk *pblk, struct nvm_rq *rqd,
+ u64 *lba_list, int nr_lbas)
{
- struct pblk_sec_meta *meta_list = rqd->meta_list;
- int nr_lbas = rqd->nr_ppas;
- int i;
+ struct pblk_sec_meta *meta_lba_list = rqd->meta_list;
+ int i, j;
- for (i = 0; i < nr_lbas; i++) {
- u64 lba = le64_to_cpu(meta_list[i].lba);
+ for (i = 0, j = 0; i < nr_lbas; i++) {
+ u64 lba = lba_list[i];
+ u64 meta_lba;
if (lba == ADDR_EMPTY)
continue;
- WARN(lba != blba + i, "pblk: corrupted read LBA\n");
+ meta_lba = le64_to_cpu(meta_lba_list[j].lba);
+
+ if (lba != meta_lba) {
+#ifdef CONFIG_NVM_DEBUG
+ struct ppa_addr *p;
+ int nr_ppas = rqd->nr_ppas;
+
+ p = (nr_ppas == 1) ? &rqd->ppa_addr : &rqd->ppa_list[j];
+ print_ppa(&pblk->dev->geo, p, "rnd", j);
+#endif
+ pr_err("pblk: corrupted read LBA (%llu/%llu)\n",
+ lba, meta_lba);
+ WARN_ON(1);
+ }
+
+ j++;
}
+
+ WARN_ONCE(j != rqd->nr_ppas, "pblk: corrupted random request\n");
}
static void pblk_read_put_rqd_kref(struct pblk *pblk, struct nvm_rq *rqd)
@@ -152,7 +189,6 @@ static void pblk_end_user_read(struct bio *bio)
WARN_ONCE(bio->bi_status, "pblk: corrupted read bio\n");
#endif
bio_endio(bio);
- bio_put(bio);
}
static void __pblk_end_io_read(struct pblk *pblk, struct nvm_rq *rqd,
@@ -160,23 +196,18 @@ static void __pblk_end_io_read(struct pblk *pblk, struct nvm_rq *rqd,
{
struct nvm_tgt_dev *dev = pblk->dev;
struct pblk_g_ctx *r_ctx = nvm_rq_to_pdu(rqd);
- struct bio *bio = rqd->bio;
+ struct bio *int_bio = rqd->bio;
unsigned long start_time = r_ctx->start_time;
generic_end_io_acct(dev->q, READ, &pblk->disk->part0, start_time);
if (rqd->error)
pblk_log_read_err(pblk, rqd);
-#ifdef CONFIG_NVM_DEBUG
- else
- WARN_ONCE(bio->bi_status, "pblk: corrupted read error\n");
-#endif
- pblk_read_check(pblk, rqd, r_ctx->lba);
+ pblk_read_check_seq(pblk, rqd, r_ctx->lba);
- bio_put(bio);
- if (r_ctx->private)
- pblk_end_user_read((struct bio *)r_ctx->private);
+ if (int_bio)
+ bio_put(int_bio);
if (put_line)
pblk_read_put_rqd_kref(pblk, rqd);
@@ -193,16 +224,19 @@ static void __pblk_end_io_read(struct pblk *pblk, struct nvm_rq *rqd,
static void pblk_end_io_read(struct nvm_rq *rqd)
{
struct pblk *pblk = rqd->private;
+ struct pblk_g_ctx *r_ctx = nvm_rq_to_pdu(rqd);
+ struct bio *bio = (struct bio *)r_ctx->private;
+ pblk_end_user_read(bio);
__pblk_end_io_read(pblk, rqd, true);
}
-static int pblk_partial_read_bio(struct pblk *pblk, struct nvm_rq *rqd,
- unsigned int bio_init_idx,
- unsigned long *read_bitmap)
+static int pblk_partial_read(struct pblk *pblk, struct nvm_rq *rqd,
+ struct bio *orig_bio, unsigned int bio_init_idx,
+ unsigned long *read_bitmap)
{
- struct bio *new_bio, *bio = rqd->bio;
struct pblk_sec_meta *meta_list = rqd->meta_list;
+ struct bio *new_bio;
struct bio_vec src_bv, dst_bv;
void *ppa_ptr = NULL;
void *src_p, *dst_p;
@@ -219,11 +253,11 @@ static int pblk_partial_read_bio(struct pblk *pblk, struct nvm_rq *rqd,
new_bio = bio_alloc(GFP_KERNEL, nr_holes);
if (pblk_bio_add_pages(pblk, new_bio, GFP_KERNEL, nr_holes))
- goto err;
+ goto fail_add_pages;
if (nr_holes != new_bio->bi_vcnt) {
pr_err("pblk: malformed bio\n");
- goto err;
+ goto fail;
}
for (i = 0; i < nr_secs; i++)
@@ -246,7 +280,7 @@ static int pblk_partial_read_bio(struct pblk *pblk, struct nvm_rq *rqd,
if (ret) {
bio_put(rqd->bio);
pr_err("pblk: sync read IO submission failed\n");
- goto err;
+ goto fail;
}
if (rqd->error) {
@@ -282,7 +316,7 @@ static int pblk_partial_read_bio(struct pblk *pblk, struct nvm_rq *rqd,
meta_list[hole].lba = lba_list_media[i];
src_bv = new_bio->bi_io_vec[i++];
- dst_bv = bio->bi_io_vec[bio_init_idx + hole];
+ dst_bv = orig_bio->bi_io_vec[bio_init_idx + hole];
src_p = kmap_atomic(src_bv.bv_page);
dst_p = kmap_atomic(dst_bv.bv_page);
@@ -294,35 +328,33 @@ static int pblk_partial_read_bio(struct pblk *pblk, struct nvm_rq *rqd,
kunmap_atomic(src_p);
kunmap_atomic(dst_p);
- mempool_free(src_bv.bv_page, pblk->page_bio_pool);
+ mempool_free(src_bv.bv_page, &pblk->page_bio_pool);
hole = find_next_zero_bit(read_bitmap, nr_secs, hole + 1);
} while (hole < nr_secs);
bio_put(new_bio);
- /* Complete the original bio and associated request */
- bio_endio(bio);
- rqd->bio = bio;
+ /* restore original request */
+ rqd->bio = NULL;
rqd->nr_ppas = nr_secs;
__pblk_end_io_read(pblk, rqd, false);
- return NVM_IO_OK;
-
-err:
- pr_err("pblk: failed to perform partial read\n");
+ return NVM_IO_DONE;
+fail:
/* Free allocated pages in new bio */
- pblk_bio_free_pages(pblk, bio, 0, new_bio->bi_vcnt);
+ pblk_bio_free_pages(pblk, new_bio, 0, new_bio->bi_vcnt);
+fail_add_pages:
+ pr_err("pblk: failed to perform partial read\n");
__pblk_end_io_read(pblk, rqd, false);
return NVM_IO_ERR;
}
-static void pblk_read_rq(struct pblk *pblk, struct nvm_rq *rqd,
+static void pblk_read_rq(struct pblk *pblk, struct nvm_rq *rqd, struct bio *bio,
sector_t lba, unsigned long *read_bitmap)
{
struct pblk_sec_meta *meta_list = rqd->meta_list;
- struct bio *bio = rqd->bio;
struct ppa_addr ppa;
pblk_lookup_l2p_seq(pblk, &ppa, lba, 1);
@@ -386,14 +418,15 @@ int pblk_submit_read(struct pblk *pblk, struct bio *bio)
rqd = pblk_alloc_rqd(pblk, PBLK_READ);
rqd->opcode = NVM_OP_PREAD;
- rqd->bio = bio;
rqd->nr_ppas = nr_secs;
+ rqd->bio = NULL; /* cloned bio if needed */
rqd->private = pblk;
rqd->end_io = pblk_end_io_read;
r_ctx = nvm_rq_to_pdu(rqd);
r_ctx->start_time = jiffies;
r_ctx->lba = blba;
+ r_ctx->private = bio; /* original bio */
/* Save the index for this bio's start. This is needed in case
* we need to fill a partial read.
@@ -411,17 +444,15 @@ int pblk_submit_read(struct pblk *pblk, struct bio *bio)
rqd->ppa_list = rqd->meta_list + pblk_dma_meta_size;
rqd->dma_ppa_list = rqd->dma_meta_list + pblk_dma_meta_size;
- pblk_read_ppalist_rq(pblk, rqd, blba, &read_bitmap);
+ pblk_read_ppalist_rq(pblk, rqd, bio, blba, &read_bitmap);
} else {
- pblk_read_rq(pblk, rqd, blba, &read_bitmap);
+ pblk_read_rq(pblk, rqd, bio, blba, &read_bitmap);
}
- bio_get(bio);
if (bitmap_full(&read_bitmap, nr_secs)) {
- bio_endio(bio);
atomic_inc(&pblk->inflight_io);
__pblk_end_io_read(pblk, rqd, false);
- return NVM_IO_OK;
+ return NVM_IO_DONE;
}
/* All sectors are to be read from the device */
@@ -429,20 +460,17 @@ int pblk_submit_read(struct pblk *pblk, struct bio *bio)
struct bio *int_bio = NULL;
/* Clone read bio to deal with read errors internally */
- int_bio = bio_clone_fast(bio, GFP_KERNEL, pblk_bio_set);
+ int_bio = bio_clone_fast(bio, GFP_KERNEL, &pblk_bio_set);
if (!int_bio) {
pr_err("pblk: could not clone read bio\n");
goto fail_end_io;
}
rqd->bio = int_bio;
- r_ctx->private = bio;
- ret = pblk_submit_read_io(pblk, rqd);
- if (ret) {
+ if (pblk_submit_io(pblk, rqd)) {
pr_err("pblk: read IO submission failed\n");
- if (int_bio)
- bio_put(int_bio);
+ ret = NVM_IO_ERR;
goto fail_end_io;
}
@@ -452,7 +480,7 @@ int pblk_submit_read(struct pblk *pblk, struct bio *bio)
/* The read bio request could be partially filled by the write buffer,
* but there are some holes that need to be read from the drive.
*/
- return pblk_partial_read_bio(pblk, rqd, bio_init_idx, &read_bitmap);
+ return pblk_partial_read(pblk, rqd, bio, bio_init_idx, &read_bitmap);
fail_rqd_free:
pblk_free_rqd(pblk, rqd, PBLK_READ);
@@ -585,6 +613,8 @@ int pblk_submit_read_gc(struct pblk *pblk, struct pblk_gc_rq *gc_rq)
goto err_free_bio;
}
+ pblk_read_check_rand(pblk, &rqd, gc_rq->lba_list, gc_rq->nr_secs);
+
atomic_dec(&pblk->inflight_io);
if (rqd.error) {
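
The read-path hunks above re-draw bio ownership: rqd->bio now holds only the internal clone (or NULL), while the original user bio travels in r_ctx->private and is completed exactly once in pblk_end_io_read(). A minimal sketch of the clone step, kernel context assumed:

    #include <linux/bio.h>

    extern struct bio_set pblk_bio_set;     /* set up with bioset_init() */

    static struct bio *clone_for_recovery(struct bio *user_bio)
    {
            /* the clone shares pages with user_bio, so error handling can
             * re-drive the clone while the original completes only once
             */
            return bio_clone_fast(user_bio, GFP_KERNEL, &pblk_bio_set);
    }
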
diff --git a/drivers/lightnvm/pblk-recovery.c b/drivers/lightnvm/pblk-recovery.c
index 3e079c2afa6e..598342833d0d 100644
--- a/drivers/lightnvm/pblk-recovery.c
+++ b/drivers/lightnvm/pblk-recovery.c
@@ -16,97 +16,6 @@
#include "pblk.h"
-void pblk_submit_rec(struct work_struct *work)
-{
- struct pblk_rec_ctx *recovery =
- container_of(work, struct pblk_rec_ctx, ws_rec);
- struct pblk *pblk = recovery->pblk;
- struct nvm_rq *rqd = recovery->rqd;
- struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd);
- struct bio *bio;
- unsigned int nr_rec_secs;
- unsigned int pgs_read;
- int ret;
-
- nr_rec_secs = bitmap_weight((unsigned long int *)&rqd->ppa_status,
- NVM_MAX_VLBA);
-
- bio = bio_alloc(GFP_KERNEL, nr_rec_secs);
-
- bio->bi_iter.bi_sector = 0;
- bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
- rqd->bio = bio;
- rqd->nr_ppas = nr_rec_secs;
-
- pgs_read = pblk_rb_read_to_bio_list(&pblk->rwb, bio, &recovery->failed,
- nr_rec_secs);
- if (pgs_read != nr_rec_secs) {
- pr_err("pblk: could not read recovery entries\n");
- goto err;
- }
-
- if (pblk_setup_w_rec_rq(pblk, rqd, c_ctx)) {
- pr_err("pblk: could not setup recovery request\n");
- goto err;
- }
-
-#ifdef CONFIG_NVM_DEBUG
- atomic_long_add(nr_rec_secs, &pblk->recov_writes);
-#endif
-
- ret = pblk_submit_io(pblk, rqd);
- if (ret) {
- pr_err("pblk: I/O submission failed: %d\n", ret);
- goto err;
- }
-
- mempool_free(recovery, pblk->rec_pool);
- return;
-
-err:
- bio_put(bio);
- pblk_free_rqd(pblk, rqd, PBLK_WRITE);
-}
-
-int pblk_recov_setup_rq(struct pblk *pblk, struct pblk_c_ctx *c_ctx,
- struct pblk_rec_ctx *recovery, u64 *comp_bits,
- unsigned int comp)
-{
- struct nvm_rq *rec_rqd;
- struct pblk_c_ctx *rec_ctx;
- int nr_entries = c_ctx->nr_valid + c_ctx->nr_padded;
-
- rec_rqd = pblk_alloc_rqd(pblk, PBLK_WRITE);
- rec_ctx = nvm_rq_to_pdu(rec_rqd);
-
- /* Copy completion bitmap, but exclude the first X completed entries */
- bitmap_shift_right((unsigned long int *)&rec_rqd->ppa_status,
- (unsigned long int *)comp_bits,
- comp, NVM_MAX_VLBA);
-
- /* Save the context for the entries that need to be re-written and
- * update current context with the completed entries.
- */
- rec_ctx->sentry = pblk_rb_wrap_pos(&pblk->rwb, c_ctx->sentry + comp);
- if (comp >= c_ctx->nr_valid) {
- rec_ctx->nr_valid = 0;
- rec_ctx->nr_padded = nr_entries - comp;
-
- c_ctx->nr_padded = comp - c_ctx->nr_valid;
- } else {
- rec_ctx->nr_valid = c_ctx->nr_valid - comp;
- rec_ctx->nr_padded = c_ctx->nr_padded;
-
- c_ctx->nr_valid = comp;
- c_ctx->nr_padded = 0;
- }
-
- recovery->rqd = rec_rqd;
- recovery->pblk = pblk;
-
- return 0;
-}
-
int pblk_recov_check_emeta(struct pblk *pblk, struct line_emeta *emeta_buf)
{
u32 crc;
@@ -865,18 +774,30 @@ static void pblk_recov_wa_counters(struct pblk *pblk,
}
static int pblk_line_was_written(struct pblk_line *line,
- struct pblk_line_meta *lm)
+ struct pblk *pblk)
{
- int i;
- int state_mask = NVM_CHK_ST_OFFLINE | NVM_CHK_ST_FREE;
+ struct pblk_line_meta *lm = &pblk->lm;
+ struct nvm_tgt_dev *dev = pblk->dev;
+ struct nvm_geo *geo = &dev->geo;
+ struct nvm_chk_meta *chunk;
+ struct ppa_addr bppa;
+ int smeta_blk;
- for (i = 0; i < lm->blk_per_line; i++) {
- if (!(line->chks[i].state & state_mask))
- return 1;
- }
+ if (line->state == PBLK_LINESTATE_BAD)
+ return 0;
- return 0;
+ smeta_blk = find_first_zero_bit(line->blk_bitmap, lm->blk_per_line);
+ if (smeta_blk >= lm->blk_per_line)
+ return 0;
+
+ bppa = pblk->luns[smeta_blk].bppa;
+ chunk = &line->chks[pblk_ppa_to_pos(geo, bppa)];
+
+ if (chunk->state & NVM_CHK_ST_FREE)
+ return 0;
+
+ return 1;
}
struct pblk_line *pblk_recov_l2p(struct pblk *pblk)
@@ -915,7 +836,7 @@ struct pblk_line *pblk_recov_l2p(struct pblk *pblk)
line->lun_bitmap = ((void *)(smeta_buf)) +
sizeof(struct line_smeta);
- if (!pblk_line_was_written(line, lm))
+ if (!pblk_line_was_written(line, pblk))
continue;
/* Lines that cannot be read are assumed as not written here */
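
pblk_line_was_written() above is rewritten to probe a single chunk instead of scanning every block: the first non-bad block of a line is where smeta lives, so if that chunk is still free the line was never written. A hedged sketch of the probe, with stand-in parameters:

    #include <linux/bitops.h>

    /* blk_bitmap has a set bit per bad block; the first clear bit is
     * the block where smeta would have been written.
     */
    static int first_good_block(unsigned long *blk_bitmap, int blk_per_line)
    {
            int blk = find_first_zero_bit(blk_bitmap, blk_per_line);

            return (blk < blk_per_line) ? blk : -1; /* -1: all blocks bad */
    }
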
diff --git a/drivers/lightnvm/pblk-rl.c b/drivers/lightnvm/pblk-rl.c
index 883a7113b19d..6a0616a6fcaf 100644
--- a/drivers/lightnvm/pblk-rl.c
+++ b/drivers/lightnvm/pblk-rl.c
@@ -73,6 +73,16 @@ void pblk_rl_user_in(struct pblk_rl *rl, int nr_entries)
pblk_rl_kick_u_timer(rl);
}
+void pblk_rl_werr_line_in(struct pblk_rl *rl)
+{
+ atomic_inc(&rl->werr_lines);
+}
+
+void pblk_rl_werr_line_out(struct pblk_rl *rl)
+{
+ atomic_dec(&rl->werr_lines);
+}
+
void pblk_rl_gc_in(struct pblk_rl *rl, int nr_entries)
{
atomic_add(nr_entries, &rl->rb_gc_cnt);
@@ -99,11 +109,21 @@ static void __pblk_rl_update_rates(struct pblk_rl *rl,
{
struct pblk *pblk = container_of(rl, struct pblk, rl);
int max = rl->rb_budget;
+ int werr_gc_needed = atomic_read(&rl->werr_lines);
if (free_blocks >= rl->high) {
- rl->rb_user_max = max;
- rl->rb_gc_max = 0;
- rl->rb_state = PBLK_RL_HIGH;
+ if (werr_gc_needed) {
+ /* Allocate a small budget for recovering
+ * lines with write errors
+ */
+ rl->rb_gc_max = 1 << rl->rb_windows_pw;
+ rl->rb_user_max = max - rl->rb_gc_max;
+ rl->rb_state = PBLK_RL_WERR;
+ } else {
+ rl->rb_user_max = max;
+ rl->rb_gc_max = 0;
+ rl->rb_state = PBLK_RL_OFF;
+ }
} else if (free_blocks < rl->high) {
int shift = rl->high_pw - rl->rb_windows_pw;
int user_windows = free_blocks >> shift;
@@ -124,7 +144,7 @@ static void __pblk_rl_update_rates(struct pblk_rl *rl,
rl->rb_state = PBLK_RL_LOW;
}
- if (rl->rb_state == (PBLK_RL_MID | PBLK_RL_LOW))
+ if (rl->rb_state != PBLK_RL_OFF)
pblk_gc_should_start(pblk);
else
pblk_gc_should_stop(pblk);
@@ -221,6 +241,7 @@ void pblk_rl_init(struct pblk_rl *rl, int budget)
atomic_set(&rl->rb_user_cnt, 0);
atomic_set(&rl->rb_gc_cnt, 0);
atomic_set(&rl->rb_space, -1);
+ atomic_set(&rl->werr_lines, 0);
timer_setup(&rl->u_timer, pblk_rl_u_timer, 0);
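
The rate-limiter now reserves a small GC budget whenever write-errored lines exist, even when free blocks are plentiful. With an illustrative rb_budget of 4096 entries and rb_windows_pw of 7 (hypothetical values, not from the patch), the split works out as below:

    #include <stdio.h>

    int main(void)
    {
            int rb_budget = 4096;           /* hypothetical total budget */
            int rb_windows_pw = 7;          /* hypothetical window shift */
            int rb_gc_max = 1 << rb_windows_pw;

            /* gc: 128 entries, user: 3968 entries */
            printf("gc: %d entries, user: %d entries\n",
                   rb_gc_max, rb_budget - rb_gc_max);
            return 0;
    }
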
diff --git a/drivers/lightnvm/pblk-sysfs.c b/drivers/lightnvm/pblk-sysfs.c
index e61909af23a5..88a0a7c407aa 100644
--- a/drivers/lightnvm/pblk-sysfs.c
+++ b/drivers/lightnvm/pblk-sysfs.c
@@ -173,6 +173,8 @@ static ssize_t pblk_sysfs_lines(struct pblk *pblk, char *page)
int free_line_cnt = 0, closed_line_cnt = 0, emeta_line_cnt = 0;
int d_line_cnt = 0, l_line_cnt = 0;
int gc_full = 0, gc_high = 0, gc_mid = 0, gc_low = 0, gc_empty = 0;
+ int gc_werr = 0;
+
int bad = 0, cor = 0;
int msecs = 0, cur_sec = 0, vsc = 0, sec_in_line = 0;
int map_weight = 0, meta_weight = 0;
@@ -237,6 +239,15 @@ static ssize_t pblk_sysfs_lines(struct pblk *pblk, char *page)
gc_empty++;
}
+ list_for_each_entry(line, &l_mg->gc_werr_list, list) {
+ if (line->type == PBLK_LINETYPE_DATA)
+ d_line_cnt++;
+ else if (line->type == PBLK_LINETYPE_LOG)
+ l_line_cnt++;
+ closed_line_cnt++;
+ gc_werr++;
+ }
+
list_for_each_entry(line, &l_mg->bad_list, list)
bad++;
list_for_each_entry(line, &l_mg->corrupt_list, list)
@@ -275,8 +286,8 @@ static ssize_t pblk_sysfs_lines(struct pblk *pblk, char *page)
l_mg->nr_lines);
sz += snprintf(page + sz, PAGE_SIZE - sz,
- "GC: full:%d, high:%d, mid:%d, low:%d, empty:%d, queue:%d\n",
- gc_full, gc_high, gc_mid, gc_low, gc_empty,
+ "GC: full:%d, high:%d, mid:%d, low:%d, empty:%d, werr: %d, queue:%d\n",
+ gc_full, gc_high, gc_mid, gc_low, gc_empty, gc_werr,
atomic_read(&pblk->gc.read_inflight_gc));
sz += snprintf(page + sz, PAGE_SIZE - sz,
diff --git a/drivers/lightnvm/pblk-write.c b/drivers/lightnvm/pblk-write.c
index 3e6f1ebd743a..f353e52941f5 100644
--- a/drivers/lightnvm/pblk-write.c
+++ b/drivers/lightnvm/pblk-write.c
@@ -103,68 +103,150 @@ retry:
pblk_rb_sync_end(&pblk->rwb, &flags);
}
-/* When a write fails, we are not sure whether the block has grown bad or a page
- * range is more susceptible to write errors. If a high number of pages fail, we
- * assume that the block is bad and we mark it accordingly. In all cases, we
- * remap and resubmit the failed entries as fast as possible; if a flush is
- * waiting on a completion, the whole stack would stall otherwise.
- */
-static void pblk_end_w_fail(struct pblk *pblk, struct nvm_rq *rqd)
+/* Map remaining sectors in chunk, starting from ppa */
+static void pblk_map_remaining(struct pblk *pblk, struct ppa_addr *ppa)
{
- void *comp_bits = &rqd->ppa_status;
- struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd);
- struct pblk_rec_ctx *recovery;
- struct ppa_addr *ppa_list = rqd->ppa_list;
- int nr_ppas = rqd->nr_ppas;
- unsigned int c_entries;
- int bit, ret;
+ struct nvm_tgt_dev *dev = pblk->dev;
+ struct nvm_geo *geo = &dev->geo;
+ struct pblk_line *line;
+ struct ppa_addr map_ppa = *ppa;
+ u64 paddr;
+ int done = 0;
- if (unlikely(nr_ppas == 1))
- ppa_list = &rqd->ppa_addr;
+ line = &pblk->lines[pblk_ppa_to_line(*ppa)];
+ spin_lock(&line->lock);
- recovery = mempool_alloc(pblk->rec_pool, GFP_ATOMIC);
+ while (!done) {
+ paddr = pblk_dev_ppa_to_line_addr(pblk, map_ppa);
- INIT_LIST_HEAD(&recovery->failed);
+ if (!test_and_set_bit(paddr, line->map_bitmap))
+ line->left_msecs--;
- bit = -1;
- while ((bit = find_next_bit(comp_bits, nr_ppas, bit + 1)) < nr_ppas) {
- struct pblk_rb_entry *entry;
- struct ppa_addr ppa;
+ if (!test_and_set_bit(paddr, line->invalid_bitmap))
+ le32_add_cpu(line->vsc, -1);
- /* Logic error */
- if (bit > c_ctx->nr_valid) {
- WARN_ONCE(1, "pblk: corrupted write request\n");
- mempool_free(recovery, pblk->rec_pool);
- goto out;
+ if (geo->version == NVM_OCSSD_SPEC_12) {
+ map_ppa.ppa++;
+ if (map_ppa.g.pg == geo->num_pg)
+ done = 1;
+ } else {
+ map_ppa.m.sec++;
+ if (map_ppa.m.sec == geo->clba)
+ done = 1;
}
+ }
- ppa = ppa_list[bit];
- entry = pblk_rb_sync_scan_entry(&pblk->rwb, &ppa);
- if (!entry) {
- pr_err("pblk: could not scan entry on write failure\n");
- mempool_free(recovery, pblk->rec_pool);
- goto out;
- }
+ line->w_err_gc->has_write_err = 1;
+ spin_unlock(&line->lock);
+}
- /* The list is filled first and emptied afterwards. No need for
- * protecting it with a lock
+static void pblk_prepare_resubmit(struct pblk *pblk, unsigned int sentry,
+ unsigned int nr_entries)
+{
+ struct pblk_rb *rb = &pblk->rwb;
+ struct pblk_rb_entry *entry;
+ struct pblk_line *line;
+ struct pblk_w_ctx *w_ctx;
+ struct ppa_addr ppa_l2p;
+ int flags;
+ unsigned int pos, i;
+
+ spin_lock(&pblk->trans_lock);
+ pos = sentry;
+ for (i = 0; i < nr_entries; i++) {
+ entry = &rb->entries[pos];
+ w_ctx = &entry->w_ctx;
+
+ /* Check if the lba has been overwritten */
+ ppa_l2p = pblk_trans_map_get(pblk, w_ctx->lba);
+ if (!pblk_ppa_comp(ppa_l2p, entry->cacheline))
+ w_ctx->lba = ADDR_EMPTY;
+
+ /* Mark up the entry as submittable again */
+ flags = READ_ONCE(w_ctx->flags);
+ flags |= PBLK_WRITTEN_DATA;
+ /* Release flags on write context. Protect from writes */
+ smp_store_release(&w_ctx->flags, flags);
+
+ /* Decrease the reference count to the line as we will
+ * re-map these entries
*/
- list_add_tail(&entry->index, &recovery->failed);
+ line = &pblk->lines[pblk_ppa_to_line(w_ctx->ppa)];
+ kref_put(&line->ref, pblk_line_put);
+
+ pos = (pos + 1) & (rb->nr_entries - 1);
}
+ spin_unlock(&pblk->trans_lock);
+}
- c_entries = find_first_bit(comp_bits, nr_ppas);
- ret = pblk_recov_setup_rq(pblk, c_ctx, recovery, comp_bits, c_entries);
- if (ret) {
- pr_err("pblk: could not recover from write failure\n");
- mempool_free(recovery, pblk->rec_pool);
- goto out;
+static void pblk_queue_resubmit(struct pblk *pblk, struct pblk_c_ctx *c_ctx)
+{
+ struct pblk_c_ctx *r_ctx;
+
+ r_ctx = kzalloc(sizeof(struct pblk_c_ctx), GFP_KERNEL);
+ if (!r_ctx)
+ return;
+
+ r_ctx->lun_bitmap = NULL;
+ r_ctx->sentry = c_ctx->sentry;
+ r_ctx->nr_valid = c_ctx->nr_valid;
+ r_ctx->nr_padded = c_ctx->nr_padded;
+
+ spin_lock(&pblk->resubmit_lock);
+ list_add_tail(&r_ctx->list, &pblk->resubmit_list);
+ spin_unlock(&pblk->resubmit_lock);
+
+#ifdef CONFIG_NVM_DEBUG
+ atomic_long_add(c_ctx->nr_valid, &pblk->recov_writes);
+#endif
+}
+
+static void pblk_submit_rec(struct work_struct *work)
+{
+ struct pblk_rec_ctx *recovery =
+ container_of(work, struct pblk_rec_ctx, ws_rec);
+ struct pblk *pblk = recovery->pblk;
+ struct nvm_rq *rqd = recovery->rqd;
+ struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd);
+ struct ppa_addr *ppa_list;
+
+ pblk_log_write_err(pblk, rqd);
+
+ if (rqd->nr_ppas == 1)
+ ppa_list = &rqd->ppa_addr;
+ else
+ ppa_list = rqd->ppa_list;
+
+ pblk_map_remaining(pblk, ppa_list);
+ pblk_queue_resubmit(pblk, c_ctx);
+
+ pblk_up_rq(pblk, rqd->ppa_list, rqd->nr_ppas, c_ctx->lun_bitmap);
+ if (c_ctx->nr_padded)
+ pblk_bio_free_pages(pblk, rqd->bio, c_ctx->nr_valid,
+ c_ctx->nr_padded);
+ bio_put(rqd->bio);
+ pblk_free_rqd(pblk, rqd, PBLK_WRITE);
+ mempool_free(recovery, &pblk->rec_pool);
+
+ atomic_dec(&pblk->inflight_io);
+}
+
+static void pblk_end_w_fail(struct pblk *pblk, struct nvm_rq *rqd)
+{
+ struct pblk_rec_ctx *recovery;
+
+ recovery = mempool_alloc(&pblk->rec_pool, GFP_ATOMIC);
+ if (!recovery) {
+ pr_err("pblk: could not allocate recovery work\n");
+ return;
}
+ recovery->pblk = pblk;
+ recovery->rqd = rqd;
+
INIT_WORK(&recovery->ws_rec, pblk_submit_rec);
queue_work(pblk->close_wq, &recovery->ws_rec);
-
-out:
- pblk_complete_write(pblk, rqd, c_ctx);
}
static void pblk_end_io_write(struct nvm_rq *rqd)
@@ -173,8 +255,8 @@ static void pblk_end_io_write(struct nvm_rq *rqd)
struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd);
if (rqd->error) {
- pblk_log_write_err(pblk, rqd);
- return pblk_end_w_fail(pblk, rqd);
+ pblk_end_w_fail(pblk, rqd);
+ return;
}
#ifdef CONFIG_NVM_DEBUG
else
@@ -198,6 +280,7 @@ static void pblk_end_io_write_meta(struct nvm_rq *rqd)
if (rqd->error) {
pblk_log_write_err(pblk, rqd);
pr_err("pblk: metadata I/O failed. Line %d\n", line->id);
+ line->w_err_gc->has_write_err = 1;
}
sync = atomic_add_return(rqd->nr_ppas, &emeta->sync);
@@ -266,31 +349,6 @@ static int pblk_setup_w_rq(struct pblk *pblk, struct nvm_rq *rqd,
return 0;
}
-int pblk_setup_w_rec_rq(struct pblk *pblk, struct nvm_rq *rqd,
- struct pblk_c_ctx *c_ctx)
-{
- struct pblk_line_meta *lm = &pblk->lm;
- unsigned long *lun_bitmap;
- int ret;
-
- lun_bitmap = kzalloc(lm->lun_bitmap_len, GFP_KERNEL);
- if (!lun_bitmap)
- return -ENOMEM;
-
- c_ctx->lun_bitmap = lun_bitmap;
-
- ret = pblk_alloc_w_rq(pblk, rqd, rqd->nr_ppas, pblk_end_io_write);
- if (ret)
- return ret;
-
- pblk_map_rq(pblk, rqd, c_ctx->sentry, lun_bitmap, c_ctx->nr_valid, 0);
-
- rqd->ppa_status = (u64)0;
- rqd->flags = pblk_set_progr_mode(pblk, PBLK_WRITE);
-
- return ret;
-}
-
static int pblk_calc_secs_to_sync(struct pblk *pblk, unsigned int secs_avail,
unsigned int secs_to_flush)
{
@@ -339,6 +397,7 @@ int pblk_submit_meta_io(struct pblk *pblk, struct pblk_line *meta_line)
bio = pblk_bio_map_addr(pblk, data, rq_ppas, rq_len,
l_mg->emeta_alloc_type, GFP_KERNEL);
if (IS_ERR(bio)) {
+ pr_err("pblk: failed to map emeta io");
ret = PTR_ERR(bio);
goto fail_free_rqd;
}
@@ -515,26 +574,54 @@ static int pblk_submit_write(struct pblk *pblk)
unsigned int secs_avail, secs_to_sync, secs_to_com;
unsigned int secs_to_flush;
unsigned long pos;
+ unsigned int resubmit;
- /* If there are no sectors in the cache, flushes (bios without data)
- * will be cleared on the cache threads
- */
- secs_avail = pblk_rb_read_count(&pblk->rwb);
- if (!secs_avail)
- return 1;
-
- secs_to_flush = pblk_rb_flush_point_count(&pblk->rwb);
- if (!secs_to_flush && secs_avail < pblk->min_write_pgs)
- return 1;
-
- secs_to_sync = pblk_calc_secs_to_sync(pblk, secs_avail, secs_to_flush);
- if (secs_to_sync > pblk->max_write_pgs) {
- pr_err("pblk: bad buffer sync calculation\n");
- return 1;
- }
+ spin_lock(&pblk->resubmit_lock);
+ resubmit = !list_empty(&pblk->resubmit_list);
+ spin_unlock(&pblk->resubmit_lock);
+
+ /* Resubmit failed writes first */
+ if (resubmit) {
+ struct pblk_c_ctx *r_ctx;
+
+ spin_lock(&pblk->resubmit_lock);
+ r_ctx = list_first_entry(&pblk->resubmit_list,
+ struct pblk_c_ctx, list);
+ list_del(&r_ctx->list);
+ spin_unlock(&pblk->resubmit_lock);
+
+ secs_avail = r_ctx->nr_valid;
+ pos = r_ctx->sentry;
+
+ pblk_prepare_resubmit(pblk, pos, secs_avail);
+ secs_to_sync = pblk_calc_secs_to_sync(pblk, secs_avail,
+ secs_avail);
- secs_to_com = (secs_to_sync > secs_avail) ? secs_avail : secs_to_sync;
- pos = pblk_rb_read_commit(&pblk->rwb, secs_to_com);
+ kfree(r_ctx);
+ } else {
+ /* If there are no sectors in the cache,
+ * flushes (bios without data) will be cleared on
+ * the cache threads
+ */
+ secs_avail = pblk_rb_read_count(&pblk->rwb);
+ if (!secs_avail)
+ return 1;
+
+ secs_to_flush = pblk_rb_flush_point_count(&pblk->rwb);
+ if (!secs_to_flush && secs_avail < pblk->min_write_pgs)
+ return 1;
+
+ secs_to_sync = pblk_calc_secs_to_sync(pblk, secs_avail,
+ secs_to_flush);
+ if (secs_to_sync > pblk->max_write_pgs) {
+ pr_err("pblk: bad buffer sync calculation\n");
+ return 1;
+ }
+
+ secs_to_com = (secs_to_sync > secs_avail) ?
+ secs_avail : secs_to_sync;
+ pos = pblk_rb_read_commit(&pblk->rwb, secs_to_com);
+ }
bio = bio_alloc(GFP_KERNEL, secs_to_sync);
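
pblk_submit_write() above now services the resubmit queue before consuming new cache entries, so data hit by a write error cannot starve behind fresh writes. A hedged sketch of the pop-one-under-lock step, using a stand-in for struct pblk_c_ctx:

    #include <linux/list.h>
    #include <linux/spinlock.h>
    #include <linux/slab.h>

    struct demo_ctx {                       /* stand-in context struct */
            unsigned int sentry;
            unsigned int nr_valid;
            struct list_head list;
    };

    static struct demo_ctx *pop_resubmit(struct list_head *q, spinlock_t *lock)
    {
            struct demo_ctx *ctx = NULL;

            spin_lock(lock);
            if (!list_empty(q)) {
                    ctx = list_first_entry(q, struct demo_ctx, list);
                    list_del(&ctx->list);
            }
            spin_unlock(lock);

            return ctx;                     /* caller kfree()s when done */
    }
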
diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h
index 9c682acfc5d1..34cc1d64a9d4 100644
--- a/drivers/lightnvm/pblk.h
+++ b/drivers/lightnvm/pblk.h
@@ -89,12 +89,14 @@ struct pblk_sec_meta {
/* The number of GC lists and the rate-limiter states go together. This way the
* rate-limiter can dictate how much GC is needed based on resource utilization.
*/
-#define PBLK_GC_NR_LISTS 3
+#define PBLK_GC_NR_LISTS 4
enum {
- PBLK_RL_HIGH = 1,
- PBLK_RL_MID = 2,
- PBLK_RL_LOW = 3,
+ PBLK_RL_OFF = 0,
+ PBLK_RL_WERR = 1,
+ PBLK_RL_HIGH = 2,
+ PBLK_RL_MID = 3,
+ PBLK_RL_LOW = 4
};
#define pblk_dma_meta_size (sizeof(struct pblk_sec_meta) * PBLK_MAX_REQ_ADDRS)
@@ -128,7 +130,6 @@ struct pblk_pad_rq {
struct pblk_rec_ctx {
struct pblk *pblk;
struct nvm_rq *rqd;
- struct list_head failed;
struct work_struct ws_rec;
};
@@ -279,6 +280,8 @@ struct pblk_rl {
int rb_user_active;
int rb_gc_active;
+ atomic_t werr_lines; /* Number of write error lines that need GC */
+
struct timer_list u_timer;
unsigned long long nr_secs;
@@ -312,6 +315,7 @@ enum {
PBLK_LINEGC_MID = 23,
PBLK_LINEGC_HIGH = 24,
PBLK_LINEGC_FULL = 25,
+ PBLK_LINEGC_WERR = 26
};
#define PBLK_MAGIC 0x70626c6b /*pblk*/
@@ -413,6 +417,11 @@ struct pblk_smeta {
struct line_smeta *buf; /* smeta buffer in persistent format */
};
+struct pblk_w_err_gc {
+ int has_write_err;
+ __le64 *lba_list;
+};
+
struct pblk_line {
struct pblk *pblk;
unsigned int id; /* Line number corresponds to the
@@ -458,6 +467,8 @@ struct pblk_line {
struct kref ref; /* Write buffer L2P references */
+ struct pblk_w_err_gc *w_err_gc; /* Write error gc recovery metadata */
+
spinlock_t lock; /* Necessary for invalid_bitmap only */
};
@@ -489,6 +500,8 @@ struct pblk_line_mgmt {
struct list_head gc_mid_list; /* Full lines ready to GC, mid isc */
struct list_head gc_low_list; /* Full lines ready to GC, low isc */
+ struct list_head gc_werr_list; /* Write err recovery list */
+
struct list_head gc_full_list; /* Full lines ready to GC, no valid */
struct list_head gc_empty_list; /* Full lines close, all valid */
@@ -664,12 +677,15 @@ struct pblk {
struct list_head compl_list;
- mempool_t *page_bio_pool;
- mempool_t *gen_ws_pool;
- mempool_t *rec_pool;
- mempool_t *r_rq_pool;
- mempool_t *w_rq_pool;
- mempool_t *e_rq_pool;
+ spinlock_t resubmit_lock; /* Resubmit list lock */
+ struct list_head resubmit_list; /* Resubmit list for failed writes */
+
+ mempool_t page_bio_pool;
+ mempool_t gen_ws_pool;
+ mempool_t rec_pool;
+ mempool_t r_rq_pool;
+ mempool_t w_rq_pool;
+ mempool_t e_rq_pool;
struct workqueue_struct *close_wq;
struct workqueue_struct *bb_wq;
@@ -713,9 +729,6 @@ void pblk_rb_sync_l2p(struct pblk_rb *rb);
unsigned int pblk_rb_read_to_bio(struct pblk_rb *rb, struct nvm_rq *rqd,
unsigned int pos, unsigned int nr_entries,
unsigned int count);
-unsigned int pblk_rb_read_to_bio_list(struct pblk_rb *rb, struct bio *bio,
- struct list_head *list,
- unsigned int max);
int pblk_rb_copy_to_bio(struct pblk_rb *rb, struct bio *bio, sector_t lba,
struct ppa_addr ppa, int bio_iter, bool advanced_bio);
unsigned int pblk_rb_read_commit(struct pblk_rb *rb, unsigned int entries);
@@ -766,11 +779,13 @@ struct pblk_line *pblk_line_get_data(struct pblk *pblk);
struct pblk_line *pblk_line_get_erase(struct pblk *pblk);
int pblk_line_erase(struct pblk *pblk, struct pblk_line *line);
int pblk_line_is_full(struct pblk_line *line);
-void pblk_line_free(struct pblk *pblk, struct pblk_line *line);
+void pblk_line_free(struct pblk_line *line);
void pblk_line_close_meta(struct pblk *pblk, struct pblk_line *line);
void pblk_line_close(struct pblk *pblk, struct pblk_line *line);
void pblk_line_close_ws(struct work_struct *work);
void pblk_pipeline_stop(struct pblk *pblk);
+void __pblk_pipeline_stop(struct pblk *pblk);
+void __pblk_pipeline_flush(struct pblk *pblk);
void pblk_gen_run_ws(struct pblk *pblk, struct pblk_line *line, void *priv,
void (*work)(struct work_struct *), gfp_t gfp_mask,
struct workqueue_struct *wq);
@@ -794,7 +809,6 @@ void pblk_down_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas,
void pblk_down_page(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas);
void pblk_up_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas,
unsigned long *lun_bitmap);
-void pblk_end_io_sync(struct nvm_rq *rqd);
int pblk_bio_add_pages(struct pblk *pblk, struct bio *bio, gfp_t flags,
int nr_pages);
void pblk_bio_free_pages(struct pblk *pblk, struct bio *bio, int off,
@@ -837,23 +851,20 @@ void pblk_map_rq(struct pblk *pblk, struct nvm_rq *rqd, unsigned int sentry,
int pblk_write_ts(void *data);
void pblk_write_timer_fn(struct timer_list *t);
void pblk_write_should_kick(struct pblk *pblk);
+void pblk_write_kick(struct pblk *pblk);
/*
* pblk read path
*/
-extern struct bio_set *pblk_bio_set;
+extern struct bio_set pblk_bio_set;
int pblk_submit_read(struct pblk *pblk, struct bio *bio);
int pblk_submit_read_gc(struct pblk *pblk, struct pblk_gc_rq *gc_rq);
/*
* pblk recovery
*/
-void pblk_submit_rec(struct work_struct *work);
struct pblk_line *pblk_recov_l2p(struct pblk *pblk);
int pblk_recov_pad(struct pblk *pblk);
int pblk_recov_check_emeta(struct pblk *pblk, struct line_emeta *emeta);
-int pblk_recov_setup_rq(struct pblk *pblk, struct pblk_c_ctx *c_ctx,
- struct pblk_rec_ctx *recovery, u64 *comp_bits,
- unsigned int comp);
/*
* pblk gc
@@ -864,7 +875,7 @@ int pblk_recov_setup_rq(struct pblk *pblk, struct pblk_c_ctx *c_ctx,
#define PBLK_GC_RSV_LINE 1 /* Reserved lines for GC */
int pblk_gc_init(struct pblk *pblk);
-void pblk_gc_exit(struct pblk *pblk);
+void pblk_gc_exit(struct pblk *pblk, bool graceful);
void pblk_gc_should_start(struct pblk *pblk);
void pblk_gc_should_stop(struct pblk *pblk);
void pblk_gc_should_kick(struct pblk *pblk);
@@ -894,6 +905,9 @@ void pblk_rl_free_lines_dec(struct pblk_rl *rl, struct pblk_line *line,
bool used);
int pblk_rl_is_limit(struct pblk_rl *rl);
+void pblk_rl_werr_line_in(struct pblk_rl *rl);
+void pblk_rl_werr_line_out(struct pblk_rl *rl);
+
/*
* pblk sysfs
*/
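
In pblk.h above, PBLK_GC_NR_LISTS grows to 4 and the werr list is installed at index 0 of gc_lists[], which makes write-errored lines the first victims any index-ordered scan finds. A hedged sketch of such a scan:

    #include <linux/list.h>

    #define DEMO_GC_NR_LISTS 4      /* werr, high, mid, low -- in that order */

    static struct list_head *pick_gc_list(struct list_head **gc_lists)
    {
            int i;

            for (i = 0; i < DEMO_GC_NR_LISTS; i++)
                    if (!list_empty(gc_lists[i]))
                            return gc_lists[i];     /* lowest index wins */

            return NULL;
    }
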
diff --git a/drivers/macintosh/via-pmu.c b/drivers/macintosh/via-pmu.c
index 433dbeddfcf9..6663893f41c4 100644
--- a/drivers/macintosh/via-pmu.c
+++ b/drivers/macintosh/via-pmu.c
@@ -191,10 +191,10 @@ static int init_pmu(void);
static void pmu_start(void);
static irqreturn_t via_pmu_interrupt(int irq, void *arg);
static irqreturn_t gpio1_interrupt(int irq, void *arg);
-static const struct file_operations pmu_info_proc_fops;
-static const struct file_operations pmu_irqstats_proc_fops;
+static int pmu_info_proc_show(struct seq_file *m, void *v);
+static int pmu_irqstats_proc_show(struct seq_file *m, void *v);
+static int pmu_battery_proc_show(struct seq_file *m, void *v);
static void pmu_pass_intr(unsigned char *data, int len);
-static const struct file_operations pmu_battery_proc_fops;
static const struct file_operations pmu_options_proc_fops;
#ifdef CONFIG_ADB
@@ -511,13 +511,15 @@ static int __init via_pmu_dev_init(void)
for (i=0; i<pmu_battery_count; i++) {
char title[16];
sprintf(title, "battery_%ld", i);
- proc_pmu_batt[i] = proc_create_data(title, 0, proc_pmu_root,
- &pmu_battery_proc_fops, (void *)i);
+ proc_pmu_batt[i] = proc_create_single_data(title, 0,
+ proc_pmu_root, pmu_battery_proc_show,
+ (void *)i);
}
- proc_pmu_info = proc_create("info", 0, proc_pmu_root, &pmu_info_proc_fops);
- proc_pmu_irqstats = proc_create("interrupts", 0, proc_pmu_root,
- &pmu_irqstats_proc_fops);
+ proc_pmu_info = proc_create_single("info", 0, proc_pmu_root,
+ pmu_info_proc_show);
+ proc_pmu_irqstats = proc_create_single("interrupts", 0,
+ proc_pmu_root, pmu_irqstats_proc_show);
proc_pmu_options = proc_create("options", 0600, proc_pmu_root,
&pmu_options_proc_fops);
}
@@ -811,19 +813,6 @@ static int pmu_info_proc_show(struct seq_file *m, void *v)
return 0;
}
-static int pmu_info_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, pmu_info_proc_show, NULL);
-}
-
-static const struct file_operations pmu_info_proc_fops = {
- .owner = THIS_MODULE,
- .open = pmu_info_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
static int pmu_irqstats_proc_show(struct seq_file *m, void *v)
{
int i;
@@ -848,19 +837,6 @@ static int pmu_irqstats_proc_show(struct seq_file *m, void *v)
return 0;
}
-static int pmu_irqstats_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, pmu_irqstats_proc_show, NULL);
-}
-
-static const struct file_operations pmu_irqstats_proc_fops = {
- .owner = THIS_MODULE,
- .open = pmu_irqstats_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
static int pmu_battery_proc_show(struct seq_file *m, void *v)
{
long batnum = (long)m->private;
@@ -875,19 +851,6 @@ static int pmu_battery_proc_show(struct seq_file *m, void *v)
return 0;
}
-static int pmu_battery_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, pmu_battery_proc_show, PDE_DATA(inode));
-}
-
-static const struct file_operations pmu_battery_proc_fops = {
- .owner = THIS_MODULE,
- .open = pmu_battery_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
static int pmu_options_proc_show(struct seq_file *m, void *v)
{
#if defined(CONFIG_SUSPEND) && defined(CONFIG_PPC32)
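
The via-pmu.c conversion relies on proc_create_single() and proc_create_single_data(), which wrap single_open() internally so a read-only proc file needs only its show callback. A minimal sketch of the new-style registration:

    #include <linux/proc_fs.h>
    #include <linux/seq_file.h>

    static int demo_proc_show(struct seq_file *m, void *v)
    {
            seq_puts(m, "status: ok\n");
            return 0;
    }

    static int __init demo_proc_init(void)
    {
            /* replaces an open/read/llseek/release file_operations block */
            return proc_create_single("demo", 0, NULL, demo_proc_show)
                    ? 0 : -ENOMEM;
    }
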
diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c
index 004cc3cc6123..7fa2631b422c 100644
--- a/drivers/md/bcache/alloc.c
+++ b/drivers/md/bcache/alloc.c
@@ -290,7 +290,7 @@ do { \
if (kthread_should_stop() || \
test_bit(CACHE_SET_IO_DISABLE, &ca->set->flags)) { \
set_current_state(TASK_RUNNING); \
- return 0; \
+ goto out; \
} \
\
schedule(); \
@@ -378,6 +378,9 @@ retry_invalidate:
bch_prio_write(ca);
}
}
+out:
+ wait_for_kthread_stop();
+ return 0;
}
/* Allocation */
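
The alloc.c hunk fixes a subtle kthread rule: a thread function must not return before kthread_stop() has been called on it, or the stopper operates on a dead task. wait_for_kthread_stop() is a bcache-local helper whose definition is not shown in this hunk; the underlying pattern, sketched:

    #include <linux/kthread.h>
    #include <linux/sched.h>

    static int demo_thread(void *arg)
    {
            /* ... main loop ... */

            /* park until kthread_stop() is actually called on us */
            while (!kthread_should_stop()) {
                    set_current_state(TASK_INTERRUPTIBLE);
                    schedule();
            }
            return 0;
    }
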
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index d338b7086013..d6bf294f3907 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -269,7 +269,7 @@ struct bcache_device {
atomic_t *stripe_sectors_dirty;
unsigned long *full_dirty_stripes;
- struct bio_set *bio_split;
+ struct bio_set bio_split;
unsigned data_csum:1;
@@ -345,6 +345,7 @@ struct cached_dev {
struct keybuf writeback_keys;
+ struct task_struct *status_update_thread;
/*
* Order the write-half of writeback operations strongly in dispatch
* order. (Maintain LBA order; don't allow reads completing out of
@@ -392,6 +393,9 @@ struct cached_dev {
#define DEFAULT_CACHED_DEV_ERROR_LIMIT 64
atomic_t io_errors;
unsigned error_limit;
+ unsigned offline_seconds;
+
+ char backing_dev_name[BDEVNAME_SIZE];
};
enum alloc_reserve {
@@ -464,6 +468,8 @@ struct cache {
atomic_long_t meta_sectors_written;
atomic_long_t btree_sectors_written;
atomic_long_t sectors_written;
+
+ char cache_dev_name[BDEVNAME_SIZE];
};
struct gc_stat {
@@ -524,9 +530,9 @@ struct cache_set {
struct closure sb_write;
struct semaphore sb_write_mutex;
- mempool_t *search;
- mempool_t *bio_meta;
- struct bio_set *bio_split;
+ mempool_t search;
+ mempool_t bio_meta;
+ struct bio_set bio_split;
/* For the btree cache */
struct shrinker shrink;
@@ -651,7 +657,7 @@ struct cache_set {
* A btree node on disk could have too many bsets for an iterator to fit
* on the stack - have to dynamically allocate them
*/
- mempool_t *fill_iter;
+ mempool_t fill_iter;
struct bset_sort_state sort;
@@ -952,8 +958,6 @@ void bch_prio_write(struct cache *);
void bch_write_bdev_super(struct cached_dev *, struct closure *);
extern struct workqueue_struct *bcache_wq;
-extern const char * const bch_cache_modes[];
-extern const char * const bch_stop_on_failure_modes[];
extern struct mutex bch_register_lock;
extern struct list_head bch_cache_sets;
diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c
index 579c696a5fe0..f3403b45bc28 100644
--- a/drivers/md/bcache/bset.c
+++ b/drivers/md/bcache/bset.c
@@ -1118,8 +1118,7 @@ struct bkey *bch_btree_iter_next_filter(struct btree_iter *iter,
void bch_bset_sort_state_free(struct bset_sort_state *state)
{
- if (state->pool)
- mempool_destroy(state->pool);
+ mempool_exit(&state->pool);
}
int bch_bset_sort_state_init(struct bset_sort_state *state, unsigned page_order)
@@ -1129,11 +1128,7 @@ int bch_bset_sort_state_init(struct bset_sort_state *state, unsigned page_order)
state->page_order = page_order;
state->crit_factor = int_sqrt(1 << page_order);
- state->pool = mempool_create_page_pool(1, page_order);
- if (!state->pool)
- return -ENOMEM;
-
- return 0;
+ return mempool_init_page_pool(&state->pool, 1, page_order);
}
EXPORT_SYMBOL(bch_bset_sort_state_init);
@@ -1191,7 +1186,7 @@ static void __btree_sort(struct btree_keys *b, struct btree_iter *iter,
BUG_ON(order > state->page_order);
- outp = mempool_alloc(state->pool, GFP_NOIO);
+ outp = mempool_alloc(&state->pool, GFP_NOIO);
out = page_address(outp);
used_mempool = true;
order = state->page_order;
@@ -1220,7 +1215,7 @@ static void __btree_sort(struct btree_keys *b, struct btree_iter *iter,
}
if (used_mempool)
- mempool_free(virt_to_page(out), state->pool);
+ mempool_free(virt_to_page(out), &state->pool);
else
free_pages((unsigned long) out, order);
diff --git a/drivers/md/bcache/bset.h b/drivers/md/bcache/bset.h
index 0c24280f3b98..b867f2200495 100644
--- a/drivers/md/bcache/bset.h
+++ b/drivers/md/bcache/bset.h
@@ -347,7 +347,7 @@ static inline struct bkey *bch_bset_search(struct btree_keys *b,
/* Sorting */
struct bset_sort_state {
- mempool_t *pool;
+ mempool_t pool;
unsigned page_order;
unsigned crit_factor;
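
The same pointer-to-embedded migration runs through bcache (and the pblk hunks earlier): mempool_create_*() returning a pointer becomes mempool_init_*() filling a caller-owned mempool_t, with mempool_exit() replacing mempool_destroy(). A minimal sketch, kernel context assumed:

    #include <linux/mempool.h>

    struct demo_state {
            mempool_t pool;                 /* embedded, starts zeroed */
    };

    static int demo_init(struct demo_state *s, unsigned int page_order)
    {
            /* 0 on success, -ENOMEM on failure */
            return mempool_init_page_pool(&s->pool, 1, page_order);
    }

    static void demo_free(struct demo_state *s)
    {
            mempool_exit(&s->pool);         /* no-op on a never-inited pool */
    }
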
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index 17936b2dc7d6..2a0968c04e21 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -204,7 +204,7 @@ void bch_btree_node_read_done(struct btree *b)
struct bset *i = btree_bset_first(b);
struct btree_iter *iter;
- iter = mempool_alloc(b->c->fill_iter, GFP_NOIO);
+ iter = mempool_alloc(&b->c->fill_iter, GFP_NOIO);
iter->size = b->c->sb.bucket_size / b->c->sb.block_size;
iter->used = 0;
@@ -271,7 +271,7 @@ void bch_btree_node_read_done(struct btree *b)
bch_bset_init_next(&b->keys, write_block(b),
bset_magic(&b->c->sb));
out:
- mempool_free(iter, b->c->fill_iter);
+ mempool_free(iter, &b->c->fill_iter);
return;
err:
set_btree_node_io_error(b);
diff --git a/drivers/md/bcache/debug.c b/drivers/md/bcache/debug.c
index 028f7b386e01..d030ce3025a6 100644
--- a/drivers/md/bcache/debug.c
+++ b/drivers/md/bcache/debug.c
@@ -106,7 +106,6 @@ void bch_btree_verify(struct btree *b)
void bch_data_verify(struct cached_dev *dc, struct bio *bio)
{
- char name[BDEVNAME_SIZE];
struct bio *check;
struct bio_vec bv, cbv;
struct bvec_iter iter, citer = { 0 };
@@ -134,7 +133,7 @@ void bch_data_verify(struct cached_dev *dc, struct bio *bio)
bv.bv_len),
dc->disk.c,
"verify failed at dev %s sector %llu",
- bdevname(dc->bdev, name),
+ dc->backing_dev_name,
(uint64_t) bio->bi_iter.bi_sector);
kunmap_atomic(p1);
@@ -251,7 +250,9 @@ void bch_debug_exit(void)
int __init bch_debug_init(struct kobject *kobj)
{
- bcache_debug = debugfs_create_dir("bcache", NULL);
+ if (!IS_ENABLED(CONFIG_DEBUG_FS))
+ return 0;
+ bcache_debug = debugfs_create_dir("bcache", NULL);
return IS_ERR_OR_NULL(bcache_debug);
}
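
Note: the debug.c hunk makes bch_debug_init() a no-op when debugfs is not built in. IS_ENABLED(CONFIG_DEBUG_FS) folds to a compile-time constant, so the dead branch is eliminated entirely. A sketch of the same guard pattern, with a made-up module name:

    #include <linux/debugfs.h>
    #include <linux/err.h>

    static struct dentry *example_debug_dir;

    static int __init example_debug_init(void)
    {
            /* with CONFIG_DEBUG_FS=n the stubbed-out debugfs calls
             * would only return an error pointer; skip them instead */
            if (!IS_ENABLED(CONFIG_DEBUG_FS))
                    return 0;

            example_debug_dir = debugfs_create_dir("example", NULL);
            return IS_ERR_OR_NULL(example_debug_dir) ? -ENODEV : 0;
    }
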
diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c
index 7fac97ae036e..9612873afee2 100644
--- a/drivers/md/bcache/io.c
+++ b/drivers/md/bcache/io.c
@@ -17,12 +17,12 @@
void bch_bbio_free(struct bio *bio, struct cache_set *c)
{
struct bbio *b = container_of(bio, struct bbio, bio);
- mempool_free(b, c->bio_meta);
+ mempool_free(b, &c->bio_meta);
}
struct bio *bch_bbio_alloc(struct cache_set *c)
{
- struct bbio *b = mempool_alloc(c->bio_meta, GFP_NOIO);
+ struct bbio *b = mempool_alloc(&c->bio_meta, GFP_NOIO);
struct bio *bio = &b->bio;
bio_init(bio, bio->bi_inline_vecs, bucket_pages(c));
@@ -52,7 +52,6 @@ void bch_submit_bbio(struct bio *bio, struct cache_set *c,
/* IO errors */
void bch_count_backing_io_errors(struct cached_dev *dc, struct bio *bio)
{
- char buf[BDEVNAME_SIZE];
unsigned errors;
WARN_ONCE(!dc, "NULL pointer of struct cached_dev");
@@ -60,7 +59,7 @@ void bch_count_backing_io_errors(struct cached_dev *dc, struct bio *bio)
errors = atomic_add_return(1, &dc->io_errors);
if (errors < dc->error_limit)
pr_err("%s: IO error on backing device, unrecoverable",
- bio_devname(bio, buf));
+ dc->backing_dev_name);
else
bch_cached_dev_error(dc);
}
@@ -105,19 +104,18 @@ void bch_count_io_errors(struct cache *ca,
}
if (error) {
- char buf[BDEVNAME_SIZE];
unsigned errors = atomic_add_return(1 << IO_ERROR_SHIFT,
&ca->io_errors);
errors >>= IO_ERROR_SHIFT;
if (errors < ca->set->error_limit)
pr_err("%s: IO error on %s%s",
- bdevname(ca->bdev, buf), m,
+ ca->cache_dev_name, m,
is_read ? ", recovering." : ".");
else
bch_cache_set_error(ca->set,
"%s: too many IO errors %s",
- bdevname(ca->bdev, buf), m);
+ ca->cache_dev_name, m);
}
}
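
Note: the io.c hunks print a device name cached at registration time (dc->backing_dev_name, ca->cache_dev_name) instead of calling bdevname() into an on-stack BDEVNAME_SIZE buffer in every error path; the cached copy also remains printable after the underlying device has disappeared. A sketch of the pattern with invented names:

    #include <linux/blkdev.h>       /* bdevname(), BDEVNAME_SIZE */
    #include <linux/printk.h>

    struct example_dev {
            struct block_device *bdev;
            char dev_name[BDEVNAME_SIZE];   /* filled once, read many times */
    };

    static void example_register(struct example_dev *d,
                                 struct block_device *bdev)
    {
            d->bdev = bdev;
            bdevname(bdev, d->dev_name);    /* cache the name up front */
    }

    static void example_report_error(struct example_dev *d)
    {
            /* no stack buffer and no bdev dereference in the error path */
            pr_err("%s: I/O error\n", d->dev_name);
    }
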
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index a65e3365eeb9..ae67f5fa8047 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -213,7 +213,7 @@ static void bch_data_insert_start(struct closure *cl)
do {
unsigned i;
struct bkey *k;
- struct bio_set *split = op->c->bio_split;
+ struct bio_set *split = &op->c->bio_split;
/* 1 for the device pointer and 1 for the chksum */
if (bch_keylist_realloc(&op->insert_keys,
@@ -548,7 +548,7 @@ static int cache_lookup_fn(struct btree_op *op, struct btree *b, struct bkey *k)
n = bio_next_split(bio, min_t(uint64_t, INT_MAX,
KEY_OFFSET(k) - bio->bi_iter.bi_sector),
- GFP_NOIO, s->d->bio_split);
+ GFP_NOIO, &s->d->bio_split);
bio_key = &container_of(n, struct bbio, bio)->key;
bch_bkey_copy_single_ptr(bio_key, k, ptr);
@@ -649,11 +649,8 @@ static void backing_request_endio(struct bio *bio)
*/
if (unlikely(s->iop.writeback &&
bio->bi_opf & REQ_PREFLUSH)) {
- char buf[BDEVNAME_SIZE];
-
- bio_devname(bio, buf);
pr_err("Can't flush %s: returned bi_status %i",
- buf, bio->bi_status);
+ dc->backing_dev_name, bio->bi_status);
} else {
/* set to orig_bio->bi_status in bio_complete() */
s->iop.status = bio->bi_status;
@@ -710,7 +707,7 @@ static void search_free(struct closure *cl)
bio_complete(s);
closure_debug_destroy(cl);
- mempool_free(s, s->d->c->search);
+ mempool_free(s, &s->d->c->search);
}
static inline struct search *search_alloc(struct bio *bio,
@@ -718,7 +715,7 @@ static inline struct search *search_alloc(struct bio *bio,
{
struct search *s;
- s = mempool_alloc(d->c->search, GFP_NOIO);
+ s = mempool_alloc(&d->c->search, GFP_NOIO);
closure_init(&s->cl, NULL);
do_bio_hook(s, bio, request_endio);
@@ -867,7 +864,7 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s,
s->cache_missed = 1;
if (s->cache_miss || s->iop.bypass) {
- miss = bio_next_split(bio, sectors, GFP_NOIO, s->d->bio_split);
+ miss = bio_next_split(bio, sectors, GFP_NOIO, &s->d->bio_split);
ret = miss == bio ? MAP_DONE : MAP_CONTINUE;
goto out_submit;
}
@@ -890,14 +887,14 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s,
s->iop.replace = true;
- miss = bio_next_split(bio, sectors, GFP_NOIO, s->d->bio_split);
+ miss = bio_next_split(bio, sectors, GFP_NOIO, &s->d->bio_split);
/* btree_search_recurse()'s btree iterator is no good anymore */
ret = miss == bio ? MAP_DONE : -EINTR;
cache_bio = bio_alloc_bioset(GFP_NOWAIT,
DIV_ROUND_UP(s->insert_bio_sectors, PAGE_SECTORS),
- dc->disk.bio_split);
+ &dc->disk.bio_split);
if (!cache_bio)
goto out_submit;
@@ -1011,7 +1008,7 @@ static void cached_dev_write(struct cached_dev *dc, struct search *s)
struct bio *flush;
flush = bio_alloc_bioset(GFP_NOIO, 0,
- dc->disk.bio_split);
+ &dc->disk.bio_split);
if (!flush) {
s->iop.status = BLK_STS_RESOURCE;
goto insert_data;
@@ -1024,7 +1021,7 @@ static void cached_dev_write(struct cached_dev *dc, struct search *s)
closure_bio_submit(s->iop.c, flush, cl);
}
} else {
- s->iop.bio = bio_clone_fast(bio, GFP_NOIO, dc->disk.bio_split);
+ s->iop.bio = bio_clone_fast(bio, GFP_NOIO, &dc->disk.bio_split);
/* I/O request sent to backing device */
bio->bi_end_io = backing_request_endio;
closure_bio_submit(s->iop.c, bio, cl);
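
Note: throughout request.c the bio_set is now embedded in its owning structure, so bio_next_split(), bio_alloc_bioset() and bio_clone_fast() all take the address of the member instead of a pointer loaded from it. A condensed sketch of the embedded bio_set lifecycle, with illustrative names:

    #include <linux/bio.h>

    struct example_ctx {
            struct bio_set bs;              /* embedded bio_set */
    };

    static int example_ctx_init(struct example_ctx *ctx)
    {
            /* 4 bios in the reserve pool, no front padding */
            return bioset_init(&ctx->bs, 4, 0, BIOSET_NEED_BVECS);
    }

    static struct bio *example_alloc(struct example_ctx *ctx,
                                     unsigned nr_vecs)
    {
            return bio_alloc_bioset(GFP_NOIO, nr_vecs, &ctx->bs);
    }

    static void example_ctx_exit(struct example_ctx *ctx)
    {
            bioset_exit(&ctx->bs);  /* safe on a zeroed, uninitialized set */
    }
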
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index d90d9e59ca00..a31e55bcc4e5 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -37,24 +37,6 @@ static const char invalid_uuid[] = {
0xc8, 0x50, 0xfc, 0x5e, 0xcb, 0x16, 0xcd, 0x99
};
-/* Default is -1; we skip past it for struct cached_dev's cache mode */
-const char * const bch_cache_modes[] = {
- "default",
- "writethrough",
- "writeback",
- "writearound",
- "none",
- NULL
-};
-
-/* Default is -1; we skip past it for stop_when_cache_set_failed */
-const char * const bch_stop_on_failure_modes[] = {
- "default",
- "auto",
- "always",
- NULL
-};
-
static struct kobject *bcache_kobj;
struct mutex bch_register_lock;
LIST_HEAD(bch_cache_sets);
@@ -654,6 +636,11 @@ static int ioctl_dev(struct block_device *b, fmode_t mode,
unsigned int cmd, unsigned long arg)
{
struct bcache_device *d = b->bd_disk->private_data;
+ struct cached_dev *dc = container_of(d, struct cached_dev, disk);
+
+ if (dc->io_disable)
+ return -EIO;
+
return d->ioctl(d, mode, cmd, arg);
}
@@ -766,8 +753,7 @@ static void bcache_device_free(struct bcache_device *d)
put_disk(d->disk);
}
- if (d->bio_split)
- bioset_free(d->bio_split);
+ bioset_exit(&d->bio_split);
kvfree(d->full_dirty_stripes);
kvfree(d->stripe_sectors_dirty);
@@ -809,9 +795,8 @@ static int bcache_device_init(struct bcache_device *d, unsigned block_size,
if (idx < 0)
return idx;
- if (!(d->bio_split = bioset_create(4, offsetof(struct bbio, bio),
- BIOSET_NEED_BVECS |
- BIOSET_NEED_RESCUER)) ||
+ if (bioset_init(&d->bio_split, 4, offsetof(struct bbio, bio),
+ BIOSET_NEED_BVECS|BIOSET_NEED_RESCUER) ||
!(d->disk = alloc_disk(BCACHE_MINORS))) {
ida_simple_remove(&bcache_device_idx, idx);
return -ENOMEM;
@@ -864,6 +849,44 @@ static void calc_cached_dev_sectors(struct cache_set *c)
c->cached_dev_sectors = sectors;
}
+#define BACKING_DEV_OFFLINE_TIMEOUT 5
+static int cached_dev_status_update(void *arg)
+{
+ struct cached_dev *dc = arg;
+ struct request_queue *q;
+
+ /*
+ * If this kthread is being stopped from elsewhere, quit here
+ * directly. dc->io_disable may also be set via the sysfs
+ * interface, so check it here too.
+ */
+ while (!kthread_should_stop() && !dc->io_disable) {
+ q = bdev_get_queue(dc->bdev);
+ if (blk_queue_dying(q))
+ dc->offline_seconds++;
+ else
+ dc->offline_seconds = 0;
+
+ if (dc->offline_seconds >= BACKING_DEV_OFFLINE_TIMEOUT) {
+ pr_err("%s: device offline for %d seconds",
+ dc->backing_dev_name,
+ BACKING_DEV_OFFLINE_TIMEOUT);
+ pr_err("%s: disable I/O request due to backing "
+ "device offline", dc->disk.name);
+ dc->io_disable = true;
+ /* make the new io_disable value visible to other CPUs early */
+ smp_mb();
+ bcache_device_stop(&dc->disk);
+ break;
+ }
+ schedule_timeout_interruptible(HZ);
+ }
+
+ wait_for_kthread_stop();
+ return 0;
+}
+
+
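
Note: cached_dev_status_update() above is a standard polling kthread: re-check the stop condition, sleep roughly a second, and escalate once the backing device has been gone for BACKING_DEV_OFFLINE_TIMEOUT consecutive checks. A stripped-down sketch of that loop shape, with the monitored condition left illustrative:

    #include <linux/kthread.h>
    #include <linux/sched.h>

    static int example_monitor(void *arg)
    {
            while (!kthread_should_stop()) {
                    /* ... poll a condition; escalate after N misses ... */
                    schedule_timeout_interruptible(HZ);     /* ~1 second */
            }
            return 0;
    }

    /* started with kthread_run(example_monitor, dev, "example_monitor");
     * kthread_stop() wakes the interruptible sleep and ends the loop */
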
void bch_cached_dev_run(struct cached_dev *dc)
{
struct bcache_device *d = &dc->disk;
@@ -906,6 +929,14 @@ void bch_cached_dev_run(struct cached_dev *dc)
if (sysfs_create_link(&d->kobj, &disk_to_dev(d->disk)->kobj, "dev") ||
sysfs_create_link(&disk_to_dev(d->disk)->kobj, &d->kobj, "bcache"))
pr_debug("error creating sysfs link");
+
+ dc->status_update_thread = kthread_run(cached_dev_status_update,
+ dc, "bcache_status_update");
+ if (IS_ERR(dc->status_update_thread)) {
+ pr_warn("failed to create bcache_status_update kthread, "
+ "continue to run without monitoring backing "
+ "device status");
+ }
}
/*
@@ -936,7 +967,6 @@ static void cancel_writeback_rate_update_dwork(struct cached_dev *dc)
static void cached_dev_detach_finish(struct work_struct *w)
{
struct cached_dev *dc = container_of(w, struct cached_dev, detach);
- char buf[BDEVNAME_SIZE];
struct closure cl;
closure_init_stack(&cl);
@@ -967,7 +997,7 @@ static void cached_dev_detach_finish(struct work_struct *w)
mutex_unlock(&bch_register_lock);
- pr_info("Caching disabled for %s", bdevname(dc->bdev, buf));
+ pr_info("Caching disabled for %s", dc->backing_dev_name);
/* Drop ref we took in cached_dev_detach() */
closure_put(&dc->disk.cl);
@@ -999,29 +1029,28 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c,
{
uint32_t rtime = cpu_to_le32(get_seconds());
struct uuid_entry *u;
- char buf[BDEVNAME_SIZE];
struct cached_dev *exist_dc, *t;
- bdevname(dc->bdev, buf);
-
if ((set_uuid && memcmp(set_uuid, c->sb.set_uuid, 16)) ||
(!set_uuid && memcmp(dc->sb.set_uuid, c->sb.set_uuid, 16)))
return -ENOENT;
if (dc->disk.c) {
- pr_err("Can't attach %s: already attached", buf);
+ pr_err("Can't attach %s: already attached",
+ dc->backing_dev_name);
return -EINVAL;
}
if (test_bit(CACHE_SET_STOPPING, &c->flags)) {
- pr_err("Can't attach %s: shutting down", buf);
+ pr_err("Can't attach %s: shutting down",
+ dc->backing_dev_name);
return -EINVAL;
}
if (dc->sb.block_size < c->sb.block_size) {
/* Will die */
pr_err("Couldn't attach %s: block size less than set's block size",
- buf);
+ dc->backing_dev_name);
return -EINVAL;
}
@@ -1029,7 +1058,7 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c,
list_for_each_entry_safe(exist_dc, t, &c->cached_devs, list) {
if (!memcmp(dc->sb.uuid, exist_dc->sb.uuid, 16)) {
pr_err("Tried to attach %s but duplicate UUID already attached",
- buf);
+ dc->backing_dev_name);
return -EINVAL;
}
@@ -1047,13 +1076,15 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c,
if (!u) {
if (BDEV_STATE(&dc->sb) == BDEV_STATE_DIRTY) {
- pr_err("Couldn't find uuid for %s in set", buf);
+ pr_err("Couldn't find uuid for %s in set",
+ dc->backing_dev_name);
return -ENOENT;
}
u = uuid_find_empty(c);
if (!u) {
- pr_err("Not caching %s, no room for UUID", buf);
+ pr_err("Not caching %s, no room for UUID",
+ dc->backing_dev_name);
return -EINVAL;
}
}
@@ -1112,7 +1143,8 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c,
up_write(&dc->writeback_lock);
pr_info("Caching %s as %s on set %pU",
- bdevname(dc->bdev, buf), dc->disk.disk->disk_name,
+ dc->backing_dev_name,
+ dc->disk.disk->disk_name,
dc->disk.c->sb.set_uuid);
return 0;
}
@@ -1138,6 +1170,8 @@ static void cached_dev_free(struct closure *cl)
kthread_stop(dc->writeback_thread);
if (dc->writeback_write_wq)
destroy_workqueue(dc->writeback_write_wq);
+ if (!IS_ERR_OR_NULL(dc->status_update_thread))
+ kthread_stop(dc->status_update_thread);
if (atomic_read(&dc->running))
bd_unlink_disk_holder(dc->bdev, dc->disk.disk);
@@ -1225,10 +1259,10 @@ static void register_bdev(struct cache_sb *sb, struct page *sb_page,
struct block_device *bdev,
struct cached_dev *dc)
{
- char name[BDEVNAME_SIZE];
const char *err = "cannot allocate memory";
struct cache_set *c;
+ bdevname(bdev, dc->backing_dev_name);
memcpy(&dc->sb, sb, sizeof(struct cache_sb));
dc->bdev = bdev;
dc->bdev->bd_holder = dc;
@@ -1237,6 +1271,7 @@ static void register_bdev(struct cache_sb *sb, struct page *sb_page,
bio_first_bvec_all(&dc->sb_bio)->bv_page = sb_page;
get_page(sb_page);
+
if (cached_dev_init(dc, sb->block_size << 9))
goto err;
@@ -1247,7 +1282,7 @@ static void register_bdev(struct cache_sb *sb, struct page *sb_page,
if (bch_cache_accounting_add_kobjs(&dc->accounting, &dc->disk.kobj))
goto err;
- pr_info("registered backing device %s", bdevname(bdev, name));
+ pr_info("registered backing device %s", dc->backing_dev_name);
list_add(&dc->list, &uncached_devices);
list_for_each_entry(c, &bch_cache_sets, list)
@@ -1259,7 +1294,7 @@ static void register_bdev(struct cache_sb *sb, struct page *sb_page,
return;
err:
- pr_notice("error %s: %s", bdevname(bdev, name), err);
+ pr_notice("error %s: %s", dc->backing_dev_name, err);
bcache_device_stop(&dc->disk);
}
@@ -1367,7 +1402,7 @@ int bch_flash_dev_create(struct cache_set *c, uint64_t size)
bool bch_cached_dev_error(struct cached_dev *dc)
{
- char name[BDEVNAME_SIZE];
+ struct cache_set *c;
if (!dc || test_bit(BCACHE_DEV_CLOSING, &dc->disk.flags))
return false;
@@ -1377,7 +1412,22 @@ bool bch_cached_dev_error(struct cached_dev *dc)
smp_mb();
pr_err("stop %s: too many IO errors on backing device %s\n",
- dc->disk.disk->disk_name, bdevname(dc->bdev, name));
+ dc->disk.disk->disk_name, dc->backing_dev_name);
+
+ /*
+ * If the cached device is still attached to a cache set,
+ * then even when dc->io_disable is true and no more I/O
+ * requests are accepted, internal I/O against the cache
+ * device (writeback scan or garbage collection) may still
+ * prevent the bcache device from being stopped. So the
+ * CACHE_SET_IO_DISABLE bit should be set in c->flags too,
+ * so that internal I/O to the cache device is rejected
+ * and stopped immediately.
+ * If c is NULL, the bcache device is not attached to any
+ * cache set, and there is no CACHE_SET_IO_DISABLE bit to set.
+ */
+ c = dc->disk.c;
+ if (c && test_and_set_bit(CACHE_SET_IO_DISABLE, &c->flags))
+ pr_info("CACHE_SET_IO_DISABLE already set");
bcache_device_stop(&dc->disk);
return true;
@@ -1395,7 +1445,7 @@ bool bch_cache_set_error(struct cache_set *c, const char *fmt, ...)
return false;
if (test_and_set_bit(CACHE_SET_IO_DISABLE, &c->flags))
- pr_warn("CACHE_SET_IO_DISABLE already set");
+ pr_info("CACHE_SET_IO_DISABLE already set");
/* XXX: we can be called from atomic context
acquire_console_sem();
@@ -1448,14 +1498,10 @@ static void cache_set_free(struct closure *cl)
if (c->moving_gc_wq)
destroy_workqueue(c->moving_gc_wq);
- if (c->bio_split)
- bioset_free(c->bio_split);
- if (c->fill_iter)
- mempool_destroy(c->fill_iter);
- if (c->bio_meta)
- mempool_destroy(c->bio_meta);
- if (c->search)
- mempool_destroy(c->search);
+ bioset_exit(&c->bio_split);
+ mempool_exit(&c->fill_iter);
+ mempool_exit(&c->bio_meta);
+ mempool_exit(&c->search);
kfree(c->devices);
mutex_lock(&bch_register_lock);
@@ -1539,6 +1585,20 @@ static void conditional_stop_bcache_device(struct cache_set *c,
*/
pr_warn("stop_when_cache_set_failed of %s is \"auto\" and cache is dirty, stop it to avoid potential data corruption.",
d->disk->disk_name);
+ /*
+ * There may be a small window in which the cache set has
+ * been released but the bcache device has not. During this
+ * window, regular I/O requests go directly to the backing
+ * device, since no cache set is attached; in writeback
+ * mode this can leave inconsistent data behind while the
+ * cache is still dirty.
+ * Therefore, before calling bcache_device_stop() for a
+ * broken cache device, dc->io_disable must be explicitly
+ * set to true.
+ */
+ dc->io_disable = true;
+ /* make the new io_disable value visible to other CPUs early */
+ smp_mb();
bcache_device_stop(d);
} else {
/*
@@ -1652,21 +1712,17 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
INIT_LIST_HEAD(&c->btree_cache_freed);
INIT_LIST_HEAD(&c->data_buckets);
- c->search = mempool_create_slab_pool(32, bch_search_cache);
- if (!c->search)
- goto err;
-
iter_size = (sb->bucket_size / sb->block_size + 1) *
sizeof(struct btree_iter_set);
if (!(c->devices = kzalloc(c->nr_uuids * sizeof(void *), GFP_KERNEL)) ||
- !(c->bio_meta = mempool_create_kmalloc_pool(2,
- sizeof(struct bbio) + sizeof(struct bio_vec) *
- bucket_pages(c))) ||
- !(c->fill_iter = mempool_create_kmalloc_pool(1, iter_size)) ||
- !(c->bio_split = bioset_create(4, offsetof(struct bbio, bio),
- BIOSET_NEED_BVECS |
- BIOSET_NEED_RESCUER)) ||
+ mempool_init_slab_pool(&c->search, 32, bch_search_cache) ||
+ mempool_init_kmalloc_pool(&c->bio_meta, 2,
+ sizeof(struct bbio) + sizeof(struct bio_vec) *
+ bucket_pages(c)) ||
+ mempool_init_kmalloc_pool(&c->fill_iter, 1, iter_size) ||
+ bioset_init(&c->bio_split, 4, offsetof(struct bbio, bio),
+ BIOSET_NEED_BVECS|BIOSET_NEED_RESCUER) ||
!(c->uuids = alloc_bucket_pages(GFP_KERNEL, c)) ||
!(c->moving_gc_wq = alloc_workqueue("bcache_gc",
WQ_MEM_RECLAIM, 0)) ||
@@ -2003,12 +2059,10 @@ static int cache_alloc(struct cache *ca)
static int register_cache(struct cache_sb *sb, struct page *sb_page,
struct block_device *bdev, struct cache *ca)
{
- char name[BDEVNAME_SIZE];
const char *err = NULL; /* must be set for any error case */
int ret = 0;
- bdevname(bdev, name);
-
+ bdevname(bdev, ca->cache_dev_name);
memcpy(&ca->sb, sb, sizeof(struct cache_sb));
ca->bdev = bdev;
ca->bdev->bd_holder = ca;
@@ -2045,14 +2099,14 @@ static int register_cache(struct cache_sb *sb, struct page *sb_page,
goto out;
}
- pr_info("registered cache device %s", name);
+ pr_info("registered cache device %s", ca->cache_dev_name);
out:
kobject_put(&ca->kobj);
err:
if (err)
- pr_notice("error %s: %s", name, err);
+ pr_notice("error %s: %s", ca->cache_dev_name, err);
return ret;
}
diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
index dfeef583ee50..8ccbc8f3b3af 100644
--- a/drivers/md/bcache/sysfs.c
+++ b/drivers/md/bcache/sysfs.c
@@ -16,6 +16,22 @@
#include <linux/sort.h>
#include <linux/sched/clock.h>
+/* Default is -1; we skip past it for struct cached_dev's cache mode */
+static const char * const bch_cache_modes[] = {
+ "writethrough",
+ "writeback",
+ "writearound",
+ "none",
+ NULL
+};
+
+/* Default is -1; we skip past it for stop_when_cache_set_failed */
+static const char * const bch_stop_on_failure_modes[] = {
+ "auto",
+ "always",
+ NULL
+};
+
static const char * const cache_replacement_policies[] = {
"lru",
"fifo",
@@ -114,6 +130,20 @@ rw_attribute(btree_shrinker_disabled);
rw_attribute(copy_gc_enabled);
rw_attribute(size);
+static ssize_t bch_snprint_string_list(char *buf, size_t size, const char * const list[],
+ size_t selected)
+{
+ char *out = buf;
+ size_t i;
+
+ for (i = 0; list[i]; i++)
+ out += snprintf(out, buf + size - out,
+ i == selected ? "[%s] " : "%s ", list[i]);
+
+ out[-1] = '\n';
+ return out - buf;
+}
+
SHOW(__bch_cached_dev)
{
struct cached_dev *dc = container_of(kobj, struct cached_dev,
@@ -124,12 +154,12 @@ SHOW(__bch_cached_dev)
if (attr == &sysfs_cache_mode)
return bch_snprint_string_list(buf, PAGE_SIZE,
- bch_cache_modes + 1,
+ bch_cache_modes,
BDEV_CACHE_MODE(&dc->sb));
if (attr == &sysfs_stop_when_cache_set_failed)
return bch_snprint_string_list(buf, PAGE_SIZE,
- bch_stop_on_failure_modes + 1,
+ bch_stop_on_failure_modes,
dc->stop_when_cache_set_failed);
@@ -253,8 +283,7 @@ STORE(__cached_dev)
bch_cached_dev_run(dc);
if (attr == &sysfs_cache_mode) {
- v = bch_read_string_list(buf, bch_cache_modes + 1);
-
+ v = __sysfs_match_string(bch_cache_modes, -1, buf);
if (v < 0)
return v;
@@ -265,8 +294,7 @@ STORE(__cached_dev)
}
if (attr == &sysfs_stop_when_cache_set_failed) {
- v = bch_read_string_list(buf, bch_stop_on_failure_modes + 1);
-
+ v = __sysfs_match_string(bch_stop_on_failure_modes, -1, buf);
if (v < 0)
return v;
@@ -635,6 +663,7 @@ SHOW_LOCKED(bch_cache_set)
STORE(__bch_cache_set)
{
struct cache_set *c = container_of(kobj, struct cache_set, kobj);
+ ssize_t v;
if (attr == &sysfs_unregister)
bch_cache_set_unregister(c);
@@ -698,8 +727,7 @@ STORE(__bch_cache_set)
c->congested_write_threshold_us);
if (attr == &sysfs_errors) {
- ssize_t v = bch_read_string_list(buf, error_actions);
-
+ v = __sysfs_match_string(error_actions, -1, buf);
if (v < 0)
return v;
@@ -714,8 +742,7 @@ STORE(__bch_cache_set)
c->error_decay = strtoul_or_return(buf) / 88;
if (attr == &sysfs_io_disable) {
- int v = strtoul_or_return(buf);
-
+ v = strtoul_or_return(buf);
if (v) {
if (test_and_set_bit(CACHE_SET_IO_DISABLE,
&c->flags))
@@ -929,6 +956,7 @@ SHOW_LOCKED(bch_cache)
STORE(__bch_cache)
{
struct cache *ca = container_of(kobj, struct cache, kobj);
+ ssize_t v;
if (attr == &sysfs_discard) {
bool v = strtoul_or_return(buf);
@@ -943,8 +971,7 @@ STORE(__bch_cache)
}
if (attr == &sysfs_cache_replacement_policy) {
- ssize_t v = bch_read_string_list(buf, cache_replacement_policies);
-
+ v = __sysfs_match_string(cache_replacement_policies, -1, buf);
if (v < 0)
return v;
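
Note: the sysfs.c hunks replace the hand-rolled bch_read_string_list() with __sysfs_match_string() from <linux/string.h>, which does the same job: match the (possibly newline-terminated) sysfs input against an array and return its index, or -EINVAL. Passing n == -1 means the array must be NULL-terminated. A small usage sketch with an invented mode list:

    #include <linux/string.h>

    static const char * const example_modes[] = {
            "writethrough",
            "writeback",
            NULL,                   /* required when n == -1 */
    };

    static ssize_t example_store_mode(const char *buf)
    {
            int v = __sysfs_match_string(example_modes, -1, buf);

            if (v < 0)              /* no match: -EINVAL */
                    return v;
            /* v is the selected index: 0 = writethrough, 1 = writeback */
            return 0;
    }
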
diff --git a/drivers/md/bcache/util.c b/drivers/md/bcache/util.c
index 74febd5230df..fc479b026d6d 100644
--- a/drivers/md/bcache/util.c
+++ b/drivers/md/bcache/util.c
@@ -120,41 +120,6 @@ ssize_t bch_hprint(char *buf, int64_t v)
return sprintf(buf, "%llu.%i%c", q, t * 10 / 1024, units[u]);
}
-ssize_t bch_snprint_string_list(char *buf, size_t size, const char * const list[],
- size_t selected)
-{
- char *out = buf;
- size_t i;
-
- for (i = 0; list[i]; i++)
- out += snprintf(out, buf + size - out,
- i == selected ? "[%s] " : "%s ", list[i]);
-
- out[-1] = '\n';
- return out - buf;
-}
-
-ssize_t bch_read_string_list(const char *buf, const char * const list[])
-{
- size_t i;
- char *s, *d = kstrndup(buf, PAGE_SIZE - 1, GFP_KERNEL);
- if (!d)
- return -ENOMEM;
-
- s = strim(d);
-
- for (i = 0; list[i]; i++)
- if (!strcmp(list[i], s))
- break;
-
- kfree(d);
-
- if (!list[i])
- return -EINVAL;
-
- return i;
-}
-
bool bch_is_zero(const char *p, size_t n)
{
size_t i;
diff --git a/drivers/md/bcache/util.h b/drivers/md/bcache/util.h
index 268024529edd..cced87f8eb27 100644
--- a/drivers/md/bcache/util.h
+++ b/drivers/md/bcache/util.h
@@ -365,11 +365,6 @@ ssize_t bch_hprint(char *buf, int64_t v);
bool bch_is_zero(const char *p, size_t n);
int bch_parse_uuid(const char *s, char *uuid);
-ssize_t bch_snprint_string_list(char *buf, size_t size, const char * const list[],
- size_t selected);
-
-ssize_t bch_read_string_list(const char *buf, const char * const list[]);
-
struct time_stats {
spinlock_t lock;
/*
diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
index 4a9547cdcdc5..ad45ebe1a74b 100644
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
@@ -244,8 +244,10 @@ static void dirty_endio(struct bio *bio)
struct keybuf_key *w = bio->bi_private;
struct dirty_io *io = w->private;
- if (bio->bi_status)
+ if (bio->bi_status) {
SET_KEY_DIRTY(&w->key, false);
+ bch_count_backing_io_errors(io->dc, bio);
+ }
closure_put(&io->cl);
}
diff --git a/drivers/md/dm-bio-prison-v1.c b/drivers/md/dm-bio-prison-v1.c
index 874841f0fc83..8e33a3808368 100644
--- a/drivers/md/dm-bio-prison-v1.c
+++ b/drivers/md/dm-bio-prison-v1.c
@@ -19,7 +19,7 @@
struct dm_bio_prison {
spinlock_t lock;
- mempool_t *cell_pool;
+ mempool_t cell_pool;
struct rb_root cells;
};
@@ -34,14 +34,15 @@ static struct kmem_cache *_cell_cache;
struct dm_bio_prison *dm_bio_prison_create(void)
{
struct dm_bio_prison *prison = kmalloc(sizeof(*prison), GFP_KERNEL);
+ int ret;
if (!prison)
return NULL;
spin_lock_init(&prison->lock);
- prison->cell_pool = mempool_create_slab_pool(MIN_CELLS, _cell_cache);
- if (!prison->cell_pool) {
+ ret = mempool_init_slab_pool(&prison->cell_pool, MIN_CELLS, _cell_cache);
+ if (ret) {
kfree(prison);
return NULL;
}
@@ -54,21 +55,21 @@ EXPORT_SYMBOL_GPL(dm_bio_prison_create);
void dm_bio_prison_destroy(struct dm_bio_prison *prison)
{
- mempool_destroy(prison->cell_pool);
+ mempool_exit(&prison->cell_pool);
kfree(prison);
}
EXPORT_SYMBOL_GPL(dm_bio_prison_destroy);
struct dm_bio_prison_cell *dm_bio_prison_alloc_cell(struct dm_bio_prison *prison, gfp_t gfp)
{
- return mempool_alloc(prison->cell_pool, gfp);
+ return mempool_alloc(&prison->cell_pool, gfp);
}
EXPORT_SYMBOL_GPL(dm_bio_prison_alloc_cell);
void dm_bio_prison_free_cell(struct dm_bio_prison *prison,
struct dm_bio_prison_cell *cell)
{
- mempool_free(cell, prison->cell_pool);
+ mempool_free(cell, &prison->cell_pool);
}
EXPORT_SYMBOL_GPL(dm_bio_prison_free_cell);
diff --git a/drivers/md/dm-bio-prison-v2.c b/drivers/md/dm-bio-prison-v2.c
index 8ce3a1a588cf..601b1569206a 100644
--- a/drivers/md/dm-bio-prison-v2.c
+++ b/drivers/md/dm-bio-prison-v2.c
@@ -21,7 +21,7 @@ struct dm_bio_prison_v2 {
struct workqueue_struct *wq;
spinlock_t lock;
- mempool_t *cell_pool;
+ mempool_t cell_pool;
struct rb_root cells;
};
@@ -36,6 +36,7 @@ static struct kmem_cache *_cell_cache;
struct dm_bio_prison_v2 *dm_bio_prison_create_v2(struct workqueue_struct *wq)
{
struct dm_bio_prison_v2 *prison = kmalloc(sizeof(*prison), GFP_KERNEL);
+ int ret;
if (!prison)
return NULL;
@@ -43,8 +44,8 @@ struct dm_bio_prison_v2 *dm_bio_prison_create_v2(struct workqueue_struct *wq)
prison->wq = wq;
spin_lock_init(&prison->lock);
- prison->cell_pool = mempool_create_slab_pool(MIN_CELLS, _cell_cache);
- if (!prison->cell_pool) {
+ ret = mempool_init_slab_pool(&prison->cell_pool, MIN_CELLS, _cell_cache);
+ if (ret) {
kfree(prison);
return NULL;
}
@@ -57,21 +58,21 @@ EXPORT_SYMBOL_GPL(dm_bio_prison_create_v2);
void dm_bio_prison_destroy_v2(struct dm_bio_prison_v2 *prison)
{
- mempool_destroy(prison->cell_pool);
+ mempool_exit(&prison->cell_pool);
kfree(prison);
}
EXPORT_SYMBOL_GPL(dm_bio_prison_destroy_v2);
struct dm_bio_prison_cell_v2 *dm_bio_prison_alloc_cell_v2(struct dm_bio_prison_v2 *prison, gfp_t gfp)
{
- return mempool_alloc(prison->cell_pool, gfp);
+ return mempool_alloc(&prison->cell_pool, gfp);
}
EXPORT_SYMBOL_GPL(dm_bio_prison_alloc_cell_v2);
void dm_bio_prison_free_cell_v2(struct dm_bio_prison_v2 *prison,
struct dm_bio_prison_cell_v2 *cell)
{
- mempool_free(cell, prison->cell_pool);
+ mempool_free(cell, &prison->cell_pool);
}
EXPORT_SYMBOL_GPL(dm_bio_prison_free_cell_v2);
diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c
index 12aa9ca21d8c..dc385b70e4c3 100644
--- a/drivers/md/dm-bufio.c
+++ b/drivers/md/dm-bufio.c
@@ -1681,8 +1681,9 @@ struct dm_bufio_client *dm_bufio_client_create(struct block_device *bdev, unsign
if (block_size <= KMALLOC_MAX_SIZE &&
(block_size < PAGE_SIZE || !is_power_of_2(block_size))) {
- snprintf(slab_name, sizeof slab_name, "dm_bufio_cache-%u", c->block_size);
- c->slab_cache = kmem_cache_create(slab_name, c->block_size, ARCH_KMALLOC_MINALIGN,
+ unsigned align = min(1U << __ffs(block_size), (unsigned)PAGE_SIZE);
+ snprintf(slab_name, sizeof slab_name, "dm_bufio_cache-%u", block_size);
+ c->slab_cache = kmem_cache_create(slab_name, block_size, align,
SLAB_RECLAIM_ACCOUNT, NULL);
if (!c->slab_cache) {
r = -ENOMEM;
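
Note on the dm-bufio hunk: the slab alignment is the largest power of two that divides block_size, capped at PAGE_SIZE. __ffs() returns the index of the lowest set bit, so 1U << __ffs(block_size) is that largest power-of-two factor. For example, block_size = 1536 (0x600) gives __ffs = 9 and an alignment of 512, while block_size = 8192 gives 8192, which min() caps at PAGE_SIZE (4096 on most architectures).
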
diff --git a/drivers/md/dm-cache-background-tracker.c b/drivers/md/dm-cache-background-tracker.c
index 1d0af0a21fc7..84814e819e4c 100644
--- a/drivers/md/dm-cache-background-tracker.c
+++ b/drivers/md/dm-cache-background-tracker.c
@@ -166,7 +166,7 @@ static bool max_work_reached(struct background_tracker *b)
atomic_read(&b->pending_demotes) >= b->max_work;
}
-struct bt_work *alloc_work(struct background_tracker *b)
+static struct bt_work *alloc_work(struct background_tracker *b)
{
if (max_work_reached(b))
return NULL;
diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c
index da208638fba4..001c71248246 100644
--- a/drivers/md/dm-cache-target.c
+++ b/drivers/md/dm-cache-target.c
@@ -447,9 +447,9 @@ struct cache {
struct work_struct migration_worker;
struct delayed_work waker;
struct dm_bio_prison_v2 *prison;
- struct bio_set *bs;
+ struct bio_set bs;
- mempool_t *migration_pool;
+ mempool_t migration_pool;
struct dm_cache_policy *policy;
unsigned policy_nr_args;
@@ -550,7 +550,7 @@ static struct dm_cache_migration *alloc_migration(struct cache *cache)
{
struct dm_cache_migration *mg;
- mg = mempool_alloc(cache->migration_pool, GFP_NOWAIT);
+ mg = mempool_alloc(&cache->migration_pool, GFP_NOWAIT);
if (!mg)
return NULL;
@@ -569,7 +569,7 @@ static void free_migration(struct dm_cache_migration *mg)
if (atomic_dec_and_test(&cache->nr_allocated_migrations))
wake_up(&cache->migration_wait);
- mempool_free(mg, cache->migration_pool);
+ mempool_free(mg, &cache->migration_pool);
}
/*----------------------------------------------------------------*/
@@ -924,7 +924,7 @@ static void issue_op(struct bio *bio, void *context)
static void remap_to_origin_and_cache(struct cache *cache, struct bio *bio,
dm_oblock_t oblock, dm_cblock_t cblock)
{
- struct bio *origin_bio = bio_clone_fast(bio, GFP_NOIO, cache->bs);
+ struct bio *origin_bio = bio_clone_fast(bio, GFP_NOIO, &cache->bs);
BUG_ON(!origin_bio);
@@ -2011,7 +2011,7 @@ static void destroy(struct cache *cache)
{
unsigned i;
- mempool_destroy(cache->migration_pool);
+ mempool_exit(&cache->migration_pool);
if (cache->prison)
dm_bio_prison_destroy_v2(cache->prison);
@@ -2047,8 +2047,7 @@ static void destroy(struct cache *cache)
kfree(cache->ctr_args[i]);
kfree(cache->ctr_args);
- if (cache->bs)
- bioset_free(cache->bs);
+ bioset_exit(&cache->bs);
kfree(cache);
}
@@ -2498,8 +2497,8 @@ static int cache_create(struct cache_args *ca, struct cache **result)
cache->features = ca->features;
if (writethrough_mode(cache)) {
/* Create bioset for writethrough bios issued to origin */
- cache->bs = bioset_create(BIO_POOL_SIZE, 0, 0);
- if (!cache->bs)
+ r = bioset_init(&cache->bs, BIO_POOL_SIZE, 0, 0);
+ if (r)
goto bad;
}
@@ -2630,9 +2629,9 @@ static int cache_create(struct cache_args *ca, struct cache **result)
goto bad;
}
- cache->migration_pool = mempool_create_slab_pool(MIGRATION_POOL_SIZE,
- migration_cache);
- if (!cache->migration_pool) {
+ r = mempool_init_slab_pool(&cache->migration_pool, MIGRATION_POOL_SIZE,
+ migration_cache);
+ if (r) {
*error = "Error creating cache's migration mempool";
goto bad;
}
diff --git a/drivers/md/dm-core.h b/drivers/md/dm-core.h
index 3222e21cbbf8..f21c5d21bf1b 100644
--- a/drivers/md/dm-core.h
+++ b/drivers/md/dm-core.h
@@ -91,8 +91,8 @@ struct mapped_device {
/*
* io objects are allocated from here.
*/
- struct bio_set *io_bs;
- struct bio_set *bs;
+ struct bio_set io_bs;
+ struct bio_set bs;
/*
* freeze/thaw support require holding onto a super block
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 44ff473dab3e..da02f4d8e4b9 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -143,14 +143,14 @@ struct crypt_config {
* pool for per bio private data, crypto requests,
* encryption requests/buffer pages and integrity tags
*/
- mempool_t *req_pool;
- mempool_t *page_pool;
- mempool_t *tag_pool;
+ mempool_t req_pool;
+ mempool_t page_pool;
+ mempool_t tag_pool;
unsigned tag_pool_max_sectors;
struct percpu_counter n_allocated_pages;
- struct bio_set *bs;
+ struct bio_set bs;
struct mutex bio_alloc_lock;
struct workqueue_struct *io_queue;
@@ -1245,7 +1245,7 @@ static void crypt_alloc_req_skcipher(struct crypt_config *cc,
unsigned key_index = ctx->cc_sector & (cc->tfms_count - 1);
if (!ctx->r.req)
- ctx->r.req = mempool_alloc(cc->req_pool, GFP_NOIO);
+ ctx->r.req = mempool_alloc(&cc->req_pool, GFP_NOIO);
skcipher_request_set_tfm(ctx->r.req, cc->cipher_tfm.tfms[key_index]);
@@ -1262,7 +1262,7 @@ static void crypt_alloc_req_aead(struct crypt_config *cc,
struct convert_context *ctx)
{
if (!ctx->r.req_aead)
- ctx->r.req_aead = mempool_alloc(cc->req_pool, GFP_NOIO);
+ ctx->r.req_aead = mempool_alloc(&cc->req_pool, GFP_NOIO);
aead_request_set_tfm(ctx->r.req_aead, cc->cipher_tfm.tfms_aead[0]);
@@ -1290,7 +1290,7 @@ static void crypt_free_req_skcipher(struct crypt_config *cc,
struct dm_crypt_io *io = dm_per_bio_data(base_bio, cc->per_bio_data_size);
if ((struct skcipher_request *)(io + 1) != req)
- mempool_free(req, cc->req_pool);
+ mempool_free(req, &cc->req_pool);
}
static void crypt_free_req_aead(struct crypt_config *cc,
@@ -1299,7 +1299,7 @@ static void crypt_free_req_aead(struct crypt_config *cc,
struct dm_crypt_io *io = dm_per_bio_data(base_bio, cc->per_bio_data_size);
if ((struct aead_request *)(io + 1) != req)
- mempool_free(req, cc->req_pool);
+ mempool_free(req, &cc->req_pool);
}
static void crypt_free_req(struct crypt_config *cc, void *req, struct bio *base_bio)
@@ -1409,7 +1409,7 @@ retry:
if (unlikely(gfp_mask & __GFP_DIRECT_RECLAIM))
mutex_lock(&cc->bio_alloc_lock);
- clone = bio_alloc_bioset(GFP_NOIO, nr_iovecs, cc->bs);
+ clone = bio_alloc_bioset(GFP_NOIO, nr_iovecs, &cc->bs);
if (!clone)
goto out;
@@ -1418,7 +1418,7 @@ retry:
remaining_size = size;
for (i = 0; i < nr_iovecs; i++) {
- page = mempool_alloc(cc->page_pool, gfp_mask);
+ page = mempool_alloc(&cc->page_pool, gfp_mask);
if (!page) {
crypt_free_buffer_pages(cc, clone);
bio_put(clone);
@@ -1453,7 +1453,7 @@ static void crypt_free_buffer_pages(struct crypt_config *cc, struct bio *clone)
bio_for_each_segment_all(bv, clone, i) {
BUG_ON(!bv->bv_page);
- mempool_free(bv->bv_page, cc->page_pool);
+ mempool_free(bv->bv_page, &cc->page_pool);
}
}
@@ -1492,7 +1492,7 @@ static void crypt_dec_pending(struct dm_crypt_io *io)
crypt_free_req(cc, io->ctx.r.req, base_bio);
if (unlikely(io->integrity_metadata_from_pool))
- mempool_free(io->integrity_metadata, io->cc->tag_pool);
+ mempool_free(io->integrity_metadata, &io->cc->tag_pool);
else
kfree(io->integrity_metadata);
@@ -1565,7 +1565,7 @@ static int kcryptd_io_read(struct dm_crypt_io *io, gfp_t gfp)
* biovecs we don't need to worry about the block layer
* modifying the biovec array; so leverage bio_clone_fast().
*/
- clone = bio_clone_fast(io->base_bio, gfp, cc->bs);
+ clone = bio_clone_fast(io->base_bio, gfp, &cc->bs);
if (!clone)
return 1;
@@ -2219,15 +2219,13 @@ static void crypt_dtr(struct dm_target *ti)
crypt_free_tfms(cc);
- if (cc->bs)
- bioset_free(cc->bs);
+ bioset_exit(&cc->bs);
- mempool_destroy(cc->page_pool);
- mempool_destroy(cc->req_pool);
- mempool_destroy(cc->tag_pool);
+ mempool_exit(&cc->page_pool);
+ mempool_exit(&cc->req_pool);
+ mempool_exit(&cc->tag_pool);
- if (cc->page_pool)
- WARN_ON(percpu_counter_sum(&cc->n_allocated_pages) != 0);
+ WARN_ON(percpu_counter_sum(&cc->n_allocated_pages) != 0);
percpu_counter_destroy(&cc->n_allocated_pages);
if (cc->iv_gen_ops && cc->iv_gen_ops->dtr)
@@ -2743,8 +2741,6 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
iv_size_padding = align_mask;
}
- ret = -ENOMEM;
-
/* ...| IV + padding | original IV | original sec. number | bio tag offset | */
additional_req_size = sizeof(struct dm_crypt_request) +
iv_size_padding + cc->iv_size +
@@ -2752,8 +2748,8 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
sizeof(uint64_t) +
sizeof(unsigned int);
- cc->req_pool = mempool_create_kmalloc_pool(MIN_IOS, cc->dmreq_start + additional_req_size);
- if (!cc->req_pool) {
+ ret = mempool_init_kmalloc_pool(&cc->req_pool, MIN_IOS, cc->dmreq_start + additional_req_size);
+ if (ret) {
ti->error = "Cannot allocate crypt request mempool";
goto bad;
}
@@ -2762,14 +2758,14 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
ALIGN(sizeof(struct dm_crypt_io) + cc->dmreq_start + additional_req_size,
ARCH_KMALLOC_MINALIGN);
- cc->page_pool = mempool_create(BIO_MAX_PAGES, crypt_page_alloc, crypt_page_free, cc);
- if (!cc->page_pool) {
+ ret = mempool_init(&cc->page_pool, BIO_MAX_PAGES, crypt_page_alloc, crypt_page_free, cc);
+ if (ret) {
ti->error = "Cannot allocate page mempool";
goto bad;
}
- cc->bs = bioset_create(MIN_IOS, 0, BIOSET_NEED_BVECS);
- if (!cc->bs) {
+ ret = bioset_init(&cc->bs, MIN_IOS, 0, BIOSET_NEED_BVECS);
+ if (ret) {
ti->error = "Cannot allocate crypt bioset";
goto bad;
}
@@ -2806,11 +2802,10 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
if (!cc->tag_pool_max_sectors)
cc->tag_pool_max_sectors = 1;
- cc->tag_pool = mempool_create_kmalloc_pool(MIN_IOS,
+ ret = mempool_init_kmalloc_pool(&cc->tag_pool, MIN_IOS,
cc->tag_pool_max_sectors * cc->on_disk_tag_size);
- if (!cc->tag_pool) {
+ if (ret) {
ti->error = "Cannot allocate integrity tags mempool";
- ret = -ENOMEM;
goto bad;
}
@@ -2903,7 +2898,7 @@ static int crypt_map(struct dm_target *ti, struct bio *bio)
GFP_NOIO | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN)))) {
if (bio_sectors(bio) > cc->tag_pool_max_sectors)
dm_accept_partial_bio(bio, cc->tag_pool_max_sectors);
- io->integrity_metadata = mempool_alloc(cc->tag_pool, GFP_NOIO);
+ io->integrity_metadata = mempool_alloc(&cc->tag_pool, GFP_NOIO);
io->integrity_metadata_from_pool = true;
}
}
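
Note: dm-crypt's page pool shows the general form of the new API: mempool_init() with custom alloc/free callbacks (crypt_page_alloc()/crypt_page_free(), which also maintain cc->n_allocated_pages). A hedged sketch of a custom-backed embedded mempool, with illustrative callbacks:

    #include <linux/mempool.h>
    #include <linux/gfp.h>

    static void *example_page_alloc(gfp_t gfp, void *pool_data)
    {
            return (void *)__get_free_page(gfp);    /* pool_data unused */
    }

    static void example_page_free(void *element, void *pool_data)
    {
            free_page((unsigned long)element);
    }

    static int example_pool_init(mempool_t *pool)
    {
            /* keep at least 16 pages in reserve, backed by the
             * callbacks above; returns 0 or a -errno */
            return mempool_init(pool, 16, example_page_alloc,
                                example_page_free, NULL);
    }
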
diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c
index 77d9fe58dae2..fc68c7aaef8e 100644
--- a/drivers/md/dm-integrity.c
+++ b/drivers/md/dm-integrity.c
@@ -142,7 +142,7 @@ struct dm_integrity_c {
unsigned tag_size;
__s8 log2_tag_size;
sector_t start;
- mempool_t *journal_io_mempool;
+ mempool_t journal_io_mempool;
struct dm_io_client *io;
struct dm_bufio_client *bufio;
struct workqueue_struct *metadata_wq;
@@ -1817,7 +1817,7 @@ static void complete_copy_from_journal(unsigned long error, void *context)
struct journal_completion *comp = io->comp;
struct dm_integrity_c *ic = comp->ic;
remove_range(ic, &io->range);
- mempool_free(io, ic->journal_io_mempool);
+ mempool_free(io, &ic->journal_io_mempool);
if (unlikely(error != 0))
dm_integrity_io_error(ic, "copying from journal", -EIO);
complete_journal_op(comp);
@@ -1886,7 +1886,7 @@ static void do_journal_write(struct dm_integrity_c *ic, unsigned write_start,
}
next_loop = k - 1;
- io = mempool_alloc(ic->journal_io_mempool, GFP_NOIO);
+ io = mempool_alloc(&ic->journal_io_mempool, GFP_NOIO);
io->comp = &comp;
io->range.logical_sector = sec;
io->range.n_sectors = (k - j) << ic->sb->log2_sectors_per_block;
@@ -1918,7 +1918,7 @@ static void do_journal_write(struct dm_integrity_c *ic, unsigned write_start,
if (j == k) {
remove_range_unlocked(ic, &io->range);
spin_unlock_irq(&ic->endio_wait.lock);
- mempool_free(io, ic->journal_io_mempool);
+ mempool_free(io, &ic->journal_io_mempool);
goto skip_io;
}
for (l = j; l < k; l++) {
@@ -2440,7 +2440,7 @@ static void dm_integrity_free_journal_scatterlist(struct dm_integrity_c *ic, str
unsigned i;
for (i = 0; i < ic->journal_sections; i++)
kvfree(sl[i]);
- kfree(sl);
+ kvfree(sl);
}
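
Note: the one-character kvfree() change above is a real fix, not churn: the scatterlist array is allocated with a kvmalloc-family call, which may return vmalloc memory for large sizes, and passing a vmalloc address to kfree() is invalid. kvfree() dispatches to the correct release path. A minimal sketch of the pairing rule:

    #include <linux/mm.h>           /* kvmalloc_array(), kvfree() */

    static void **example_alloc_table(unsigned n)
    {
            /* may transparently fall back to vmalloc for large n ... */
            return kvmalloc_array(n, sizeof(void *),
                                  GFP_KERNEL | __GFP_ZERO);
    }

    static void example_free_table(void **tbl)
    {
            kvfree(tbl);            /* ... so it must be freed with kvfree() */
    }
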
static struct scatterlist **dm_integrity_alloc_journal_scatterlist(struct dm_integrity_c *ic, struct page_list *pl)
@@ -2980,9 +2980,8 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv)
goto bad;
}
- ic->journal_io_mempool = mempool_create_slab_pool(JOURNAL_IO_MEMPOOL, journal_io_cache);
- if (!ic->journal_io_mempool) {
- r = -ENOMEM;
+ r = mempool_init_slab_pool(&ic->journal_io_mempool, JOURNAL_IO_MEMPOOL, journal_io_cache);
+ if (r) {
ti->error = "Cannot allocate mempool";
goto bad;
}
@@ -3196,7 +3195,7 @@ static void dm_integrity_dtr(struct dm_target *ti)
destroy_workqueue(ic->writer_wq);
if (ic->bufio)
dm_bufio_client_destroy(ic->bufio);
- mempool_destroy(ic->journal_io_mempool);
+ mempool_exit(&ic->journal_io_mempool);
if (ic->io)
dm_io_client_destroy(ic->io);
if (ic->dev)
diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c
index a8d914d5abbe..53c6ed0eaa1f 100644
--- a/drivers/md/dm-io.c
+++ b/drivers/md/dm-io.c
@@ -22,8 +22,8 @@
#define DM_IO_MAX_REGIONS BITS_PER_LONG
struct dm_io_client {
- mempool_t *pool;
- struct bio_set *bios;
+ mempool_t pool;
+ struct bio_set bios;
};
/*
@@ -49,32 +49,33 @@ struct dm_io_client *dm_io_client_create(void)
{
struct dm_io_client *client;
unsigned min_ios = dm_get_reserved_bio_based_ios();
+ int ret;
client = kmalloc(sizeof(*client), GFP_KERNEL);
if (!client)
return ERR_PTR(-ENOMEM);
- client->pool = mempool_create_slab_pool(min_ios, _dm_io_cache);
- if (!client->pool)
+ ret = mempool_init_slab_pool(&client->pool, min_ios, _dm_io_cache);
+ if (ret)
goto bad;
- client->bios = bioset_create(min_ios, 0, BIOSET_NEED_BVECS);
- if (!client->bios)
+ ret = bioset_init(&client->bios, min_ios, 0, BIOSET_NEED_BVECS);
+ if (ret)
goto bad;
return client;
bad:
- mempool_destroy(client->pool);
+ mempool_exit(&client->pool);
kfree(client);
- return ERR_PTR(-ENOMEM);
+ return ERR_PTR(ret);
}
EXPORT_SYMBOL(dm_io_client_create);
void dm_io_client_destroy(struct dm_io_client *client)
{
- mempool_destroy(client->pool);
- bioset_free(client->bios);
+ mempool_exit(&client->pool);
+ bioset_exit(&client->bios);
kfree(client);
}
EXPORT_SYMBOL(dm_io_client_destroy);
@@ -120,7 +121,7 @@ static void complete_io(struct io *io)
invalidate_kernel_vmap_range(io->vma_invalidate_address,
io->vma_invalidate_size);
- mempool_free(io, io->client->pool);
+ mempool_free(io, &io->client->pool);
fn(error_bits, context);
}
@@ -344,7 +345,7 @@ static void do_region(int op, int op_flags, unsigned region,
dm_sector_div_up(remaining, (PAGE_SIZE >> SECTOR_SHIFT)));
}
- bio = bio_alloc_bioset(GFP_NOIO, num_bvecs, io->client->bios);
+ bio = bio_alloc_bioset(GFP_NOIO, num_bvecs, &io->client->bios);
bio->bi_iter.bi_sector = where->sector + (where->count - remaining);
bio_set_dev(bio, where->bdev);
bio->bi_end_io = endio;
@@ -442,7 +443,7 @@ static int sync_io(struct dm_io_client *client, unsigned int num_regions,
init_completion(&sio.wait);
- io = mempool_alloc(client->pool, GFP_NOIO);
+ io = mempool_alloc(&client->pool, GFP_NOIO);
io->error_bits = 0;
atomic_set(&io->count, 1); /* see dispatch_io() */
io->client = client;
@@ -474,7 +475,7 @@ static int async_io(struct dm_io_client *client, unsigned int num_regions,
return -EIO;
}
- io = mempool_alloc(client->pool, GFP_NOIO);
+ io = mempool_alloc(&client->pool, GFP_NOIO);
io->error_bits = 0;
atomic_set(&io->count, 1); /* see dispatch_io() */
io->client = client;
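
Note: the dm_io_client_create() error path is also improved: with the init-style APIs the constructor has a real errno in hand, so it returns ERR_PTR(ret) instead of assuming -ENOMEM. A condensed sketch of this constructor shape, with illustrative names:

    #include <linux/err.h>
    #include <linux/slab.h>
    #include <linux/mempool.h>

    struct example_client {
            mempool_t pool;
    };

    static struct example_client *example_client_create(void)
    {
            struct example_client *c = kmalloc(sizeof(*c), GFP_KERNEL);
            int ret;

            if (!c)
                    return ERR_PTR(-ENOMEM);

            ret = mempool_init_kmalloc_pool(&c->pool, 2, 128);
            if (ret) {
                    kfree(c);
                    return ERR_PTR(ret);    /* propagate the real error */
            }
            return c;
    }
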
diff --git a/drivers/md/dm-kcopyd.c b/drivers/md/dm-kcopyd.c
index e6e7c686646d..c89a675a2aac 100644
--- a/drivers/md/dm-kcopyd.c
+++ b/drivers/md/dm-kcopyd.c
@@ -47,7 +47,7 @@ struct dm_kcopyd_client {
wait_queue_head_t destroyq;
atomic_t nr_jobs;
- mempool_t *job_pool;
+ mempool_t job_pool;
struct workqueue_struct *kcopyd_wq;
struct work_struct kcopyd_work;
@@ -479,7 +479,7 @@ static int run_complete_job(struct kcopyd_job *job)
*/
if (job->master_job == job) {
mutex_destroy(&job->lock);
- mempool_free(job, kc->job_pool);
+ mempool_free(job, &kc->job_pool);
}
fn(read_err, write_err, context);
@@ -751,7 +751,7 @@ int dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from,
* Allocate an array of jobs consisting of one master job
* followed by SPLIT_COUNT sub jobs.
*/
- job = mempool_alloc(kc->job_pool, GFP_NOIO);
+ job = mempool_alloc(&kc->job_pool, GFP_NOIO);
mutex_init(&job->lock);
/*
@@ -835,7 +835,7 @@ void *dm_kcopyd_prepare_callback(struct dm_kcopyd_client *kc,
{
struct kcopyd_job *job;
- job = mempool_alloc(kc->job_pool, GFP_NOIO);
+ job = mempool_alloc(&kc->job_pool, GFP_NOIO);
memset(job, 0, sizeof(struct kcopyd_job));
job->kc = kc;
@@ -879,7 +879,7 @@ int kcopyd_cancel(struct kcopyd_job *job, int block)
*---------------------------------------------------------------*/
struct dm_kcopyd_client *dm_kcopyd_client_create(struct dm_kcopyd_throttle *throttle)
{
- int r = -ENOMEM;
+ int r;
struct dm_kcopyd_client *kc;
kc = kmalloc(sizeof(*kc), GFP_KERNEL);
@@ -892,14 +892,16 @@ struct dm_kcopyd_client *dm_kcopyd_client_create(struct dm_kcopyd_throttle *thro
INIT_LIST_HEAD(&kc->pages_jobs);
kc->throttle = throttle;
- kc->job_pool = mempool_create_slab_pool(MIN_JOBS, _job_cache);
- if (!kc->job_pool)
+ r = mempool_init_slab_pool(&kc->job_pool, MIN_JOBS, _job_cache);
+ if (r)
goto bad_slab;
INIT_WORK(&kc->kcopyd_work, do_work);
kc->kcopyd_wq = alloc_workqueue("kcopyd", WQ_MEM_RECLAIM, 0);
- if (!kc->kcopyd_wq)
+ if (!kc->kcopyd_wq) {
+ r = -ENOMEM;
goto bad_workqueue;
+ }
kc->pages = NULL;
kc->nr_reserved_pages = kc->nr_free_pages = 0;
@@ -923,7 +925,7 @@ bad_io_client:
bad_client_pages:
destroy_workqueue(kc->kcopyd_wq);
bad_workqueue:
- mempool_destroy(kc->job_pool);
+ mempool_exit(&kc->job_pool);
bad_slab:
kfree(kc);
@@ -942,7 +944,7 @@ void dm_kcopyd_client_destroy(struct dm_kcopyd_client *kc)
destroy_workqueue(kc->kcopyd_wq);
dm_io_client_destroy(kc->io_client);
client_free_pages(kc);
- mempool_destroy(kc->job_pool);
+ mempool_exit(&kc->job_pool);
kfree(kc);
}
EXPORT_SYMBOL(dm_kcopyd_client_destroy);
diff --git a/drivers/md/dm-log-userspace-base.c b/drivers/md/dm-log-userspace-base.c
index 53b7b06d0aa8..52090bee17c2 100644
--- a/drivers/md/dm-log-userspace-base.c
+++ b/drivers/md/dm-log-userspace-base.c
@@ -76,7 +76,7 @@ struct log_c {
*/
uint32_t integrated_flush;
- mempool_t *flush_entry_pool;
+ mempool_t flush_entry_pool;
};
static struct kmem_cache *_flush_entry_cache;
@@ -249,11 +249,10 @@ static int userspace_ctr(struct dm_dirty_log *log, struct dm_target *ti,
goto out;
}
- lc->flush_entry_pool = mempool_create_slab_pool(FLUSH_ENTRY_POOL_SIZE,
- _flush_entry_cache);
- if (!lc->flush_entry_pool) {
+ r = mempool_init_slab_pool(&lc->flush_entry_pool, FLUSH_ENTRY_POOL_SIZE,
+ _flush_entry_cache);
+ if (r) {
DMERR("Failed to create flush_entry_pool");
- r = -ENOMEM;
goto out;
}
@@ -313,7 +312,7 @@ static int userspace_ctr(struct dm_dirty_log *log, struct dm_target *ti,
out:
kfree(devices_rdata);
if (r) {
- mempool_destroy(lc->flush_entry_pool);
+ mempool_exit(&lc->flush_entry_pool);
kfree(lc);
kfree(ctr_str);
} else {
@@ -342,7 +341,7 @@ static void userspace_dtr(struct dm_dirty_log *log)
if (lc->log_dev)
dm_put_device(lc->ti, lc->log_dev);
- mempool_destroy(lc->flush_entry_pool);
+ mempool_exit(&lc->flush_entry_pool);
kfree(lc->usr_argv_str);
kfree(lc);
@@ -570,7 +569,7 @@ static int userspace_flush(struct dm_dirty_log *log)
int mark_list_is_empty;
int clear_list_is_empty;
struct dm_dirty_log_flush_entry *fe, *tmp_fe;
- mempool_t *flush_entry_pool = lc->flush_entry_pool;
+ mempool_t *flush_entry_pool = &lc->flush_entry_pool;
spin_lock_irqsave(&lc->flush_lock, flags);
list_splice_init(&lc->mark_list, &mark_list);
@@ -653,7 +652,7 @@ static void userspace_mark_region(struct dm_dirty_log *log, region_t region)
struct dm_dirty_log_flush_entry *fe;
/* Wait for an allocation, but _never_ fail */
- fe = mempool_alloc(lc->flush_entry_pool, GFP_NOIO);
+ fe = mempool_alloc(&lc->flush_entry_pool, GFP_NOIO);
BUG_ON(!fe);
spin_lock_irqsave(&lc->flush_lock, flags);
@@ -687,7 +686,7 @@ static void userspace_clear_region(struct dm_dirty_log *log, region_t region)
* to cause the region to be resync'ed when the
* device is activated next time.
*/
- fe = mempool_alloc(lc->flush_entry_pool, GFP_ATOMIC);
+ fe = mempool_alloc(&lc->flush_entry_pool, GFP_ATOMIC);
if (!fe) {
DMERR("Failed to allocate memory to clear region.");
return;
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index 203a0419d2b0..d94ba6f72ff5 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -520,7 +520,8 @@ static int multipath_clone_and_map(struct dm_target *ti, struct request *rq,
bdev = pgpath->path.dev->bdev;
q = bdev_get_queue(bdev);
- clone = blk_get_request(q, rq->cmd_flags | REQ_NOMERGE, GFP_ATOMIC);
+ clone = blk_get_request(q, rq->cmd_flags | REQ_NOMERGE,
+ BLK_MQ_REQ_NOWAIT);
if (IS_ERR(clone)) {
/* EBUSY, ENODEV or EWOULDBLOCK: requeue */
if (blk_queue_dying(q)) {
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index 580c49cc8079..5903e492bb34 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -23,6 +23,8 @@
#define MAX_RECOVERY 1 /* Maximum number of regions recovered in parallel. */
+#define MAX_NR_MIRRORS (DM_KCOPYD_MAX_REGIONS + 1)
+
#define DM_RAID1_HANDLE_ERRORS 0x01
#define DM_RAID1_KEEP_LOG 0x02
#define errors_handled(p) ((p)->features & DM_RAID1_HANDLE_ERRORS)
@@ -255,7 +257,7 @@ static int mirror_flush(struct dm_target *ti)
unsigned long error_bits;
unsigned int i;
- struct dm_io_region io[ms->nr_mirrors];
+ struct dm_io_region io[MAX_NR_MIRRORS];
struct mirror *m;
struct dm_io_request io_req = {
.bi_op = REQ_OP_WRITE,
@@ -651,7 +653,7 @@ static void write_callback(unsigned long error, void *context)
static void do_write(struct mirror_set *ms, struct bio *bio)
{
unsigned int i;
- struct dm_io_region io[ms->nr_mirrors], *dest = io;
+ struct dm_io_region io[MAX_NR_MIRRORS], *dest = io;
struct mirror *m;
struct dm_io_request io_req = {
.bi_op = REQ_OP_WRITE,
@@ -1083,7 +1085,7 @@ static int mirror_ctr(struct dm_target *ti, unsigned int argc, char **argv)
argc -= args_used;
if (!argc || sscanf(argv[0], "%u%c", &nr_mirrors, &dummy) != 1 ||
- nr_mirrors < 2 || nr_mirrors > DM_KCOPYD_MAX_REGIONS + 1) {
+ nr_mirrors < 2 || nr_mirrors > MAX_NR_MIRRORS) {
ti->error = "Invalid number of mirrors";
dm_dirty_log_destroy(dl);
return -EINVAL;
@@ -1404,7 +1406,7 @@ static void mirror_status(struct dm_target *ti, status_type_t type,
int num_feature_args = 0;
struct mirror_set *ms = (struct mirror_set *) ti->private;
struct dm_dirty_log *log = dm_rh_dirty_log(ms->rh);
- char buffer[ms->nr_mirrors + 1];
+ char buffer[MAX_NR_MIRRORS + 1];
switch (type) {
case STATUSTYPE_INFO:
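
Note: the dm-raid1 hunks replace variable-length arrays with arrays sized by the new MAX_NR_MIRRORS bound; a VLA's stack usage is unknown at compile time, and nr_mirrors is already validated against the same constant in mirror_ctr(). A sketch of the transformation, with invented names:

    #define EXAMPLE_MAX_ITEMS 9     /* compile-time upper bound */

    static void example_process(unsigned nr_items) /* <= EXAMPLE_MAX_ITEMS */
    {
            /*
             * before: int vals[nr_items];  -- a VLA, unbounded stack use
             * after:  a fixed array sized by the validated maximum
             */
            int vals[EXAMPLE_MAX_ITEMS];
            unsigned i;

            for (i = 0; i < nr_items; i++)
                    vals[i] = 0;
    }
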
diff --git a/drivers/md/dm-region-hash.c b/drivers/md/dm-region-hash.c
index 85c32b22a420..43149eb49375 100644
--- a/drivers/md/dm-region-hash.c
+++ b/drivers/md/dm-region-hash.c
@@ -63,7 +63,7 @@ struct dm_region_hash {
/* hash table */
rwlock_t hash_lock;
- mempool_t *region_pool;
+ mempool_t region_pool;
unsigned mask;
unsigned nr_buckets;
unsigned prime;
@@ -169,6 +169,7 @@ struct dm_region_hash *dm_region_hash_create(
struct dm_region_hash *rh;
unsigned nr_buckets, max_buckets;
size_t i;
+ int ret;
/*
* Calculate a suitable number of buckets for our hash
@@ -220,9 +221,9 @@ struct dm_region_hash *dm_region_hash_create(
INIT_LIST_HEAD(&rh->failed_recovered_regions);
rh->flush_failure = 0;
- rh->region_pool = mempool_create_kmalloc_pool(MIN_REGIONS,
- sizeof(struct dm_region));
- if (!rh->region_pool) {
+ ret = mempool_init_kmalloc_pool(&rh->region_pool, MIN_REGIONS,
+ sizeof(struct dm_region));
+ if (ret) {
vfree(rh->buckets);
kfree(rh);
rh = ERR_PTR(-ENOMEM);
@@ -242,14 +243,14 @@ void dm_region_hash_destroy(struct dm_region_hash *rh)
list_for_each_entry_safe(reg, nreg, rh->buckets + h,
hash_list) {
BUG_ON(atomic_read(&reg->pending));
- mempool_free(reg, rh->region_pool);
+ mempool_free(reg, &rh->region_pool);
}
}
if (rh->log)
dm_dirty_log_destroy(rh->log);
- mempool_destroy(rh->region_pool);
+ mempool_exit(&rh->region_pool);
vfree(rh->buckets);
kfree(rh);
}
@@ -287,7 +288,7 @@ static struct dm_region *__rh_alloc(struct dm_region_hash *rh, region_t region)
{
struct dm_region *reg, *nreg;
- nreg = mempool_alloc(rh->region_pool, GFP_ATOMIC);
+ nreg = mempool_alloc(&rh->region_pool, GFP_ATOMIC);
if (unlikely(!nreg))
nreg = kmalloc(sizeof(*nreg), GFP_NOIO | __GFP_NOFAIL);
@@ -303,7 +304,7 @@ static struct dm_region *__rh_alloc(struct dm_region_hash *rh, region_t region)
reg = __rh_lookup(rh, region);
if (reg)
/* We lost the race. */
- mempool_free(nreg, rh->region_pool);
+ mempool_free(nreg, &rh->region_pool);
else {
__rh_insert(rh, nreg);
if (nreg->state == DM_RH_CLEAN) {
@@ -481,17 +482,17 @@ void dm_rh_update_states(struct dm_region_hash *rh, int errors_handled)
list_for_each_entry_safe(reg, next, &recovered, list) {
rh->log->type->clear_region(rh->log, reg->key);
complete_resync_work(reg, 1);
- mempool_free(reg, rh->region_pool);
+ mempool_free(reg, &rh->region_pool);
}
list_for_each_entry_safe(reg, next, &failed_recovered, list) {
complete_resync_work(reg, errors_handled ? 0 : 1);
- mempool_free(reg, rh->region_pool);
+ mempool_free(reg, &rh->region_pool);
}
list_for_each_entry_safe(reg, next, &clean, list) {
rh->log->type->clear_region(rh->log, reg->key);
- mempool_free(reg, rh->region_pool);
+ mempool_free(reg, &rh->region_pool);
}
rh->log->type->flush(rh->log);
diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c
index bf0b840645cc..6e547b8dd298 100644
--- a/drivers/md/dm-rq.c
+++ b/drivers/md/dm-rq.c
@@ -406,7 +406,7 @@ static blk_status_t dm_dispatch_clone_request(struct request *clone, struct requ
if (blk_queue_io_stat(clone->q))
clone->rq_flags |= RQF_IO_STAT;
- clone->start_time = jiffies;
+ clone->start_time_ns = ktime_get_ns();
r = blk_insert_cloned_request(clone->q, clone);
if (r != BLK_STS_OK && r != BLK_STS_RESOURCE && r != BLK_STS_DEV_RESOURCE)
/* must complete clone in terms of original request */
@@ -433,7 +433,7 @@ static int setup_clone(struct request *clone, struct request *rq,
{
int r;
- r = blk_rq_prep_clone(clone, rq, tio->md->bs, gfp_mask,
+ r = blk_rq_prep_clone(clone, rq, &tio->md->bs, gfp_mask,
dm_rq_bio_constructor, tio);
if (r)
return r;
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index 216035be5661..b11ddc55f297 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -87,7 +87,7 @@ struct dm_snapshot {
*/
struct list_head out_of_order_list;
- mempool_t *pending_pool;
+ mempool_t pending_pool;
struct dm_exception_table pending;
struct dm_exception_table complete;
@@ -682,7 +682,7 @@ static void free_completed_exception(struct dm_exception *e)
static struct dm_snap_pending_exception *alloc_pending_exception(struct dm_snapshot *s)
{
- struct dm_snap_pending_exception *pe = mempool_alloc(s->pending_pool,
+ struct dm_snap_pending_exception *pe = mempool_alloc(&s->pending_pool,
GFP_NOIO);
atomic_inc(&s->pending_exceptions_count);
@@ -695,7 +695,7 @@ static void free_pending_exception(struct dm_snap_pending_exception *pe)
{
struct dm_snapshot *s = pe->snap;
- mempool_free(pe, s->pending_pool);
+ mempool_free(pe, &s->pending_pool);
smp_mb__before_atomic();
atomic_dec(&s->pending_exceptions_count);
}
@@ -1196,10 +1196,9 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
goto bad_kcopyd;
}
- s->pending_pool = mempool_create_slab_pool(MIN_IOS, pending_cache);
- if (!s->pending_pool) {
+ r = mempool_init_slab_pool(&s->pending_pool, MIN_IOS, pending_cache);
+ if (r) {
ti->error = "Could not allocate mempool for pending exceptions";
- r = -ENOMEM;
goto bad_pending_pool;
}
@@ -1259,7 +1258,7 @@ bad_read_metadata:
unregister_snapshot(s);
bad_load_and_register:
- mempool_destroy(s->pending_pool);
+ mempool_exit(&s->pending_pool);
bad_pending_pool:
dm_kcopyd_client_destroy(s->kcopyd_client);
@@ -1355,7 +1354,7 @@ static void snapshot_dtr(struct dm_target *ti)
while (atomic_read(&s->pending_exceptions_count))
msleep(1);
/*
- * Ensure instructions in mempool_destroy aren't reordered
+ * Ensure instructions in mempool_exit aren't reordered
* before atomic_read.
*/
smp_mb();
@@ -1367,7 +1366,7 @@ static void snapshot_dtr(struct dm_target *ti)
__free_exceptions(s);
- mempool_destroy(s->pending_pool);
+ mempool_exit(&s->pending_pool);
dm_exception_store_destroy(s->store);
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index b11107497d2e..6c923824ec91 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -260,7 +260,7 @@ struct pool {
struct dm_deferred_set *all_io_ds;
struct dm_thin_new_mapping *next_mapping;
- mempool_t *mapping_pool;
+ mempool_t mapping_pool;
process_bio_fn process_bio;
process_bio_fn process_discard;
@@ -917,7 +917,7 @@ static void process_prepared_mapping_fail(struct dm_thin_new_mapping *m)
{
cell_error(m->tc->pool, m->cell);
list_del(&m->list);
- mempool_free(m, m->tc->pool->mapping_pool);
+ mempool_free(m, &m->tc->pool->mapping_pool);
}
static void process_prepared_mapping(struct dm_thin_new_mapping *m)
@@ -961,7 +961,7 @@ static void process_prepared_mapping(struct dm_thin_new_mapping *m)
out:
list_del(&m->list);
- mempool_free(m, pool->mapping_pool);
+ mempool_free(m, &pool->mapping_pool);
}
/*----------------------------------------------------------------*/
@@ -971,7 +971,7 @@ static void free_discard_mapping(struct dm_thin_new_mapping *m)
struct thin_c *tc = m->tc;
if (m->cell)
cell_defer_no_holder(tc, m->cell);
- mempool_free(m, tc->pool->mapping_pool);
+ mempool_free(m, &tc->pool->mapping_pool);
}
static void process_prepared_discard_fail(struct dm_thin_new_mapping *m)
@@ -999,7 +999,7 @@ static void process_prepared_discard_no_passdown(struct dm_thin_new_mapping *m)
bio_endio(m->bio);
cell_defer_no_holder(tc, m->cell);
- mempool_free(m, tc->pool->mapping_pool);
+ mempool_free(m, &tc->pool->mapping_pool);
}
/*----------------------------------------------------------------*/
@@ -1092,7 +1092,7 @@ static void process_prepared_discard_passdown_pt1(struct dm_thin_new_mapping *m)
metadata_operation_failed(pool, "dm_thin_remove_range", r);
bio_io_error(m->bio);
cell_defer_no_holder(tc, m->cell);
- mempool_free(m, pool->mapping_pool);
+ mempool_free(m, &pool->mapping_pool);
return;
}
@@ -1105,7 +1105,7 @@ static void process_prepared_discard_passdown_pt1(struct dm_thin_new_mapping *m)
metadata_operation_failed(pool, "dm_pool_inc_data_range", r);
bio_io_error(m->bio);
cell_defer_no_holder(tc, m->cell);
- mempool_free(m, pool->mapping_pool);
+ mempool_free(m, &pool->mapping_pool);
return;
}
@@ -1150,7 +1150,7 @@ static void process_prepared_discard_passdown_pt2(struct dm_thin_new_mapping *m)
bio_endio(m->bio);
cell_defer_no_holder(tc, m->cell);
- mempool_free(m, pool->mapping_pool);
+ mempool_free(m, &pool->mapping_pool);
}
static void process_prepared(struct pool *pool, struct list_head *head,
@@ -1196,7 +1196,7 @@ static int ensure_next_mapping(struct pool *pool)
if (pool->next_mapping)
return 0;
- pool->next_mapping = mempool_alloc(pool->mapping_pool, GFP_ATOMIC);
+ pool->next_mapping = mempool_alloc(&pool->mapping_pool, GFP_ATOMIC);
return pool->next_mapping ? 0 : -ENOMEM;
}
@@ -2835,8 +2835,8 @@ static void __pool_destroy(struct pool *pool)
destroy_workqueue(pool->wq);
if (pool->next_mapping)
- mempool_free(pool->next_mapping, pool->mapping_pool);
- mempool_destroy(pool->mapping_pool);
+ mempool_free(pool->next_mapping, &pool->mapping_pool);
+ mempool_exit(&pool->mapping_pool);
dm_deferred_set_destroy(pool->shared_read_ds);
dm_deferred_set_destroy(pool->all_io_ds);
kfree(pool);
@@ -2931,11 +2931,11 @@ static struct pool *pool_create(struct mapped_device *pool_md,
}
pool->next_mapping = NULL;
- pool->mapping_pool = mempool_create_slab_pool(MAPPING_POOL_SIZE,
- _new_mapping_cache);
- if (!pool->mapping_pool) {
+ r = mempool_init_slab_pool(&pool->mapping_pool, MAPPING_POOL_SIZE,
+ _new_mapping_cache);
+ if (r) {
*error = "Error creating pool's mapping mempool";
- err_p = ERR_PTR(-ENOMEM);
+ err_p = ERR_PTR(r);
goto bad_mapping_pool;
}
@@ -2955,7 +2955,7 @@ static struct pool *pool_create(struct mapped_device *pool_md,
return pool;
bad_sort_array:
- mempool_destroy(pool->mapping_pool);
+ mempool_exit(&pool->mapping_pool);
bad_mapping_pool:
dm_deferred_set_destroy(pool->all_io_ds);
bad_all_io_ds:
diff --git a/drivers/md/dm-verity-fec.c b/drivers/md/dm-verity-fec.c
index e13f90832b6b..86405869f1af 100644
--- a/drivers/md/dm-verity-fec.c
+++ b/drivers/md/dm-verity-fec.c
@@ -309,13 +309,13 @@ static int fec_alloc_bufs(struct dm_verity *v, struct dm_verity_fec_io *fio)
unsigned n;
if (!fio->rs)
- fio->rs = mempool_alloc(v->fec->rs_pool, GFP_NOIO);
+ fio->rs = mempool_alloc(&v->fec->rs_pool, GFP_NOIO);
fec_for_each_prealloc_buffer(n) {
if (fio->bufs[n])
continue;
- fio->bufs[n] = mempool_alloc(v->fec->prealloc_pool, GFP_NOWAIT);
+ fio->bufs[n] = mempool_alloc(&v->fec->prealloc_pool, GFP_NOWAIT);
if (unlikely(!fio->bufs[n])) {
DMERR("failed to allocate FEC buffer");
return -ENOMEM;
@@ -327,7 +327,7 @@ static int fec_alloc_bufs(struct dm_verity *v, struct dm_verity_fec_io *fio)
if (fio->bufs[n])
continue;
- fio->bufs[n] = mempool_alloc(v->fec->extra_pool, GFP_NOWAIT);
+ fio->bufs[n] = mempool_alloc(&v->fec->extra_pool, GFP_NOWAIT);
/* we can manage with even one buffer if necessary */
if (unlikely(!fio->bufs[n]))
break;
@@ -335,7 +335,7 @@ static int fec_alloc_bufs(struct dm_verity *v, struct dm_verity_fec_io *fio)
fio->nbufs = n;
if (!fio->output)
- fio->output = mempool_alloc(v->fec->output_pool, GFP_NOIO);
+ fio->output = mempool_alloc(&v->fec->output_pool, GFP_NOIO);
return 0;
}
@@ -493,15 +493,15 @@ void verity_fec_finish_io(struct dm_verity_io *io)
if (!verity_fec_is_enabled(io->v))
return;
- mempool_free(fio->rs, f->rs_pool);
+ mempool_free(fio->rs, &f->rs_pool);
fec_for_each_prealloc_buffer(n)
- mempool_free(fio->bufs[n], f->prealloc_pool);
+ mempool_free(fio->bufs[n], &f->prealloc_pool);
fec_for_each_extra_buffer(fio, n)
- mempool_free(fio->bufs[n], f->extra_pool);
+ mempool_free(fio->bufs[n], &f->extra_pool);
- mempool_free(fio->output, f->output_pool);
+ mempool_free(fio->output, &f->output_pool);
}
/*
@@ -549,9 +549,9 @@ void verity_fec_dtr(struct dm_verity *v)
if (!verity_fec_is_enabled(v))
goto out;
- mempool_destroy(f->rs_pool);
- mempool_destroy(f->prealloc_pool);
- mempool_destroy(f->extra_pool);
+ mempool_exit(&f->rs_pool);
+ mempool_exit(&f->prealloc_pool);
+ mempool_exit(&f->extra_pool);
kmem_cache_destroy(f->cache);
if (f->data_bufio)
@@ -675,6 +675,7 @@ int verity_fec_ctr(struct dm_verity *v)
struct dm_verity_fec *f = v->fec;
struct dm_target *ti = v->ti;
u64 hash_blocks;
+ int ret;
if (!verity_fec_is_enabled(v)) {
verity_fec_dtr(v);
@@ -770,11 +771,11 @@ int verity_fec_ctr(struct dm_verity *v)
}
/* Preallocate an rs_control structure for each worker thread */
- f->rs_pool = mempool_create(num_online_cpus(), fec_rs_alloc,
- fec_rs_free, (void *) v);
- if (!f->rs_pool) {
+ ret = mempool_init(&f->rs_pool, num_online_cpus(), fec_rs_alloc,
+ fec_rs_free, (void *) v);
+ if (ret) {
ti->error = "Cannot allocate RS pool";
- return -ENOMEM;
+ return ret;
}
f->cache = kmem_cache_create("dm_verity_fec_buffers",
@@ -786,26 +787,26 @@ int verity_fec_ctr(struct dm_verity *v)
}
/* Preallocate DM_VERITY_FEC_BUF_PREALLOC buffers for each thread */
- f->prealloc_pool = mempool_create_slab_pool(num_online_cpus() *
- DM_VERITY_FEC_BUF_PREALLOC,
- f->cache);
- if (!f->prealloc_pool) {
+ ret = mempool_init_slab_pool(&f->prealloc_pool, num_online_cpus() *
+ DM_VERITY_FEC_BUF_PREALLOC,
+ f->cache);
+ if (ret) {
ti->error = "Cannot allocate FEC buffer prealloc pool";
- return -ENOMEM;
+ return ret;
}
- f->extra_pool = mempool_create_slab_pool(0, f->cache);
- if (!f->extra_pool) {
+ ret = mempool_init_slab_pool(&f->extra_pool, 0, f->cache);
+ if (ret) {
ti->error = "Cannot allocate FEC buffer extra pool";
- return -ENOMEM;
+ return ret;
}
/* Preallocate an output buffer for each thread */
- f->output_pool = mempool_create_kmalloc_pool(num_online_cpus(),
- 1 << v->data_dev_block_bits);
- if (!f->output_pool) {
+ ret = mempool_init_kmalloc_pool(&f->output_pool, num_online_cpus(),
+ 1 << v->data_dev_block_bits);
+ if (ret) {
ti->error = "Cannot allocate FEC output pool";
- return -ENOMEM;
+ return ret;
}
/* Reserve space for our per-bio data */
diff --git a/drivers/md/dm-verity-fec.h b/drivers/md/dm-verity-fec.h
index bb31ce87a933..6ad803b2b36c 100644
--- a/drivers/md/dm-verity-fec.h
+++ b/drivers/md/dm-verity-fec.h
@@ -46,10 +46,10 @@ struct dm_verity_fec {
sector_t hash_blocks; /* blocks covered after v->hash_start */
unsigned char roots; /* number of parity bytes, M-N of RS(M, N) */
unsigned char rsn; /* N of RS(M, N) */
- mempool_t *rs_pool; /* mempool for fio->rs */
- mempool_t *prealloc_pool; /* mempool for preallocated buffers */
- mempool_t *extra_pool; /* mempool for extra buffers */
- mempool_t *output_pool; /* mempool for output */
+ mempool_t rs_pool; /* mempool for fio->rs */
+ mempool_t prealloc_pool; /* mempool for preallocated buffers */
+ mempool_t extra_pool; /* mempool for extra buffers */
+ mempool_t output_pool; /* mempool for output */
struct kmem_cache *cache; /* cache for buffers */
};
diff --git a/drivers/md/dm-zoned-target.c b/drivers/md/dm-zoned-target.c
index e73b0776683c..30602d15ad9a 100644
--- a/drivers/md/dm-zoned-target.c
+++ b/drivers/md/dm-zoned-target.c
@@ -57,7 +57,7 @@ struct dmz_target {
struct workqueue_struct *chunk_wq;
/* For cloned BIOs to zones */
- struct bio_set *bio_set;
+ struct bio_set bio_set;
/* For flush */
spinlock_t flush_lock;
@@ -121,7 +121,7 @@ static int dmz_submit_read_bio(struct dmz_target *dmz, struct dm_zone *zone,
}
/* Partial BIO: we need to clone the BIO */
- clone = bio_clone_fast(bio, GFP_NOIO, dmz->bio_set);
+ clone = bio_clone_fast(bio, GFP_NOIO, &dmz->bio_set);
if (!clone)
return -ENOMEM;
@@ -779,10 +779,9 @@ static int dmz_ctr(struct dm_target *ti, unsigned int argc, char **argv)
ti->len = (sector_t)dmz_nr_chunks(dmz->metadata) << dev->zone_nr_sectors_shift;
/* Zone BIO */
- dmz->bio_set = bioset_create(DMZ_MIN_BIOS, 0, 0);
- if (!dmz->bio_set) {
+ ret = bioset_init(&dmz->bio_set, DMZ_MIN_BIOS, 0, 0);
+ if (ret) {
ti->error = "Create BIO set failed";
- ret = -ENOMEM;
goto err_meta;
}
@@ -828,7 +827,7 @@ err_cwq:
destroy_workqueue(dmz->chunk_wq);
err_bio:
mutex_destroy(&dmz->chunk_lock);
- bioset_free(dmz->bio_set);
+ bioset_exit(&dmz->bio_set);
err_meta:
dmz_dtr_metadata(dmz->metadata);
err_dev:
@@ -858,7 +857,7 @@ static void dmz_dtr(struct dm_target *ti)
dmz_dtr_metadata(dmz->metadata);
- bioset_free(dmz->bio_set);
+ bioset_exit(&dmz->bio_set);
dmz_put_zoned_device(ti);
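
[Editor's note: dm-zoned gets the same treatment for its bio_set. bioset_init()/bioset_exit() on an embedded struct bio_set replace bioset_create()/bioset_free() on a pointer. A hedged sketch with a hypothetical target struct:

	#include <linux/bio.h>

	struct tgt {
		struct bio_set bs;	/* embedded bio_set */
	};

	static int tgt_ctr(struct tgt *t)
	{
		/* 64 bios preallocated, no front_pad, no flags; returns an errno */
		return bioset_init(&t->bs, 64, 0, 0);
	}

	static struct bio *tgt_clone(struct tgt *t, struct bio *bio)
	{
		/* allocation sites now pass the set by address */
		return bio_clone_fast(bio, GFP_NOIO, &t->bs);
	}

	static void tgt_dtr(struct tgt *t)
	{
		/* no-op on a zeroed set, so error paths need no "if initialized" guard */
		bioset_exit(&t->bs);
	}
]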
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 4ea404dbcf0b..98dff36b89a3 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -148,8 +148,8 @@ static int dm_numa_node = DM_NUMA_NODE;
* For mempools pre-allocation at the table loading time.
*/
struct dm_md_mempools {
- struct bio_set *bs;
- struct bio_set *io_bs;
+ struct bio_set bs;
+ struct bio_set io_bs;
};
struct table_device {
@@ -537,7 +537,7 @@ static struct dm_io *alloc_io(struct mapped_device *md, struct bio *bio)
struct dm_target_io *tio;
struct bio *clone;
- clone = bio_alloc_bioset(GFP_NOIO, 0, md->io_bs);
+ clone = bio_alloc_bioset(GFP_NOIO, 0, &md->io_bs);
if (!clone)
return NULL;
@@ -572,7 +572,7 @@ static struct dm_target_io *alloc_tio(struct clone_info *ci, struct dm_target *t
/* the dm_target_io embedded in ci->io is available */
tio = &ci->io->tio;
} else {
- struct bio *clone = bio_alloc_bioset(gfp_mask, 0, ci->io->md->bs);
+ struct bio *clone = bio_alloc_bioset(gfp_mask, 0, &ci->io->md->bs);
if (!clone)
return NULL;
@@ -1020,7 +1020,8 @@ int dm_set_target_max_io_len(struct dm_target *ti, sector_t len)
EXPORT_SYMBOL_GPL(dm_set_target_max_io_len);
static struct dm_target *dm_dax_get_live_target(struct mapped_device *md,
-						sector_t sector, int *srcu_idx)
+		sector_t sector, int *srcu_idx)
+	__acquires(md->io_barrier)
{
struct dm_table *map;
struct dm_target *ti;
@@ -1037,7 +1038,7 @@ static struct dm_target *dm_dax_get_live_target(struct mapped_device *md,
}
static long dm_dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff,
-						 long nr_pages, void **kaddr, pfn_t *pfn)
+		long nr_pages, void **kaddr, pfn_t *pfn)
{
struct mapped_device *md = dax_get_private(dax_dev);
sector_t sector = pgoff * PAGE_SECTORS;
@@ -1065,7 +1066,7 @@ static long dm_dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff,
}
static size_t dm_dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff,
-				    void *addr, size_t bytes, struct iov_iter *i)
+		void *addr, size_t bytes, struct iov_iter *i)
{
struct mapped_device *md = dax_get_private(dax_dev);
sector_t sector = pgoff * PAGE_SECTORS;
@@ -1582,7 +1583,7 @@ static blk_qc_t __split_and_process_bio(struct mapped_device *md,
* won't be affected by this reassignment.
*/
struct bio *b = bio_clone_bioset(bio, GFP_NOIO,
- md->queue->bio_split);
+ &md->queue->bio_split);
ci.io->orig_bio = b;
bio_advance(bio, (bio_sectors(bio) - ci.sector_count) << 9);
bio_chain(b, bio);
@@ -1784,10 +1785,8 @@ static void cleanup_mapped_device(struct mapped_device *md)
destroy_workqueue(md->wq);
if (md->kworker_task)
kthread_stop(md->kworker_task);
- if (md->bs)
- bioset_free(md->bs);
- if (md->io_bs)
- bioset_free(md->io_bs);
+ bioset_exit(&md->bs);
+ bioset_exit(&md->io_bs);
if (md->dax_dev) {
kill_dax(md->dax_dev);
@@ -1964,16 +1963,10 @@ static void __bind_mempools(struct mapped_device *md, struct dm_table *t)
* If so, reload bioset because front_pad may have changed
* because a different table was loaded.
*/
- if (md->bs) {
- bioset_free(md->bs);
- md->bs = NULL;
- }
- if (md->io_bs) {
- bioset_free(md->io_bs);
- md->io_bs = NULL;
- }
+ bioset_exit(&md->bs);
+ bioset_exit(&md->io_bs);
- } else if (md->bs) {
+ } else if (bioset_initialized(&md->bs)) {
/*
* There's no need to reload with request-based dm
* because the size of front_pad doesn't change.
@@ -1985,12 +1978,14 @@ static void __bind_mempools(struct mapped_device *md, struct dm_table *t)
goto out;
}
- BUG_ON(!p || md->bs || md->io_bs);
+ BUG_ON(!p ||
+ bioset_initialized(&md->bs) ||
+ bioset_initialized(&md->io_bs));
md->bs = p->bs;
- p->bs = NULL;
+ memset(&p->bs, 0, sizeof(p->bs));
md->io_bs = p->io_bs;
- p->io_bs = NULL;
+ memset(&p->io_bs, 0, sizeof(p->io_bs));
out:
/* mempool bind completed, no longer need any mempools in the table */
dm_table_free_md_mempools(t);
@@ -2904,6 +2899,7 @@ struct dm_md_mempools *dm_alloc_md_mempools(struct mapped_device *md, enum dm_qu
struct dm_md_mempools *pools = kzalloc_node(sizeof(*pools), GFP_KERNEL, md->numa_node_id);
unsigned int pool_size = 0;
unsigned int front_pad, io_front_pad;
+ int ret;
if (!pools)
return NULL;
@@ -2915,10 +2911,10 @@ struct dm_md_mempools *dm_alloc_md_mempools(struct mapped_device *md, enum dm_qu
pool_size = max(dm_get_reserved_bio_based_ios(), min_pool_size);
front_pad = roundup(per_io_data_size, __alignof__(struct dm_target_io)) + offsetof(struct dm_target_io, clone);
io_front_pad = roundup(front_pad, __alignof__(struct dm_io)) + offsetof(struct dm_io, tio);
- pools->io_bs = bioset_create(pool_size, io_front_pad, 0);
- if (!pools->io_bs)
+ ret = bioset_init(&pools->io_bs, pool_size, io_front_pad, 0);
+ if (ret)
goto out;
- if (integrity && bioset_integrity_create(pools->io_bs, pool_size))
+ if (integrity && bioset_integrity_create(&pools->io_bs, pool_size))
goto out;
break;
case DM_TYPE_REQUEST_BASED:
@@ -2931,11 +2927,11 @@ struct dm_md_mempools *dm_alloc_md_mempools(struct mapped_device *md, enum dm_qu
BUG();
}
- pools->bs = bioset_create(pool_size, front_pad, 0);
- if (!pools->bs)
+ ret = bioset_init(&pools->bs, pool_size, front_pad, 0);
+ if (ret)
goto out;
- if (integrity && bioset_integrity_create(pools->bs, pool_size))
+ if (integrity && bioset_integrity_create(&pools->bs, pool_size))
goto out;
return pools;
@@ -2951,10 +2947,8 @@ void dm_free_md_mempools(struct dm_md_mempools *pools)
if (!pools)
return;
- if (pools->bs)
- bioset_free(pools->bs);
- if (pools->io_bs)
- bioset_free(pools->io_bs);
+ bioset_exit(&pools->bs);
+ bioset_exit(&pools->io_bs);
kfree(pools);
}
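
[Editor's note on __bind_mempools() above: with embedded sets, handing the table's pre-built bio_sets to the mapped device is a struct assignment followed by zeroing the donor, so the later dm_free_md_mempools() call exits only no-op sets. The idiom, written as a hypothetical helper (not a kernel API):

	#include <linux/bio.h>
	#include <linux/string.h>

	/* move ownership of an embedded bio_set from @src to @dst */
	static void bioset_move(struct bio_set *dst, struct bio_set *src)
	{
		*dst = *src;			/* dst now owns the internal pools */
		memset(src, 0, sizeof(*src));	/* bioset_exit(src) becomes a no-op */
	}
]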
diff --git a/drivers/md/md-faulty.c b/drivers/md/md-faulty.c
index 38264b38420f..c2fdf899de14 100644
--- a/drivers/md/md-faulty.c
+++ b/drivers/md/md-faulty.c
@@ -214,7 +214,7 @@ static bool faulty_make_request(struct mddev *mddev, struct bio *bio)
}
}
if (failit) {
- struct bio *b = bio_clone_fast(bio, GFP_NOIO, mddev->bio_set);
+ struct bio *b = bio_clone_fast(bio, GFP_NOIO, &mddev->bio_set);
bio_set_dev(b, conf->rdev->bdev);
b->bi_private = bio;
diff --git a/drivers/md/md-linear.c b/drivers/md/md-linear.c
index 4964323d936b..d45c697c0ebe 100644
--- a/drivers/md/md-linear.c
+++ b/drivers/md/md-linear.c
@@ -269,7 +269,7 @@ static bool linear_make_request(struct mddev *mddev, struct bio *bio)
if (unlikely(bio_end_sector(bio) > end_sector)) {
/* This bio crosses a device boundary, so we have to split it */
struct bio *split = bio_split(bio, end_sector - bio_sector,
- GFP_NOIO, mddev->bio_set);
+ GFP_NOIO, &mddev->bio_set);
bio_chain(split, bio);
generic_make_request(bio);
bio = split;
diff --git a/drivers/md/md-multipath.c b/drivers/md/md-multipath.c
index 0a7e99d62c69..f71fcdb9b39c 100644
--- a/drivers/md/md-multipath.c
+++ b/drivers/md/md-multipath.c
@@ -80,7 +80,7 @@ static void multipath_end_bh_io(struct multipath_bh *mp_bh, blk_status_t status)
bio->bi_status = status;
bio_endio(bio);
- mempool_free(mp_bh, conf->pool);
+ mempool_free(mp_bh, &conf->pool);
}
static void multipath_end_request(struct bio *bio)
@@ -117,7 +117,7 @@ static bool multipath_make_request(struct mddev *mddev, struct bio * bio)
return true;
}
- mp_bh = mempool_alloc(conf->pool, GFP_NOIO);
+ mp_bh = mempool_alloc(&conf->pool, GFP_NOIO);
mp_bh->master_bio = bio;
mp_bh->mddev = mddev;
@@ -125,7 +125,7 @@ static bool multipath_make_request(struct mddev *mddev, struct bio * bio)
mp_bh->path = multipath_map(conf);
if (mp_bh->path < 0) {
bio_io_error(bio);
- mempool_free(mp_bh, conf->pool);
+ mempool_free(mp_bh, &conf->pool);
return true;
}
multipath = conf->multipaths + mp_bh->path;
@@ -378,6 +378,7 @@ static int multipath_run (struct mddev *mddev)
struct multipath_info *disk;
struct md_rdev *rdev;
int working_disks;
+ int ret;
if (md_check_no_bitmap(mddev))
return -EINVAL;
@@ -431,9 +432,9 @@ static int multipath_run (struct mddev *mddev)
}
mddev->degraded = conf->raid_disks - working_disks;
- conf->pool = mempool_create_kmalloc_pool(NR_RESERVED_BUFS,
- sizeof(struct multipath_bh));
- if (conf->pool == NULL)
+ ret = mempool_init_kmalloc_pool(&conf->pool, NR_RESERVED_BUFS,
+ sizeof(struct multipath_bh));
+ if (ret)
goto out_free_conf;
mddev->thread = md_register_thread(multipathd, mddev,
@@ -455,7 +456,7 @@ static int multipath_run (struct mddev *mddev)
return 0;
out_free_conf:
- mempool_destroy(conf->pool);
+ mempool_exit(&conf->pool);
kfree(conf->multipaths);
kfree(conf);
mddev->private = NULL;
@@ -467,7 +468,7 @@ static void multipath_free(struct mddev *mddev, void *priv)
{
struct mpconf *conf = priv;
- mempool_destroy(conf->pool);
+ mempool_exit(&conf->pool);
kfree(conf->multipaths);
kfree(conf);
}
diff --git a/drivers/md/md-multipath.h b/drivers/md/md-multipath.h
index 0adb941f485a..b3099e5fc4d7 100644
--- a/drivers/md/md-multipath.h
+++ b/drivers/md/md-multipath.h
@@ -13,7 +13,7 @@ struct mpconf {
spinlock_t device_lock;
struct list_head retry_list;
- mempool_t *pool;
+ mempool_t pool;
};
/*
diff --git a/drivers/md/md.c b/drivers/md/md.c
index c208c01f63a5..fc692b7128bb 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -193,10 +193,10 @@ struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs,
{
struct bio *b;
- if (!mddev || !mddev->bio_set)
+ if (!mddev || !bioset_initialized(&mddev->bio_set))
return bio_alloc(gfp_mask, nr_iovecs);
- b = bio_alloc_bioset(gfp_mask, nr_iovecs, mddev->bio_set);
+ b = bio_alloc_bioset(gfp_mask, nr_iovecs, &mddev->bio_set);
if (!b)
return NULL;
return b;
@@ -205,10 +205,10 @@ EXPORT_SYMBOL_GPL(bio_alloc_mddev);
static struct bio *md_bio_alloc_sync(struct mddev *mddev)
{
- if (!mddev || !mddev->sync_set)
+ if (!mddev || !bioset_initialized(&mddev->sync_set))
return bio_alloc(GFP_NOIO, 1);
- return bio_alloc_bioset(GFP_NOIO, 1, mddev->sync_set);
+ return bio_alloc_bioset(GFP_NOIO, 1, &mddev->sync_set);
}
/*
@@ -510,7 +510,10 @@ static void mddev_delayed_delete(struct work_struct *ws);
static void mddev_put(struct mddev *mddev)
{
- struct bio_set *bs = NULL, *sync_bs = NULL;
+ struct bio_set bs, sync_bs;
+
+ memset(&bs, 0, sizeof(bs));
+ memset(&sync_bs, 0, sizeof(sync_bs));
if (!atomic_dec_and_lock(&mddev->active, &all_mddevs_lock))
return;
@@ -521,8 +524,8 @@ static void mddev_put(struct mddev *mddev)
list_del_init(&mddev->all_mddevs);
bs = mddev->bio_set;
sync_bs = mddev->sync_set;
- mddev->bio_set = NULL;
- mddev->sync_set = NULL;
+ memset(&mddev->bio_set, 0, sizeof(mddev->bio_set));
+ memset(&mddev->sync_set, 0, sizeof(mddev->sync_set));
if (mddev->gendisk) {
/* We did a probe so need to clean up. Call
* queue_work inside the spinlock so that
@@ -535,10 +538,8 @@ static void mddev_put(struct mddev *mddev)
kfree(mddev);
}
spin_unlock(&all_mddevs_lock);
- if (bs)
- bioset_free(bs);
- if (sync_bs)
- bioset_free(sync_bs);
+ bioset_exit(&bs);
+ bioset_exit(&sync_bs);
}
static void md_safemode_timeout(struct timer_list *t);
@@ -2123,7 +2124,7 @@ int md_integrity_register(struct mddev *mddev)
bdev_get_integrity(reference->bdev));
pr_debug("md: data integrity enabled on %s\n", mdname(mddev));
- if (bioset_integrity_create(mddev->bio_set, BIO_POOL_SIZE)) {
+ if (bioset_integrity_create(&mddev->bio_set, BIO_POOL_SIZE)) {
pr_err("md: failed to create integrity pool for %s\n",
mdname(mddev));
return -EINVAL;
@@ -5497,17 +5498,15 @@ int md_run(struct mddev *mddev)
sysfs_notify_dirent_safe(rdev->sysfs_state);
}
- if (mddev->bio_set == NULL) {
- mddev->bio_set = bioset_create(BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
- if (!mddev->bio_set)
- return -ENOMEM;
+ if (!bioset_initialized(&mddev->bio_set)) {
+ err = bioset_init(&mddev->bio_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
+ if (err)
+ return err;
}
- if (mddev->sync_set == NULL) {
- mddev->sync_set = bioset_create(BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
- if (!mddev->sync_set) {
- err = -ENOMEM;
+ if (!bioset_initialized(&mddev->sync_set)) {
+ err = bioset_init(&mddev->sync_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
+ if (err)
goto abort;
- }
}
spin_lock(&pers_lock);
@@ -5668,14 +5667,8 @@ int md_run(struct mddev *mddev)
return 0;
abort:
- if (mddev->bio_set) {
- bioset_free(mddev->bio_set);
- mddev->bio_set = NULL;
- }
- if (mddev->sync_set) {
- bioset_free(mddev->sync_set);
- mddev->sync_set = NULL;
- }
+ bioset_exit(&mddev->bio_set);
+ bioset_exit(&mddev->sync_set);
return err;
}
@@ -5888,14 +5881,8 @@ void md_stop(struct mddev *mddev)
* This is called from dm-raid
*/
__md_stop(mddev);
- if (mddev->bio_set) {
- bioset_free(mddev->bio_set);
- mddev->bio_set = NULL;
- }
- if (mddev->sync_set) {
- bioset_free(mddev->sync_set);
- mddev->sync_set = NULL;
- }
+ bioset_exit(&mddev->bio_set);
+ bioset_exit(&mddev->sync_set);
}
EXPORT_SYMBOL_GPL(md_stop);
diff --git a/drivers/md/md.h b/drivers/md/md.h
index fbc925cce810..3507cab22cb6 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -452,8 +452,8 @@ struct mddev {
struct attribute_group *to_remove;
- struct bio_set *bio_set;
- struct bio_set *sync_set; /* for sync operations like
+ struct bio_set bio_set;
+ struct bio_set sync_set; /* for sync operations like
* metadata and bitmap writes
*/
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index 584c10347267..65ae47a02218 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -479,7 +479,7 @@ static void raid0_handle_discard(struct mddev *mddev, struct bio *bio)
if (bio_end_sector(bio) > zone->zone_end) {
struct bio *split = bio_split(bio,
zone->zone_end - bio->bi_iter.bi_sector, GFP_NOIO,
- mddev->bio_set);
+ &mddev->bio_set);
bio_chain(split, bio);
generic_make_request(bio);
bio = split;
@@ -582,7 +582,8 @@ static bool raid0_make_request(struct mddev *mddev, struct bio *bio)
sector = bio_sector;
if (sectors < bio_sectors(bio)) {
- struct bio *split = bio_split(bio, sectors, GFP_NOIO, mddev->bio_set);
+ struct bio *split = bio_split(bio, sectors, GFP_NOIO,
+ &mddev->bio_set);
bio_chain(split, bio);
generic_make_request(bio);
bio = split;
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index e9e3308cb0a7..bad28520719b 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -221,7 +221,7 @@ static void free_r1bio(struct r1bio *r1_bio)
struct r1conf *conf = r1_bio->mddev->private;
put_all_bios(conf, r1_bio);
- mempool_free(r1_bio, conf->r1bio_pool);
+ mempool_free(r1_bio, &conf->r1bio_pool);
}
static void put_buf(struct r1bio *r1_bio)
@@ -236,7 +236,7 @@ static void put_buf(struct r1bio *r1_bio)
rdev_dec_pending(conf->mirrors[i].rdev, r1_bio->mddev);
}
- mempool_free(r1_bio, conf->r1buf_pool);
+ mempool_free(r1_bio, &conf->r1buf_pool);
lower_barrier(conf, sect);
}
@@ -1178,7 +1178,7 @@ alloc_r1bio(struct mddev *mddev, struct bio *bio)
struct r1conf *conf = mddev->private;
struct r1bio *r1_bio;
- r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO);
+ r1_bio = mempool_alloc(&conf->r1bio_pool, GFP_NOIO);
/* Ensure no bio records IO_BLOCKED */
memset(r1_bio->bios, 0, conf->raid_disks * sizeof(r1_bio->bios[0]));
init_r1bio(r1_bio, mddev, bio);
@@ -1268,7 +1268,7 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio,
if (max_sectors < bio_sectors(bio)) {
struct bio *split = bio_split(bio, max_sectors,
- gfp, conf->bio_split);
+ gfp, &conf->bio_split);
bio_chain(split, bio);
generic_make_request(bio);
bio = split;
@@ -1278,7 +1278,7 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio,
r1_bio->read_disk = rdisk;
- read_bio = bio_clone_fast(bio, gfp, mddev->bio_set);
+ read_bio = bio_clone_fast(bio, gfp, &mddev->bio_set);
r1_bio->bios[rdisk] = read_bio;
@@ -1439,7 +1439,7 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
if (max_sectors < bio_sectors(bio)) {
struct bio *split = bio_split(bio, max_sectors,
- GFP_NOIO, conf->bio_split);
+ GFP_NOIO, &conf->bio_split);
bio_chain(split, bio);
generic_make_request(bio);
bio = split;
@@ -1479,9 +1479,9 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
if (r1_bio->behind_master_bio)
mbio = bio_clone_fast(r1_bio->behind_master_bio,
- GFP_NOIO, mddev->bio_set);
+ GFP_NOIO, &mddev->bio_set);
else
- mbio = bio_clone_fast(bio, GFP_NOIO, mddev->bio_set);
+ mbio = bio_clone_fast(bio, GFP_NOIO, &mddev->bio_set);
if (r1_bio->behind_master_bio) {
if (test_bit(WriteMostly, &conf->mirrors[i].rdev->flags))
@@ -1657,8 +1657,7 @@ static void close_sync(struct r1conf *conf)
_allow_barrier(conf, idx);
}
- mempool_destroy(conf->r1buf_pool);
- conf->r1buf_pool = NULL;
+ mempool_exit(&conf->r1buf_pool);
}
static int raid1_spare_active(struct mddev *mddev)
@@ -2348,10 +2347,10 @@ static int narrow_write_error(struct r1bio *r1_bio, int i)
if (test_bit(R1BIO_BehindIO, &r1_bio->state)) {
wbio = bio_clone_fast(r1_bio->behind_master_bio,
GFP_NOIO,
- mddev->bio_set);
+ &mddev->bio_set);
} else {
wbio = bio_clone_fast(r1_bio->master_bio, GFP_NOIO,
- mddev->bio_set);
+ &mddev->bio_set);
}
bio_set_op_attrs(wbio, REQ_OP_WRITE, 0);
@@ -2564,17 +2563,15 @@ static int init_resync(struct r1conf *conf)
int buffs;
buffs = RESYNC_WINDOW / RESYNC_BLOCK_SIZE;
- BUG_ON(conf->r1buf_pool);
- conf->r1buf_pool = mempool_create(buffs, r1buf_pool_alloc, r1buf_pool_free,
- conf->poolinfo);
- if (!conf->r1buf_pool)
- return -ENOMEM;
- return 0;
+ BUG_ON(mempool_initialized(&conf->r1buf_pool));
+
+ return mempool_init(&conf->r1buf_pool, buffs, r1buf_pool_alloc,
+ r1buf_pool_free, conf->poolinfo);
}
static struct r1bio *raid1_alloc_init_r1buf(struct r1conf *conf)
{
- struct r1bio *r1bio = mempool_alloc(conf->r1buf_pool, GFP_NOIO);
+ struct r1bio *r1bio = mempool_alloc(&conf->r1buf_pool, GFP_NOIO);
struct resync_pages *rps;
struct bio *bio;
int i;
@@ -2617,7 +2614,7 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr,
int idx = sector_to_idx(sector_nr);
int page_idx = 0;
- if (!conf->r1buf_pool)
+ if (!mempool_initialized(&conf->r1buf_pool))
if (init_resync(conf))
return 0;
@@ -2953,14 +2950,13 @@ static struct r1conf *setup_conf(struct mddev *mddev)
if (!conf->poolinfo)
goto abort;
conf->poolinfo->raid_disks = mddev->raid_disks * 2;
- conf->r1bio_pool = mempool_create(NR_RAID1_BIOS, r1bio_pool_alloc,
- r1bio_pool_free,
- conf->poolinfo);
- if (!conf->r1bio_pool)
+ err = mempool_init(&conf->r1bio_pool, NR_RAID1_BIOS, r1bio_pool_alloc,
+ r1bio_pool_free, conf->poolinfo);
+ if (err)
goto abort;
- conf->bio_split = bioset_create(BIO_POOL_SIZE, 0, 0);
- if (!conf->bio_split)
+ err = bioset_init(&conf->bio_split, BIO_POOL_SIZE, 0, 0);
+ if (err)
goto abort;
conf->poolinfo->mddev = mddev;
@@ -3033,7 +3029,7 @@ static struct r1conf *setup_conf(struct mddev *mddev)
abort:
if (conf) {
- mempool_destroy(conf->r1bio_pool);
+ mempool_exit(&conf->r1bio_pool);
kfree(conf->mirrors);
safe_put_page(conf->tmppage);
kfree(conf->poolinfo);
@@ -3041,8 +3037,7 @@ static struct r1conf *setup_conf(struct mddev *mddev)
kfree(conf->nr_waiting);
kfree(conf->nr_queued);
kfree(conf->barrier);
- if (conf->bio_split)
- bioset_free(conf->bio_split);
+ bioset_exit(&conf->bio_split);
kfree(conf);
}
return ERR_PTR(err);
@@ -3144,7 +3139,7 @@ static void raid1_free(struct mddev *mddev, void *priv)
{
struct r1conf *conf = priv;
- mempool_destroy(conf->r1bio_pool);
+ mempool_exit(&conf->r1bio_pool);
kfree(conf->mirrors);
safe_put_page(conf->tmppage);
kfree(conf->poolinfo);
@@ -3152,8 +3147,7 @@ static void raid1_free(struct mddev *mddev, void *priv)
kfree(conf->nr_waiting);
kfree(conf->nr_queued);
kfree(conf->barrier);
- if (conf->bio_split)
- bioset_free(conf->bio_split);
+ bioset_exit(&conf->bio_split);
kfree(conf);
}
@@ -3199,13 +3193,17 @@ static int raid1_reshape(struct mddev *mddev)
* At the same time, we "pack" the devices so that all the missing
* devices have the higher raid_disk numbers.
*/
- mempool_t *newpool, *oldpool;
+ mempool_t newpool, oldpool;
struct pool_info *newpoolinfo;
struct raid1_info *newmirrors;
struct r1conf *conf = mddev->private;
int cnt, raid_disks;
unsigned long flags;
int d, d2;
+ int ret;
+
+ memset(&newpool, 0, sizeof(newpool));
+ memset(&oldpool, 0, sizeof(oldpool));
/* Cannot change chunk_size, layout, or level */
if (mddev->chunk_sectors != mddev->new_chunk_sectors ||
@@ -3237,17 +3235,17 @@ static int raid1_reshape(struct mddev *mddev)
newpoolinfo->mddev = mddev;
newpoolinfo->raid_disks = raid_disks * 2;
- newpool = mempool_create(NR_RAID1_BIOS, r1bio_pool_alloc,
- r1bio_pool_free, newpoolinfo);
- if (!newpool) {
+ ret = mempool_init(&newpool, NR_RAID1_BIOS, r1bio_pool_alloc,
+ r1bio_pool_free, newpoolinfo);
+ if (ret) {
kfree(newpoolinfo);
- return -ENOMEM;
+ return ret;
}
newmirrors = kzalloc(sizeof(struct raid1_info) * raid_disks * 2,
GFP_KERNEL);
if (!newmirrors) {
kfree(newpoolinfo);
- mempool_destroy(newpool);
+ mempool_exit(&newpool);
return -ENOMEM;
}
@@ -3287,7 +3285,7 @@ static int raid1_reshape(struct mddev *mddev)
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
md_wakeup_thread(mddev->thread);
- mempool_destroy(oldpool);
+ mempool_exit(&oldpool);
return 0;
}
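
[Editor's note: raid1's r1bio/r1buf pools use the callback form of the new API. mempool_init() takes the same alloc/free hooks and pool_data cookie that mempool_create() did, but initializes an embedded pool and returns an errno; mempool_initialized() replaces the old NULL check seen in raid1_sync_request() above. A sketch with hypothetical callbacks standing in for r1bio_pool_alloc()/r1bio_pool_free():

	#include <linux/mempool.h>
	#include <linux/slab.h>

	/* pool_data carries the element size here, much as raid1 passes poolinfo */
	static void *elem_alloc(gfp_t gfp, void *pool_data)
	{
		return kmalloc(*(size_t *)pool_data, gfp);
	}

	static void elem_free(void *element, void *pool_data)
	{
		kfree(element);
	}

	static size_t elem_size = 256;		/* hypothetical */
	static mempool_t elem_pool;		/* embedded/static, zeroed by default */

	static int elem_pool_setup(void)
	{
		/* preallocate 8 elements; 0 on success, a -errno to propagate */
		return mempool_init(&elem_pool, 8, elem_alloc, elem_free, &elem_size);
	}
]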
diff --git a/drivers/md/raid1.h b/drivers/md/raid1.h
index eb84bc68e2fd..e7ccad898736 100644
--- a/drivers/md/raid1.h
+++ b/drivers/md/raid1.h
@@ -118,10 +118,10 @@ struct r1conf {
* mempools - it changes when the array grows or shrinks
*/
struct pool_info *poolinfo;
- mempool_t *r1bio_pool;
- mempool_t *r1buf_pool;
+ mempool_t r1bio_pool;
+ mempool_t r1buf_pool;
- struct bio_set *bio_split;
+ struct bio_set bio_split;
/* temporary buffer to synchronous IO when attempting to repair
* a read error.
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 3c60774c8430..37d4b236b81b 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -291,14 +291,14 @@ static void free_r10bio(struct r10bio *r10_bio)
struct r10conf *conf = r10_bio->mddev->private;
put_all_bios(conf, r10_bio);
- mempool_free(r10_bio, conf->r10bio_pool);
+ mempool_free(r10_bio, &conf->r10bio_pool);
}
static void put_buf(struct r10bio *r10_bio)
{
struct r10conf *conf = r10_bio->mddev->private;
- mempool_free(r10_bio, conf->r10buf_pool);
+ mempool_free(r10_bio, &conf->r10buf_pool);
lower_barrier(conf);
}
@@ -1204,7 +1204,7 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio,
(unsigned long long)r10_bio->sector);
if (max_sectors < bio_sectors(bio)) {
struct bio *split = bio_split(bio, max_sectors,
- gfp, conf->bio_split);
+ gfp, &conf->bio_split);
bio_chain(split, bio);
generic_make_request(bio);
bio = split;
@@ -1213,7 +1213,7 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio,
}
slot = r10_bio->read_slot;
- read_bio = bio_clone_fast(bio, gfp, mddev->bio_set);
+ read_bio = bio_clone_fast(bio, gfp, &mddev->bio_set);
r10_bio->devs[slot].bio = read_bio;
r10_bio->devs[slot].rdev = rdev;
@@ -1261,7 +1261,7 @@ static void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio,
} else
rdev = conf->mirrors[devnum].rdev;
- mbio = bio_clone_fast(bio, GFP_NOIO, mddev->bio_set);
+ mbio = bio_clone_fast(bio, GFP_NOIO, &mddev->bio_set);
if (replacement)
r10_bio->devs[n_copy].repl_bio = mbio;
else
@@ -1509,7 +1509,7 @@ retry_write:
if (r10_bio->sectors < bio_sectors(bio)) {
struct bio *split = bio_split(bio, r10_bio->sectors,
- GFP_NOIO, conf->bio_split);
+ GFP_NOIO, &conf->bio_split);
bio_chain(split, bio);
generic_make_request(bio);
bio = split;
@@ -1533,7 +1533,7 @@ static void __make_request(struct mddev *mddev, struct bio *bio, int sectors)
struct r10conf *conf = mddev->private;
struct r10bio *r10_bio;
- r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO);
+ r10_bio = mempool_alloc(&conf->r10bio_pool, GFP_NOIO);
r10_bio->master_bio = bio;
r10_bio->sectors = sectors;
@@ -1732,8 +1732,7 @@ static void close_sync(struct r10conf *conf)
wait_barrier(conf);
allow_barrier(conf);
- mempool_destroy(conf->r10buf_pool);
- conf->r10buf_pool = NULL;
+ mempool_exit(&conf->r10buf_pool);
}
static int raid10_spare_active(struct mddev *mddev)
@@ -2583,7 +2582,7 @@ static int narrow_write_error(struct r10bio *r10_bio, int i)
if (sectors > sect_to_write)
sectors = sect_to_write;
/* Write at 'sector' for 'sectors' */
- wbio = bio_clone_fast(bio, GFP_NOIO, mddev->bio_set);
+ wbio = bio_clone_fast(bio, GFP_NOIO, &mddev->bio_set);
bio_trim(wbio, sector - bio->bi_iter.bi_sector, sectors);
wsector = r10_bio->devs[i].addr + (sector - r10_bio->sector);
wbio->bi_iter.bi_sector = wsector +
@@ -2816,25 +2815,25 @@ static void raid10d(struct md_thread *thread)
static int init_resync(struct r10conf *conf)
{
- int buffs;
- int i;
+ int ret, buffs, i;
buffs = RESYNC_WINDOW / RESYNC_BLOCK_SIZE;
- BUG_ON(conf->r10buf_pool);
+ BUG_ON(mempool_initialized(&conf->r10buf_pool));
conf->have_replacement = 0;
for (i = 0; i < conf->geo.raid_disks; i++)
if (conf->mirrors[i].replacement)
conf->have_replacement = 1;
- conf->r10buf_pool = mempool_create(buffs, r10buf_pool_alloc, r10buf_pool_free, conf);
- if (!conf->r10buf_pool)
- return -ENOMEM;
+ ret = mempool_init(&conf->r10buf_pool, buffs,
+ r10buf_pool_alloc, r10buf_pool_free, conf);
+ if (ret)
+ return ret;
conf->next_resync = 0;
return 0;
}
static struct r10bio *raid10_alloc_init_r10buf(struct r10conf *conf)
{
- struct r10bio *r10bio = mempool_alloc(conf->r10buf_pool, GFP_NOIO);
+ struct r10bio *r10bio = mempool_alloc(&conf->r10buf_pool, GFP_NOIO);
struct rsync_pages *rp;
struct bio *bio;
int nalloc;
@@ -2945,7 +2944,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
sector_t chunk_mask = conf->geo.chunk_mask;
int page_idx = 0;
- if (!conf->r10buf_pool)
+ if (!mempool_initialized(&conf->r10buf_pool))
if (init_resync(conf))
return 0;
@@ -3699,13 +3698,13 @@ static struct r10conf *setup_conf(struct mddev *mddev)
conf->geo = geo;
conf->copies = copies;
- conf->r10bio_pool = mempool_create(NR_RAID10_BIOS, r10bio_pool_alloc,
- r10bio_pool_free, conf);
- if (!conf->r10bio_pool)
+ err = mempool_init(&conf->r10bio_pool, NR_RAID10_BIOS, r10bio_pool_alloc,
+ r10bio_pool_free, conf);
+ if (err)
goto out;
- conf->bio_split = bioset_create(BIO_POOL_SIZE, 0, 0);
- if (!conf->bio_split)
+ err = bioset_init(&conf->bio_split, BIO_POOL_SIZE, 0, 0);
+ if (err)
goto out;
calc_sectors(conf, mddev->dev_sectors);
@@ -3733,6 +3732,7 @@ static struct r10conf *setup_conf(struct mddev *mddev)
init_waitqueue_head(&conf->wait_barrier);
atomic_set(&conf->nr_pending, 0);
+ err = -ENOMEM;
conf->thread = md_register_thread(raid10d, mddev, "raid10");
if (!conf->thread)
goto out;
@@ -3742,11 +3742,10 @@ static struct r10conf *setup_conf(struct mddev *mddev)
out:
if (conf) {
- mempool_destroy(conf->r10bio_pool);
+ mempool_exit(&conf->r10bio_pool);
kfree(conf->mirrors);
safe_put_page(conf->tmppage);
- if (conf->bio_split)
- bioset_free(conf->bio_split);
+ bioset_exit(&conf->bio_split);
kfree(conf);
}
return ERR_PTR(err);
@@ -3953,7 +3952,7 @@ static int raid10_run(struct mddev *mddev)
out_free_conf:
md_unregister_thread(&mddev->thread);
- mempool_destroy(conf->r10bio_pool);
+ mempool_exit(&conf->r10bio_pool);
safe_put_page(conf->tmppage);
kfree(conf->mirrors);
kfree(conf);
@@ -3966,13 +3965,12 @@ static void raid10_free(struct mddev *mddev, void *priv)
{
struct r10conf *conf = priv;
- mempool_destroy(conf->r10bio_pool);
+ mempool_exit(&conf->r10bio_pool);
safe_put_page(conf->tmppage);
kfree(conf->mirrors);
kfree(conf->mirrors_old);
kfree(conf->mirrors_new);
- if (conf->bio_split)
- bioset_free(conf->bio_split);
+ bioset_exit(&conf->bio_split);
kfree(conf);
}
@@ -4543,7 +4541,7 @@ read_more:
* on all the target devices.
*/
// FIXME
- mempool_free(r10_bio, conf->r10buf_pool);
+ mempool_free(r10_bio, &conf->r10buf_pool);
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
return sectors_done;
}
diff --git a/drivers/md/raid10.h b/drivers/md/raid10.h
index e2e8840de9bf..d3eaaf3eb1bc 100644
--- a/drivers/md/raid10.h
+++ b/drivers/md/raid10.h
@@ -93,10 +93,10 @@ struct r10conf {
*/
wait_queue_head_t wait_barrier;
- mempool_t *r10bio_pool;
- mempool_t *r10buf_pool;
+ mempool_t r10bio_pool;
+ mempool_t r10buf_pool;
struct page *tmppage;
- struct bio_set *bio_split;
+ struct bio_set bio_split;
/* When taking over an array from a different personality, we store
* the new thread here until we fully activate the array.
diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c
index 3c65f52b68f5..2b775abf377b 100644
--- a/drivers/md/raid5-cache.c
+++ b/drivers/md/raid5-cache.c
@@ -125,9 +125,9 @@ struct r5l_log {
struct list_head no_mem_stripes; /* pending stripes, -ENOMEM */
struct kmem_cache *io_kc;
- mempool_t *io_pool;
- struct bio_set *bs;
- mempool_t *meta_pool;
+ mempool_t io_pool;
+ struct bio_set bs;
+ mempool_t meta_pool;
struct md_thread *reclaim_thread;
unsigned long reclaim_target; /* number of space that need to be
@@ -579,7 +579,7 @@ static void r5l_log_endio(struct bio *bio)
md_error(log->rdev->mddev, log->rdev);
bio_put(bio);
- mempool_free(io->meta_page, log->meta_pool);
+ mempool_free(io->meta_page, &log->meta_pool);
spin_lock_irqsave(&log->io_list_lock, flags);
__r5l_set_io_unit_state(io, IO_UNIT_IO_END);
@@ -748,7 +748,7 @@ static void r5l_submit_current_io(struct r5l_log *log)
static struct bio *r5l_bio_alloc(struct r5l_log *log)
{
- struct bio *bio = bio_alloc_bioset(GFP_NOIO, BIO_MAX_PAGES, log->bs);
+ struct bio *bio = bio_alloc_bioset(GFP_NOIO, BIO_MAX_PAGES, &log->bs);
bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
bio_set_dev(bio, log->rdev->bdev);
@@ -780,7 +780,7 @@ static struct r5l_io_unit *r5l_new_meta(struct r5l_log *log)
struct r5l_io_unit *io;
struct r5l_meta_block *block;
- io = mempool_alloc(log->io_pool, GFP_ATOMIC);
+ io = mempool_alloc(&log->io_pool, GFP_ATOMIC);
if (!io)
return NULL;
memset(io, 0, sizeof(*io));
@@ -791,7 +791,7 @@ static struct r5l_io_unit *r5l_new_meta(struct r5l_log *log)
bio_list_init(&io->flush_barriers);
io->state = IO_UNIT_RUNNING;
- io->meta_page = mempool_alloc(log->meta_pool, GFP_NOIO);
+ io->meta_page = mempool_alloc(&log->meta_pool, GFP_NOIO);
block = page_address(io->meta_page);
clear_page(block);
block->magic = cpu_to_le32(R5LOG_MAGIC);
@@ -1223,7 +1223,7 @@ static bool r5l_complete_finished_ios(struct r5l_log *log)
log->next_checkpoint = io->log_start;
list_del(&io->log_sibling);
- mempool_free(io, log->io_pool);
+ mempool_free(io, &log->io_pool);
r5l_run_no_mem_stripe(log);
found = true;
@@ -1647,7 +1647,7 @@ static int r5l_recovery_allocate_ra_pool(struct r5l_log *log,
{
struct page *page;
- ctx->ra_bio = bio_alloc_bioset(GFP_KERNEL, BIO_MAX_PAGES, log->bs);
+ ctx->ra_bio = bio_alloc_bioset(GFP_KERNEL, BIO_MAX_PAGES, &log->bs);
if (!ctx->ra_bio)
return -ENOMEM;
@@ -3066,6 +3066,7 @@ int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev)
struct request_queue *q = bdev_get_queue(rdev->bdev);
struct r5l_log *log;
char b[BDEVNAME_SIZE];
+ int ret;
pr_debug("md/raid:%s: using device %s as journal\n",
mdname(conf->mddev), bdevname(rdev->bdev, b));
@@ -3111,16 +3112,16 @@ int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev)
if (!log->io_kc)
goto io_kc;
- log->io_pool = mempool_create_slab_pool(R5L_POOL_SIZE, log->io_kc);
- if (!log->io_pool)
+ ret = mempool_init_slab_pool(&log->io_pool, R5L_POOL_SIZE, log->io_kc);
+ if (ret)
goto io_pool;
- log->bs = bioset_create(R5L_POOL_SIZE, 0, BIOSET_NEED_BVECS);
- if (!log->bs)
+ ret = bioset_init(&log->bs, R5L_POOL_SIZE, 0, BIOSET_NEED_BVECS);
+ if (ret)
goto io_bs;
- log->meta_pool = mempool_create_page_pool(R5L_POOL_SIZE, 0);
- if (!log->meta_pool)
+ ret = mempool_init_page_pool(&log->meta_pool, R5L_POOL_SIZE, 0);
+ if (ret)
goto out_mempool;
spin_lock_init(&log->tree_lock);
@@ -3155,11 +3156,11 @@ int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev)
rcu_assign_pointer(conf->log, NULL);
md_unregister_thread(&log->reclaim_thread);
reclaim_thread:
- mempool_destroy(log->meta_pool);
+ mempool_exit(&log->meta_pool);
out_mempool:
- bioset_free(log->bs);
+ bioset_exit(&log->bs);
io_bs:
- mempool_destroy(log->io_pool);
+ mempool_exit(&log->io_pool);
io_pool:
kmem_cache_destroy(log->io_kc);
io_kc:
@@ -3178,9 +3179,9 @@ void r5l_exit_log(struct r5conf *conf)
wake_up(&conf->mddev->sb_wait);
flush_work(&log->disable_writeback_work);
md_unregister_thread(&log->reclaim_thread);
- mempool_destroy(log->meta_pool);
- bioset_free(log->bs);
- mempool_destroy(log->io_pool);
+ mempool_exit(&log->meta_pool);
+ bioset_exit(&log->bs);
+ mempool_exit(&log->io_pool);
kmem_cache_destroy(log->io_kc);
kfree(log);
}
diff --git a/drivers/md/raid5-ppl.c b/drivers/md/raid5-ppl.c
index 42890a08375b..3a7c36326589 100644
--- a/drivers/md/raid5-ppl.c
+++ b/drivers/md/raid5-ppl.c
@@ -105,9 +105,9 @@ struct ppl_conf {
atomic64_t seq; /* current log write sequence number */
struct kmem_cache *io_kc;
- mempool_t *io_pool;
- struct bio_set *bs;
- struct bio_set *flush_bs;
+ mempool_t io_pool;
+ struct bio_set bs;
+ struct bio_set flush_bs;
/* used only for recovery */
int recovered_entries;
@@ -244,7 +244,7 @@ static struct ppl_io_unit *ppl_new_iounit(struct ppl_log *log,
struct ppl_header *pplhdr;
struct page *header_page;
- io = mempool_alloc(ppl_conf->io_pool, GFP_NOWAIT);
+ io = mempool_alloc(&ppl_conf->io_pool, GFP_NOWAIT);
if (!io)
return NULL;
@@ -503,7 +503,7 @@ static void ppl_submit_iounit(struct ppl_io_unit *io)
struct bio *prev = bio;
bio = bio_alloc_bioset(GFP_NOIO, BIO_MAX_PAGES,
- ppl_conf->bs);
+ &ppl_conf->bs);
bio->bi_opf = prev->bi_opf;
bio_copy_dev(bio, prev);
bio->bi_iter.bi_sector = bio_end_sector(prev);
@@ -570,7 +570,7 @@ static void ppl_io_unit_finished(struct ppl_io_unit *io)
list_del(&io->log_sibling);
spin_unlock(&log->io_list_lock);
- mempool_free(io, ppl_conf->io_pool);
+ mempool_free(io, &ppl_conf->io_pool);
spin_lock(&ppl_conf->no_mem_stripes_lock);
if (!list_empty(&ppl_conf->no_mem_stripes)) {
@@ -642,7 +642,7 @@ static void ppl_do_flush(struct ppl_io_unit *io)
struct bio *bio;
char b[BDEVNAME_SIZE];
- bio = bio_alloc_bioset(GFP_NOIO, 0, ppl_conf->flush_bs);
+ bio = bio_alloc_bioset(GFP_NOIO, 0, &ppl_conf->flush_bs);
bio_set_dev(bio, bdev);
bio->bi_private = io;
bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
@@ -1246,11 +1246,9 @@ static void __ppl_exit_log(struct ppl_conf *ppl_conf)
kfree(ppl_conf->child_logs);
- if (ppl_conf->bs)
- bioset_free(ppl_conf->bs);
- if (ppl_conf->flush_bs)
- bioset_free(ppl_conf->flush_bs);
- mempool_destroy(ppl_conf->io_pool);
+ bioset_exit(&ppl_conf->bs);
+ bioset_exit(&ppl_conf->flush_bs);
+ mempool_exit(&ppl_conf->io_pool);
kmem_cache_destroy(ppl_conf->io_kc);
kfree(ppl_conf);
@@ -1387,24 +1385,18 @@ int ppl_init_log(struct r5conf *conf)
goto err;
}
- ppl_conf->io_pool = mempool_create(conf->raid_disks, ppl_io_pool_alloc,
- ppl_io_pool_free, ppl_conf->io_kc);
- if (!ppl_conf->io_pool) {
- ret = -ENOMEM;
+ ret = mempool_init(&ppl_conf->io_pool, conf->raid_disks, ppl_io_pool_alloc,
+ ppl_io_pool_free, ppl_conf->io_kc);
+ if (ret)
goto err;
- }
- ppl_conf->bs = bioset_create(conf->raid_disks, 0, BIOSET_NEED_BVECS);
- if (!ppl_conf->bs) {
- ret = -ENOMEM;
+ ret = bioset_init(&ppl_conf->bs, conf->raid_disks, 0, BIOSET_NEED_BVECS);
+ if (ret)
goto err;
- }
- ppl_conf->flush_bs = bioset_create(conf->raid_disks, 0, 0);
- if (!ppl_conf->flush_bs) {
- ret = -ENOMEM;
+ ret = bioset_init(&ppl_conf->flush_bs, conf->raid_disks, 0, 0);
+ if (ret)
goto err;
- }
ppl_conf->count = conf->raid_disks;
ppl_conf->child_logs = kcalloc(ppl_conf->count, sizeof(struct ppl_log),
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index be117d0a65a8..a2e64989b01f 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -5192,7 +5192,7 @@ static int raid5_read_one_chunk(struct mddev *mddev, struct bio *raid_bio)
/*
* use bio_clone_fast to make a copy of the bio
*/
- align_bi = bio_clone_fast(raid_bio, GFP_NOIO, mddev->bio_set);
+ align_bi = bio_clone_fast(raid_bio, GFP_NOIO, &mddev->bio_set);
if (!align_bi)
return 0;
/*
@@ -5277,7 +5277,7 @@ static struct bio *chunk_aligned_read(struct mddev *mddev, struct bio *raid_bio)
if (sectors < bio_sectors(raid_bio)) {
struct r5conf *conf = mddev->private;
- split = bio_split(raid_bio, sectors, GFP_NOIO, conf->bio_split);
+ split = bio_split(raid_bio, sectors, GFP_NOIO, &conf->bio_split);
bio_chain(split, raid_bio);
generic_make_request(raid_bio);
raid_bio = split;
@@ -6773,8 +6773,7 @@ static void free_conf(struct r5conf *conf)
if (conf->disks[i].extra_page)
put_page(conf->disks[i].extra_page);
kfree(conf->disks);
- if (conf->bio_split)
- bioset_free(conf->bio_split);
+ bioset_exit(&conf->bio_split);
kfree(conf->stripe_hashtbl);
kfree(conf->pending_data);
kfree(conf);
@@ -6853,6 +6852,7 @@ static struct r5conf *setup_conf(struct mddev *mddev)
int i;
int group_cnt, worker_cnt_per_group;
struct r5worker_group *new_group;
+ int ret;
if (mddev->new_level != 5
&& mddev->new_level != 4
@@ -6950,8 +6950,8 @@ static struct r5conf *setup_conf(struct mddev *mddev)
goto abort;
}
- conf->bio_split = bioset_create(BIO_POOL_SIZE, 0, 0);
- if (!conf->bio_split)
+ ret = bioset_init(&conf->bio_split, BIO_POOL_SIZE, 0, 0);
+ if (ret)
goto abort;
conf->mddev = mddev;
diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h
index 3f8da26032ac..72e75ba6abf0 100644
--- a/drivers/md/raid5.h
+++ b/drivers/md/raid5.h
@@ -669,7 +669,7 @@ struct r5conf {
int pool_size; /* number of disks in stripeheads in pool */
spinlock_t device_lock;
struct disk_info *disks;
- struct bio_set *bio_split;
+ struct bio_set bio_split;
/* When taking over an array from a different personality, we store
* the new thread here until we fully activate the array.
diff --git a/drivers/media/i2c/saa7115.c b/drivers/media/i2c/saa7115.c
index e216cd768409..b07114b5efb2 100644
--- a/drivers/media/i2c/saa7115.c
+++ b/drivers/media/i2c/saa7115.c
@@ -20,7 +20,7 @@
//
// VBI support (2004) and cleanups (2005) by Hans Verkuil <hverkuil@xs4all.nl>
//
-// Copyright (c) 2005-2006 Mauro Carvalho Chehab <mchehab@infradead.org>
+// Copyright (c) 2005-2006 Mauro Carvalho Chehab <mchehab@kernel.org>
// SAA7111, SAA7113 and SAA7118 support
#include "saa711x_regs.h"
diff --git a/drivers/media/i2c/saa711x_regs.h b/drivers/media/i2c/saa711x_regs.h
index a50d480e101a..44fabe08234d 100644
--- a/drivers/media/i2c/saa711x_regs.h
+++ b/drivers/media/i2c/saa711x_regs.h
@@ -2,7 +2,7 @@
* SPDX-License-Identifier: GPL-2.0+
* saa711x - Philips SAA711x video decoder register specifications
*
- * Copyright (c) 2006 Mauro Carvalho Chehab <mchehab@infradead.org>
+ * Copyright (c) 2006 Mauro Carvalho Chehab <mchehab@kernel.org>
*/
#define R_00_CHIP_VERSION 0x00
diff --git a/drivers/media/i2c/tda7432.c b/drivers/media/i2c/tda7432.c
index 1c5c61d829d6..9b4f21237810 100644
--- a/drivers/media/i2c/tda7432.c
+++ b/drivers/media/i2c/tda7432.c
@@ -8,7 +8,7 @@
* Muting and tone control by Jonathan Isom <jisom@ematic.com>
*
* Copyright (c) 2000 Eric Sandeen <eric_sandeen@bigfoot.com>
- * Copyright (c) 2006 Mauro Carvalho Chehab <mchehab@infradead.org>
+ * Copyright (c) 2006 Mauro Carvalho Chehab <mchehab@kernel.org>
* This code is placed under the terms of the GNU General Public License
* Based on tda9855.c by Steve VanDeBogart (vandebo@uclink.berkeley.edu)
* Which was based on tda8425.c by Greg Alexander (c) 1998
diff --git a/drivers/media/i2c/tvp5150.c b/drivers/media/i2c/tvp5150.c
index 2476d812f669..1734ed4ede33 100644
--- a/drivers/media/i2c/tvp5150.c
+++ b/drivers/media/i2c/tvp5150.c
@@ -2,7 +2,7 @@
//
// tvp5150 - Texas Instruments TVP5150A/AM1 and TVP5151 video decoder driver
//
-// Copyright (c) 2005,2006 Mauro Carvalho Chehab <mchehab@infradead.org>
+// Copyright (c) 2005,2006 Mauro Carvalho Chehab <mchehab@kernel.org>
#include <dt-bindings/media/tvp5150.h>
#include <linux/i2c.h>
diff --git a/drivers/media/i2c/tvp5150_reg.h b/drivers/media/i2c/tvp5150_reg.h
index c43b7b844021..d3a764cae1a0 100644
--- a/drivers/media/i2c/tvp5150_reg.h
+++ b/drivers/media/i2c/tvp5150_reg.h
@@ -3,7 +3,7 @@
*
* tvp5150 - Texas Instruments TVP5150A/AM1 video decoder registers
*
- * Copyright (c) 2005,2006 Mauro Carvalho Chehab <mchehab@infradead.org>
+ * Copyright (c) 2005,2006 Mauro Carvalho Chehab <mchehab@kernel.org>
*/
#define TVP5150_VD_IN_SRC_SEL_1 0x00 /* Video input source selection #1 */
diff --git a/drivers/media/i2c/tvp7002.c b/drivers/media/i2c/tvp7002.c
index a26c1a3f7183..4599b7e28a8d 100644
--- a/drivers/media/i2c/tvp7002.c
+++ b/drivers/media/i2c/tvp7002.c
@@ -5,7 +5,7 @@
* Author: Santiago Nunez-Corrales <santiago.nunez@ridgerun.com>
*
* This code is partially based upon the TVP5150 driver
- * written by Mauro Carvalho Chehab (mchehab@infradead.org),
+ * written by Mauro Carvalho Chehab <mchehab@kernel.org>,
* the TVP514x driver written by Vaibhav Hiremath <hvaibhav@ti.com>
* and the TVP7002 driver in the TI LSP 2.10.00.14. Revisions by
* Muralidharan Karicheri and Snehaprabha Narnakaje (TI).
diff --git a/drivers/media/i2c/tvp7002_reg.h b/drivers/media/i2c/tvp7002_reg.h
index 3c8c8b0a6a4c..7f56ba689dfe 100644
--- a/drivers/media/i2c/tvp7002_reg.h
+++ b/drivers/media/i2c/tvp7002_reg.h
@@ -5,7 +5,7 @@
* Author: Santiago Nunez-Corrales <santiago.nunez@ridgerun.com>
*
* This code is partially based upon the TVP5150 driver
- * written by Mauro Carvalho Chehab (mchehab@infradead.org),
+ * written by Mauro Carvalho Chehab <mchehab@kernel.org>,
* the TVP514x driver written by Vaibhav Hiremath <hvaibhav@ti.com>
* and the TVP7002 driver in the TI LSP 2.10.00.14
*
diff --git a/drivers/media/media-devnode.c b/drivers/media/media-devnode.c
index 67ac51eff15c..6b87a721dc49 100644
--- a/drivers/media/media-devnode.c
+++ b/drivers/media/media-devnode.c
@@ -4,7 +4,7 @@
* Copyright (C) 2010 Nokia Corporation
*
* Based on drivers/media/video/v4l2_dev.c code authored by
- * Mauro Carvalho Chehab <mchehab@infradead.org> (version 2)
+ * Mauro Carvalho Chehab <mchehab@kernel.org> (version 2)
* Alan Cox, <alan@lxorguk.ukuu.org.uk> (version 1)
*
* Contacts: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
diff --git a/drivers/media/pci/bt8xx/bttv-audio-hook.c b/drivers/media/pci/bt8xx/bttv-audio-hook.c
index 9f1f9169fb5b..346fc7f58839 100644
--- a/drivers/media/pci/bt8xx/bttv-audio-hook.c
+++ b/drivers/media/pci/bt8xx/bttv-audio-hook.c
@@ -1,7 +1,7 @@
/*
* Handlers for board audio hooks, splitted from bttv-cards
*
- * Copyright (c) 2006 Mauro Carvalho Chehab (mchehab@infradead.org)
+ * Copyright (c) 2006 Mauro Carvalho Chehab <mchehab@kernel.org>
* This code is placed under the terms of the GNU General Public License
*/
diff --git a/drivers/media/pci/bt8xx/bttv-audio-hook.h b/drivers/media/pci/bt8xx/bttv-audio-hook.h
index 159d07adeff8..be16a537a03a 100644
--- a/drivers/media/pci/bt8xx/bttv-audio-hook.h
+++ b/drivers/media/pci/bt8xx/bttv-audio-hook.h
@@ -1,7 +1,7 @@
/*
* Handlers for board audio hooks, splitted from bttv-cards
*
- * Copyright (c) 2006 Mauro Carvalho Chehab (mchehab@infradead.org)
+ * Copyright (c) 2006 Mauro Carvalho Chehab <mchehab@kernel.org>
* This code is placed under the terms of the GNU General Public License
*/
diff --git a/drivers/media/pci/bt8xx/bttv-cards.c b/drivers/media/pci/bt8xx/bttv-cards.c
index 1902732f90e1..2616243b2c49 100644
--- a/drivers/media/pci/bt8xx/bttv-cards.c
+++ b/drivers/media/pci/bt8xx/bttv-cards.c
@@ -2447,7 +2447,7 @@ struct tvcard bttv_tvcards[] = {
},
/* ---- card 0x88---------------------------------- */
[BTTV_BOARD_ACORP_Y878F] = {
- /* Mauro Carvalho Chehab <mchehab@infradead.org> */
+ /* Mauro Carvalho Chehab <mchehab@kernel.org> */
.name = "Acorp Y878F",
.video_inputs = 3,
/* .audio_inputs= 1, */
@@ -2688,7 +2688,7 @@ struct tvcard bttv_tvcards[] = {
},
[BTTV_BOARD_ENLTV_FM_2] = {
/* Encore TV Tuner Pro ENL TV-FM-2
- Mauro Carvalho Chehab <mchehab@infradead.org */
+ Mauro Carvalho Chehab <mchehab@kernel.org> */
.name = "Encore ENL TV-FM-2",
.video_inputs = 3,
/* .audio_inputs= 1, */
diff --git a/drivers/media/pci/bt8xx/bttv-driver.c b/drivers/media/pci/bt8xx/bttv-driver.c
index 707f57a9f940..de3f44b8dec6 100644
--- a/drivers/media/pci/bt8xx/bttv-driver.c
+++ b/drivers/media/pci/bt8xx/bttv-driver.c
@@ -13,7 +13,7 @@
(c) 2005-2006 Nickolay V. Shmyrev <nshmyrev@yandex.ru>
Fixes to be fully V4L2 compliant by
- (c) 2006 Mauro Carvalho Chehab <mchehab@infradead.org>
+ (c) 2006 Mauro Carvalho Chehab <mchehab@kernel.org>
Cropping and overscan support
Copyright (C) 2005, 2006 Michael H. Schimek <mschimek@gmx.at>
diff --git a/drivers/media/pci/bt8xx/bttv-i2c.c b/drivers/media/pci/bt8xx/bttv-i2c.c
index eccd1e3d717a..c76823eb399d 100644
--- a/drivers/media/pci/bt8xx/bttv-i2c.c
+++ b/drivers/media/pci/bt8xx/bttv-i2c.c
@@ -8,7 +8,7 @@
& Marcus Metzler (mocm@thp.uni-koeln.de)
(c) 1999-2003 Gerd Knorr <kraxel@bytesex.org>
- (c) 2005 Mauro Carvalho Chehab <mchehab@infradead.org>
+ (c) 2005 Mauro Carvalho Chehab <mchehab@kernel.org>
- Multituner support and i2c address binding
This program is free software; you can redistribute it and/or modify
diff --git a/drivers/media/pci/cx23885/cx23885-input.c b/drivers/media/pci/cx23885/cx23885-input.c
index be49589a61d2..395ff9bba759 100644
--- a/drivers/media/pci/cx23885/cx23885-input.c
+++ b/drivers/media/pci/cx23885/cx23885-input.c
@@ -13,7 +13,7 @@
* Copyright (C) 2008 <srinivasa.deevi at conexant dot com>
* Copyright (C) 2005 Ludovico Cavedon <cavedon@sssup.it>
* Markus Rechberger <mrechberger@gmail.com>
- * Mauro Carvalho Chehab <mchehab@infradead.org>
+ * Mauro Carvalho Chehab <mchehab@kernel.org>
* Sascha Sommer <saschasommer@freenet.de>
* Copyright (C) 2004, 2005 Chris Pascoe
* Copyright (C) 2003, 2004 Gerd Knorr
diff --git a/drivers/media/pci/cx88/cx88-alsa.c b/drivers/media/pci/cx88/cx88-alsa.c
index ab09bb55cf45..8a28fda703a2 100644
--- a/drivers/media/pci/cx88/cx88-alsa.c
+++ b/drivers/media/pci/cx88/cx88-alsa.c
@@ -4,7 +4,7 @@
*
* (c) 2007 Trent Piepho <xyzzy@speakeasy.org>
* (c) 2005,2006 Ricardo Cerqueira <v4l@cerqueira.org>
- * (c) 2005 Mauro Carvalho Chehab <mchehab@infradead.org>
+ * (c) 2005 Mauro Carvalho Chehab <mchehab@kernel.org>
* Based on a dummy cx88 module by Gerd Knorr <kraxel@bytesex.org>
* Based on dummy.c by Jaroslav Kysela <perex@perex.cz>
*
@@ -103,7 +103,7 @@ MODULE_PARM_DESC(index, "Index value for cx88x capture interface(s).");
MODULE_DESCRIPTION("ALSA driver module for cx2388x based TV cards");
MODULE_AUTHOR("Ricardo Cerqueira");
-MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@infradead.org>");
+MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@kernel.org>");
MODULE_LICENSE("GPL");
MODULE_VERSION(CX88_VERSION);
diff --git a/drivers/media/pci/cx88/cx88-blackbird.c b/drivers/media/pci/cx88/cx88-blackbird.c
index 0e0952e60795..7a4876cf9f08 100644
--- a/drivers/media/pci/cx88/cx88-blackbird.c
+++ b/drivers/media/pci/cx88/cx88-blackbird.c
@@ -5,7 +5,7 @@
* (c) 2004 Jelle Foks <jelle@foks.us>
* (c) 2004 Gerd Knorr <kraxel@bytesex.org>
*
- * (c) 2005-2006 Mauro Carvalho Chehab <mchehab@infradead.org>
+ * (c) 2005-2006 Mauro Carvalho Chehab <mchehab@kernel.org>
* - video_ioctl2 conversion
*
* Includes parts from the ivtv driver <http://sourceforge.net/projects/ivtv/>
diff --git a/drivers/media/pci/cx88/cx88-core.c b/drivers/media/pci/cx88/cx88-core.c
index 8bfa5b7ed91b..60988e95b637 100644
--- a/drivers/media/pci/cx88/cx88-core.c
+++ b/drivers/media/pci/cx88/cx88-core.c
@@ -4,7 +4,7 @@
*
* (c) 2003 Gerd Knorr <kraxel@bytesex.org> [SuSE Labs]
*
- * (c) 2005-2006 Mauro Carvalho Chehab <mchehab@infradead.org>
+ * (c) 2005-2006 Mauro Carvalho Chehab <mchehab@kernel.org>
* - Multituner support
* - video_ioctl2 conversion
* - PAL/M fixes
diff --git a/drivers/media/pci/cx88/cx88-i2c.c b/drivers/media/pci/cx88/cx88-i2c.c
index f7692775fb5a..99f88a05a7c9 100644
--- a/drivers/media/pci/cx88/cx88-i2c.c
+++ b/drivers/media/pci/cx88/cx88-i2c.c
@@ -8,7 +8,7 @@
* (c) 2002 Yurij Sysoev <yurij@naturesoft.net>
* (c) 1999-2003 Gerd Knorr <kraxel@bytesex.org>
*
- * (c) 2005 Mauro Carvalho Chehab <mchehab@infradead.org>
+ * (c) 2005 Mauro Carvalho Chehab <mchehab@kernel.org>
* - Multituner support and i2c address binding
*
* This program is free software; you can redistribute it and/or modify
diff --git a/drivers/media/pci/cx88/cx88-video.c b/drivers/media/pci/cx88/cx88-video.c
index 9be682cdb644..7b113bad70d2 100644
--- a/drivers/media/pci/cx88/cx88-video.c
+++ b/drivers/media/pci/cx88/cx88-video.c
@@ -5,7 +5,7 @@
*
* (c) 2003-04 Gerd Knorr <kraxel@bytesex.org> [SuSE Labs]
*
- * (c) 2005-2006 Mauro Carvalho Chehab <mchehab@infradead.org>
+ * (c) 2005-2006 Mauro Carvalho Chehab <mchehab@kernel.org>
* - Multituner support
* - video_ioctl2 conversion
* - PAL/M fixes
diff --git a/drivers/media/pci/saa7164/saa7164-core.c b/drivers/media/pci/saa7164/saa7164-core.c
index fca36a4910c2..d697e1ad929c 100644
--- a/drivers/media/pci/saa7164/saa7164-core.c
+++ b/drivers/media/pci/saa7164/saa7164-core.c
@@ -1122,23 +1122,11 @@ static int saa7164_proc_show(struct seq_file *m, void *v)
return 0;
}
-static int saa7164_proc_open(struct inode *inode, struct file *filp)
-{
- return single_open(filp, saa7164_proc_show, NULL);
-}
-
-static const struct file_operations saa7164_proc_fops = {
- .open = saa7164_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
static int saa7164_proc_create(void)
{
struct proc_dir_entry *pe;
- pe = proc_create("saa7164", S_IRUGO, NULL, &saa7164_proc_fops);
+ pe = proc_create_single("saa7164", S_IRUGO, NULL, saa7164_proc_show);
if (!pe)
return -ENOMEM;
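The hunk above is the recurring pattern in this series: proc_create_single() takes the show() callback directly and supplies the single_open()/seq_read/single_release boilerplate internally. A minimal sketch of the resulting idiom (entry name hypothetical):

	#include <linux/proc_fs.h>
	#include <linux/seq_file.h>

	static int demo_proc_show(struct seq_file *m, void *v)
	{
		seq_puts(m, "demo\n");
		return 0;
	}

	/* in module init; 0444 = read-only, NULL parent = /proc root */
	if (!proc_create_single("demo", 0444, NULL, demo_proc_show))
		return -ENOMEM;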
diff --git a/drivers/media/pci/zoran/videocodec.c b/drivers/media/pci/zoran/videocodec.c
index 5ff23ef89215..4427ae7126e2 100644
--- a/drivers/media/pci/zoran/videocodec.c
+++ b/drivers/media/pci/zoran/videocodec.c
@@ -344,19 +344,6 @@ static int proc_videocodecs_show(struct seq_file *m, void *v)
return 0;
}
-
-static int proc_videocodecs_open(struct inode *inode, struct file *file)
-{
- return single_open(file, proc_videocodecs_show, NULL);
-}
-
-static const struct file_operations videocodecs_proc_fops = {
- .owner = THIS_MODULE,
- .open = proc_videocodecs_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
#endif
/* ===================== */
@@ -373,7 +360,8 @@ videocodec_init (void)
VIDEOCODEC_VERSION);
#ifdef CONFIG_PROC_FS
- videocodec_proc_entry = proc_create("videocodecs", 0, NULL, &videocodecs_proc_fops);
+ videocodec_proc_entry = proc_create_single("videocodecs", 0, NULL,
+ proc_videocodecs_show);
if (!videocodec_proc_entry) {
dprintk(1, KERN_ERR "videocodec: can't init procfs.\n");
}
diff --git a/drivers/media/radio/radio-aimslab.c b/drivers/media/radio/radio-aimslab.c
index 5ef635e72e10..4c52ac6d8bc5 100644
--- a/drivers/media/radio/radio-aimslab.c
+++ b/drivers/media/radio/radio-aimslab.c
@@ -4,7 +4,7 @@
* Copyright 1997 M. Kirkwood
*
* Converted to the radio-isa framework by Hans Verkuil <hans.verkuil@cisco.com>
- * Converted to V4L2 API by Mauro Carvalho Chehab <mchehab@infradead.org>
+ * Converted to V4L2 API by Mauro Carvalho Chehab <mchehab@kernel.org>
* Converted to new API by Alan Cox <alan@lxorguk.ukuu.org.uk>
* Various bugfixes and enhancements by Russell Kroll <rkroll@exploits.org>
*
diff --git a/drivers/media/radio/radio-aztech.c b/drivers/media/radio/radio-aztech.c
index 9e12c6027359..840b7d60462b 100644
--- a/drivers/media/radio/radio-aztech.c
+++ b/drivers/media/radio/radio-aztech.c
@@ -2,7 +2,7 @@
* radio-aztech.c - Aztech radio card driver
*
* Converted to the radio-isa framework by Hans Verkuil <hans.verkuil@xs4all.nl>
- * Converted to V4L2 API by Mauro Carvalho Chehab <mchehab@infradead.org>
+ * Converted to V4L2 API by Mauro Carvalho Chehab <mchehab@kernel.org>
* Adapted to support the Video for Linux API by
* Russell Kroll <rkroll@exploits.org>. Based on original tuner code by:
*
diff --git a/drivers/media/radio/radio-gemtek.c b/drivers/media/radio/radio-gemtek.c
index 3ff4c4e1435f..f051f8694ab9 100644
--- a/drivers/media/radio/radio-gemtek.c
+++ b/drivers/media/radio/radio-gemtek.c
@@ -15,7 +15,7 @@
* Various bugfixes and enhancements by Russell Kroll <rkroll@exploits.org>
*
* Converted to the radio-isa framework by Hans Verkuil <hans.verkuil@cisco.com>
- * Converted to V4L2 API by Mauro Carvalho Chehab <mchehab@infradead.org>
+ * Converted to V4L2 API by Mauro Carvalho Chehab <mchehab@kernel.org>
*
* Note: this card seems to swap the left and right audio channels!
*
diff --git a/drivers/media/radio/radio-maxiradio.c b/drivers/media/radio/radio-maxiradio.c
index 95f06f3b35dc..e4e758739246 100644
--- a/drivers/media/radio/radio-maxiradio.c
+++ b/drivers/media/radio/radio-maxiradio.c
@@ -27,7 +27,7 @@
* BUGS:
* - card unmutes if you change frequency
*
- * (c) 2006, 2007 by Mauro Carvalho Chehab <mchehab@infradead.org>:
+ * (c) 2006, 2007 by Mauro Carvalho Chehab <mchehab@kernel.org>:
* - Conversion to V4L2 API
* - Uses video_ioctl2 for parsing and to add debug support
*/
diff --git a/drivers/media/radio/radio-rtrack2.c b/drivers/media/radio/radio-rtrack2.c
index abeaedd8d437..5a1470eb753e 100644
--- a/drivers/media/radio/radio-rtrack2.c
+++ b/drivers/media/radio/radio-rtrack2.c
@@ -7,7 +7,7 @@
* Various bugfixes and enhancements by Russell Kroll <rkroll@exploits.org>
*
* Converted to the radio-isa framework by Hans Verkuil <hans.verkuil@cisco.com>
- * Converted to V4L2 API by Mauro Carvalho Chehab <mchehab@infradead.org>
+ * Converted to V4L2 API by Mauro Carvalho Chehab <mchehab@kernel.org>
*
* Fully tested with actual hardware and the v4l2-compliance tool.
*/
diff --git a/drivers/media/radio/radio-sf16fmi.c b/drivers/media/radio/radio-sf16fmi.c
index fc4e63d36e4c..4f9b97edd9eb 100644
--- a/drivers/media/radio/radio-sf16fmi.c
+++ b/drivers/media/radio/radio-sf16fmi.c
@@ -13,7 +13,7 @@
* No volume control - only mute/unmute - you have to use line volume
* control on SB-part of SF16-FMI/SF16-FMP/SF16-FMD
*
- * Converted to V4L2 API by Mauro Carvalho Chehab <mchehab@infradead.org>
+ * Converted to V4L2 API by Mauro Carvalho Chehab <mchehab@kernel.org>
*/
#include <linux/kernel.h> /* __setup */
diff --git a/drivers/media/radio/radio-terratec.c b/drivers/media/radio/radio-terratec.c
index 4f116ea294fb..1af8f29cc7d1 100644
--- a/drivers/media/radio/radio-terratec.c
+++ b/drivers/media/radio/radio-terratec.c
@@ -17,7 +17,7 @@
* Volume Control is done digitally
*
* Converted to the radio-isa framework by Hans Verkuil <hans.verkuil@cisco.com>
- * Converted to V4L2 API by Mauro Carvalho Chehab <mchehab@infradead.org>
+ * Converted to V4L2 API by Mauro Carvalho Chehab <mchehab@kernel.org>
*/
#include <linux/module.h> /* Modules */
diff --git a/drivers/media/radio/radio-trust.c b/drivers/media/radio/radio-trust.c
index 26a8c6002121..a4bad322ffff 100644
--- a/drivers/media/radio/radio-trust.c
+++ b/drivers/media/radio/radio-trust.c
@@ -12,7 +12,7 @@
* Scott McGrath (smcgrath@twilight.vtc.vsc.edu)
* William McGrath (wmcgrath@twilight.vtc.vsc.edu)
*
- * Converted to V4L2 API by Mauro Carvalho Chehab <mchehab@infradead.org>
+ * Converted to V4L2 API by Mauro Carvalho Chehab <mchehab@kernel.org>
*/
#include <stdarg.h>
diff --git a/drivers/media/radio/radio-typhoon.c b/drivers/media/radio/radio-typhoon.c
index eb72a4d13758..d0d67ad85b8f 100644
--- a/drivers/media/radio/radio-typhoon.c
+++ b/drivers/media/radio/radio-typhoon.c
@@ -25,7 +25,7 @@
* The frequency change is necessary since the card never seems to be
* completely silent.
*
- * Converted to V4L2 API by Mauro Carvalho Chehab <mchehab@infradead.org>
+ * Converted to V4L2 API by Mauro Carvalho Chehab <mchehab@kernel.org>
*/
#include <linux/module.h> /* Modules */
diff --git a/drivers/media/radio/radio-zoltrix.c b/drivers/media/radio/radio-zoltrix.c
index 026e88eef29c..6007cd09b328 100644
--- a/drivers/media/radio/radio-zoltrix.c
+++ b/drivers/media/radio/radio-zoltrix.c
@@ -27,7 +27,7 @@
* 2002-07-15 - Fix Stereo typo
*
* 2006-07-24 - Converted to V4L2 API
- * by Mauro Carvalho Chehab <mchehab@infradead.org>
+ * by Mauro Carvalho Chehab <mchehab@kernel.org>
*
* Converted to the radio-isa framework by Hans Verkuil <hans.verkuil@cisco.com>
*
diff --git a/drivers/media/rc/keymaps/rc-avermedia-m135a.c b/drivers/media/rc/keymaps/rc-avermedia-m135a.c
index f6977df1a75b..d275d98d066a 100644
--- a/drivers/media/rc/keymaps/rc-avermedia-m135a.c
+++ b/drivers/media/rc/keymaps/rc-avermedia-m135a.c
@@ -12,7 +12,7 @@
*
* On Avermedia M135A with IR model RM-JX, the same codes exist on both
* Positivo (BR) and original IR, initial version and remote control codes
- * added by Mauro Carvalho Chehab <mchehab@infradead.org>
+ * added by Mauro Carvalho Chehab <mchehab@kernel.org>
*
* Positivo also ships Avermedia M135A with model RM-K6, extra control
* codes added by Herton Ronaldo Krzesinski <herton@mandriva.com.br>
diff --git a/drivers/media/rc/keymaps/rc-encore-enltv-fm53.c b/drivers/media/rc/keymaps/rc-encore-enltv-fm53.c
index e4e78c1f4123..057c13b765ef 100644
--- a/drivers/media/rc/keymaps/rc-encore-enltv-fm53.c
+++ b/drivers/media/rc/keymaps/rc-encore-enltv-fm53.c
@@ -9,7 +9,7 @@
#include <linux/module.h>
/* Encore ENLTV-FM v5.3
- Mauro Carvalho Chehab <mchehab@infradead.org>
+ Mauro Carvalho Chehab <mchehab@kernel.org>
*/
static struct rc_map_table encore_enltv_fm53[] = {
diff --git a/drivers/media/rc/keymaps/rc-encore-enltv2.c b/drivers/media/rc/keymaps/rc-encore-enltv2.c
index c3d4437a6fda..cd0555924456 100644
--- a/drivers/media/rc/keymaps/rc-encore-enltv2.c
+++ b/drivers/media/rc/keymaps/rc-encore-enltv2.c
@@ -9,7 +9,7 @@
#include <linux/module.h>
/* Encore ENLTV2-FM - silver plastic - "Wand Media" written at the bottom
- Mauro Carvalho Chehab <mchehab@infradead.org> */
+ Mauro Carvalho Chehab <mchehab@kernel.org> */
static struct rc_map_table encore_enltv2[] = {
{ 0x4c, KEY_POWER2 },
diff --git a/drivers/media/rc/keymaps/rc-kaiomy.c b/drivers/media/rc/keymaps/rc-kaiomy.c
index f0f88df18606..a00051339842 100644
--- a/drivers/media/rc/keymaps/rc-kaiomy.c
+++ b/drivers/media/rc/keymaps/rc-kaiomy.c
@@ -9,7 +9,7 @@
#include <linux/module.h>
/* Kaiomy TVnPC U2
- Mauro Carvalho Chehab <mchehab@infradead.org>
+ Mauro Carvalho Chehab <mchehab@kernel.org>
*/
static struct rc_map_table kaiomy[] = {
diff --git a/drivers/media/rc/keymaps/rc-kworld-plus-tv-analog.c b/drivers/media/rc/keymaps/rc-kworld-plus-tv-analog.c
index 453e04377de7..db5edde3eeb1 100644
--- a/drivers/media/rc/keymaps/rc-kworld-plus-tv-analog.c
+++ b/drivers/media/rc/keymaps/rc-kworld-plus-tv-analog.c
@@ -9,7 +9,7 @@
#include <linux/module.h>
/* Kworld Plus TV Analog Lite PCI IR
- Mauro Carvalho Chehab <mchehab@infradead.org>
+ Mauro Carvalho Chehab <mchehab@kernel.org>
*/
static struct rc_map_table kworld_plus_tv_analog[] = {
diff --git a/drivers/media/rc/keymaps/rc-pixelview-new.c b/drivers/media/rc/keymaps/rc-pixelview-new.c
index 791130f108ff..e4e34f2ccf74 100644
--- a/drivers/media/rc/keymaps/rc-pixelview-new.c
+++ b/drivers/media/rc/keymaps/rc-pixelview-new.c
@@ -9,7 +9,7 @@
#include <linux/module.h>
/*
- Mauro Carvalho Chehab <mchehab@infradead.org>
+ Mauro Carvalho Chehab <mchehab@kernel.org>
present on PV MPEG 8000GT
*/
diff --git a/drivers/media/tuners/tea5761.c b/drivers/media/tuners/tea5761.c
index 88b3e80c38ad..d78a2bdb3e36 100644
--- a/drivers/media/tuners/tea5761.c
+++ b/drivers/media/tuners/tea5761.c
@@ -2,7 +2,7 @@
// For Philips TEA5761 FM Chip
// I2C address is always 0x20 (0x10 at 7-bit mode).
//
-// Copyright (c) 2005-2007 Mauro Carvalho Chehab (mchehab@infradead.org)
+// Copyright (c) 2005-2007 Mauro Carvalho Chehab <mchehab@kernel.org>
#include <linux/i2c.h>
#include <linux/slab.h>
@@ -337,5 +337,5 @@ EXPORT_SYMBOL_GPL(tea5761_attach);
EXPORT_SYMBOL_GPL(tea5761_autodetection);
MODULE_DESCRIPTION("Philips TEA5761 FM tuner driver");
-MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@infradead.org>");
+MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@kernel.org>");
MODULE_LICENSE("GPL v2");
diff --git a/drivers/media/tuners/tea5767.c b/drivers/media/tuners/tea5767.c
index 2b2c064d7dc3..016d0d5ec50b 100644
--- a/drivers/media/tuners/tea5767.c
+++ b/drivers/media/tuners/tea5767.c
@@ -2,7 +2,7 @@
// For Philips TEA5767 FM Chip used on some TV Cards like Prolink Pixelview
// I2C address is always 0xC0.
//
-// Copyright (c) 2005 Mauro Carvalho Chehab (mchehab@infradead.org)
+// Copyright (c) 2005 Mauro Carvalho Chehab <mchehab@kernel.org>
//
// tea5767 autodetection thanks to Torsten Seeboth and Atsushi Nakagawa
// from their contributions on DScaler.
@@ -469,5 +469,5 @@ EXPORT_SYMBOL_GPL(tea5767_attach);
EXPORT_SYMBOL_GPL(tea5767_autodetection);
MODULE_DESCRIPTION("Philips TEA5767 FM tuner driver");
-MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@infradead.org>");
+MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@kernel.org>");
MODULE_LICENSE("GPL v2");
diff --git a/drivers/media/tuners/tuner-xc2028-types.h b/drivers/media/tuners/tuner-xc2028-types.h
index bb0437c36c03..50d017a4822a 100644
--- a/drivers/media/tuners/tuner-xc2028-types.h
+++ b/drivers/media/tuners/tuner-xc2028-types.h
@@ -5,7 +5,7 @@
* This file includes internal types to be used inside tuner-xc2028.
* Shouldn't be included outside tuner-xc2028
*
- * Copyright (c) 2007-2008 Mauro Carvalho Chehab (mchehab@infradead.org)
+ * Copyright (c) 2007-2008 Mauro Carvalho Chehab <mchehab@kernel.org>
*/
/* xc3028 firmware types */
diff --git a/drivers/media/tuners/tuner-xc2028.c b/drivers/media/tuners/tuner-xc2028.c
index fca85e08ebd7..84744e138982 100644
--- a/drivers/media/tuners/tuner-xc2028.c
+++ b/drivers/media/tuners/tuner-xc2028.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
// tuner-xc2028
//
-// Copyright (c) 2007-2008 Mauro Carvalho Chehab (mchehab@infradead.org)
+// Copyright (c) 2007-2008 Mauro Carvalho Chehab <mchehab@kernel.org>
//
// Copyright (c) 2007 Michel Ludwig (michel.ludwig@gmail.com)
// - frontend interface
@@ -1518,7 +1518,7 @@ EXPORT_SYMBOL(xc2028_attach);
MODULE_DESCRIPTION("Xceive xc2028/xc3028 tuner driver");
MODULE_AUTHOR("Michel Ludwig <michel.ludwig@gmail.com>");
-MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@infradead.org>");
+MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@kernel.org>");
MODULE_LICENSE("GPL v2");
MODULE_FIRMWARE(XC2028_DEFAULT_FIRMWARE);
MODULE_FIRMWARE(XC3028L_DEFAULT_FIRMWARE);
diff --git a/drivers/media/tuners/tuner-xc2028.h b/drivers/media/tuners/tuner-xc2028.h
index 03fd6d4233a4..7b58bc06e35c 100644
--- a/drivers/media/tuners/tuner-xc2028.h
+++ b/drivers/media/tuners/tuner-xc2028.h
@@ -2,7 +2,7 @@
* SPDX-License-Identifier: GPL-2.0
* tuner-xc2028
*
- * Copyright (c) 2007-2008 Mauro Carvalho Chehab (mchehab@infradead.org)
+ * Copyright (c) 2007-2008 Mauro Carvalho Chehab <mchehab@kernel.org>
*/
#ifndef __TUNER_XC2028_H__
diff --git a/drivers/media/usb/em28xx/em28xx-camera.c b/drivers/media/usb/em28xx/em28xx-camera.c
index 3c2694a16ed1..d1e66b503f4d 100644
--- a/drivers/media/usb/em28xx/em28xx-camera.c
+++ b/drivers/media/usb/em28xx/em28xx-camera.c
@@ -2,7 +2,7 @@
//
// em28xx-camera.c - driver for Empia EM25xx/27xx/28xx USB video capture devices
//
-// Copyright (C) 2009 Mauro Carvalho Chehab <mchehab@infradead.org>
+// Copyright (C) 2009 Mauro Carvalho Chehab <mchehab@kernel.org>
// Copyright (C) 2013 Frank Schäfer <fschaefer.oss@googlemail.com>
//
// This program is free software; you can redistribute it and/or modify
diff --git a/drivers/media/usb/em28xx/em28xx-cards.c b/drivers/media/usb/em28xx/em28xx-cards.c
index 6e0e67d23876..7c3203d7044b 100644
--- a/drivers/media/usb/em28xx/em28xx-cards.c
+++ b/drivers/media/usb/em28xx/em28xx-cards.c
@@ -5,7 +5,7 @@
//
// Copyright (C) 2005 Ludovico Cavedon <cavedon@sssup.it>
// Markus Rechberger <mrechberger@gmail.com>
-// Mauro Carvalho Chehab <mchehab@infradead.org>
+// Mauro Carvalho Chehab <mchehab@kernel.org>
// Sascha Sommer <saschasommer@freenet.de>
// Copyright (C) 2012 Frank Schäfer <fschaefer.oss@googlemail.com>
//
diff --git a/drivers/media/usb/em28xx/em28xx-core.c b/drivers/media/usb/em28xx/em28xx-core.c
index 36d341fb65dd..f28995383090 100644
--- a/drivers/media/usb/em28xx/em28xx-core.c
+++ b/drivers/media/usb/em28xx/em28xx-core.c
@@ -4,7 +4,7 @@
//
// Copyright (C) 2005 Ludovico Cavedon <cavedon@sssup.it>
// Markus Rechberger <mrechberger@gmail.com>
-// Mauro Carvalho Chehab <mchehab@infradead.org>
+// Mauro Carvalho Chehab <mchehab@kernel.org>
// Sascha Sommer <saschasommer@freenet.de>
// Copyright (C) 2012 Frank Schäfer <fschaefer.oss@googlemail.com>
//
@@ -32,7 +32,7 @@
#define DRIVER_AUTHOR "Ludovico Cavedon <cavedon@sssup.it>, " \
"Markus Rechberger <mrechberger@gmail.com>, " \
- "Mauro Carvalho Chehab <mchehab@infradead.org>, " \
+ "Mauro Carvalho Chehab <mchehab@kernel.org>, " \
"Sascha Sommer <saschasommer@freenet.de>"
MODULE_AUTHOR(DRIVER_AUTHOR);
diff --git a/drivers/media/usb/em28xx/em28xx-dvb.c b/drivers/media/usb/em28xx/em28xx-dvb.c
index a54cb8dc52c9..3f493e0b0716 100644
--- a/drivers/media/usb/em28xx/em28xx-dvb.c
+++ b/drivers/media/usb/em28xx/em28xx-dvb.c
@@ -2,7 +2,7 @@
//
// DVB device driver for em28xx
//
-// (c) 2008-2011 Mauro Carvalho Chehab <mchehab@infradead.org>
+// (c) 2008-2011 Mauro Carvalho Chehab <mchehab@kernel.org>
//
// (c) 2008 Devin Heitmueller <devin.heitmueller@gmail.com>
// - Fixes for the driver to properly work with HVR-950
@@ -63,7 +63,7 @@
#include "tc90522.h"
#include "qm1d1c0042.h"
-MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@infradead.org>");
+MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@kernel.org>");
MODULE_LICENSE("GPL v2");
MODULE_DESCRIPTION(DRIVER_DESC " - digital TV interface");
MODULE_VERSION(EM28XX_VERSION);
diff --git a/drivers/media/usb/em28xx/em28xx-i2c.c b/drivers/media/usb/em28xx/em28xx-i2c.c
index 9151bccd859a..6458682bc6e2 100644
--- a/drivers/media/usb/em28xx/em28xx-i2c.c
+++ b/drivers/media/usb/em28xx/em28xx-i2c.c
@@ -4,7 +4,7 @@
//
// Copyright (C) 2005 Ludovico Cavedon <cavedon@sssup.it>
// Markus Rechberger <mrechberger@gmail.com>
-// Mauro Carvalho Chehab <mchehab@infradead.org>
+// Mauro Carvalho Chehab <mchehab@kernel.org>
// Sascha Sommer <saschasommer@freenet.de>
// Copyright (C) 2013 Frank Schäfer <fschaefer.oss@googlemail.com>
//
diff --git a/drivers/media/usb/em28xx/em28xx-input.c b/drivers/media/usb/em28xx/em28xx-input.c
index 2dc1be00b8b8..f84a1208d5d3 100644
--- a/drivers/media/usb/em28xx/em28xx-input.c
+++ b/drivers/media/usb/em28xx/em28xx-input.c
@@ -4,7 +4,7 @@
//
// Copyright (C) 2005 Ludovico Cavedon <cavedon@sssup.it>
// Markus Rechberger <mrechberger@gmail.com>
-// Mauro Carvalho Chehab <mchehab@infradead.org>
+// Mauro Carvalho Chehab <mchehab@kernel.org>
// Sascha Sommer <saschasommer@freenet.de>
//
// This program is free software; you can redistribute it and/or modify
diff --git a/drivers/media/usb/em28xx/em28xx-video.c b/drivers/media/usb/em28xx/em28xx-video.c
index d70ee13cc52e..68571bf36d28 100644
--- a/drivers/media/usb/em28xx/em28xx-video.c
+++ b/drivers/media/usb/em28xx/em28xx-video.c
@@ -5,7 +5,7 @@
//
// Copyright (C) 2005 Ludovico Cavedon <cavedon@sssup.it>
// Markus Rechberger <mrechberger@gmail.com>
-// Mauro Carvalho Chehab <mchehab@infradead.org>
+// Mauro Carvalho Chehab <mchehab@kernel.org>
// Sascha Sommer <saschasommer@freenet.de>
// Copyright (C) 2012 Frank Schäfer <fschaefer.oss@googlemail.com>
//
@@ -44,7 +44,7 @@
#define DRIVER_AUTHOR "Ludovico Cavedon <cavedon@sssup.it>, " \
"Markus Rechberger <mrechberger@gmail.com>, " \
- "Mauro Carvalho Chehab <mchehab@infradead.org>, " \
+ "Mauro Carvalho Chehab <mchehab@kernel.org>, " \
"Sascha Sommer <saschasommer@freenet.de>"
static unsigned int isoc_debug;
diff --git a/drivers/media/usb/em28xx/em28xx.h b/drivers/media/usb/em28xx/em28xx.h
index 63c7c6124707..b0378e77ddff 100644
--- a/drivers/media/usb/em28xx/em28xx.h
+++ b/drivers/media/usb/em28xx/em28xx.h
@@ -4,7 +4,7 @@
*
* Copyright (C) 2005 Markus Rechberger <mrechberger@gmail.com>
* Ludovico Cavedon <cavedon@sssup.it>
- * Mauro Carvalho Chehab <mchehab@infradead.org>
+ * Mauro Carvalho Chehab <mchehab@kernel.org>
* Copyright (C) 2012 Frank Schäfer <fschaefer.oss@googlemail.com>
*
* Based on the em2800 driver from Sascha Sommer <saschasommer@freenet.de>
diff --git a/drivers/media/usb/gspca/zc3xx-reg.h b/drivers/media/usb/gspca/zc3xx-reg.h
index a1bd94e8ce52..71fda38e85e0 100644
--- a/drivers/media/usb/gspca/zc3xx-reg.h
+++ b/drivers/media/usb/gspca/zc3xx-reg.h
@@ -1,7 +1,7 @@
/*
* zc030x registers
*
- * Copyright (c) 2008 Mauro Carvalho Chehab <mchehab@infradead.org>
+ * Copyright (c) 2008 Mauro Carvalho Chehab <mchehab@kernel.org>
*
* The register aliases used here came from this driver:
* http://zc0302.sourceforge.net/zc0302.php
diff --git a/drivers/media/usb/tm6000/tm6000-cards.c b/drivers/media/usb/tm6000/tm6000-cards.c
index 70939e96b856..23df50aa0a4a 100644
--- a/drivers/media/usb/tm6000/tm6000-cards.c
+++ b/drivers/media/usb/tm6000/tm6000-cards.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
// tm6000-cards.c - driver for TM5600/TM6000/TM6010 USB video capture devices
//
-// Copyright (c) 2006-2007 Mauro Carvalho Chehab <mchehab@infradead.org>
+// Copyright (c) 2006-2007 Mauro Carvalho Chehab <mchehab@kernel.org>
#include <linux/init.h>
#include <linux/module.h>
diff --git a/drivers/media/usb/tm6000/tm6000-core.c b/drivers/media/usb/tm6000/tm6000-core.c
index 23a1332d98e6..d3229aa45fcb 100644
--- a/drivers/media/usb/tm6000/tm6000-core.c
+++ b/drivers/media/usb/tm6000/tm6000-core.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
// tm6000-core.c - driver for TM5600/TM6000/TM6010 USB video capture devices
//
-// Copyright (c) 2006-2007 Mauro Carvalho Chehab <mchehab@infradead.org>
+// Copyright (c) 2006-2007 Mauro Carvalho Chehab <mchehab@kernel.org>
//
// Copyright (c) 2007 Michel Ludwig <michel.ludwig@gmail.com>
// - DVB-T support
diff --git a/drivers/media/usb/tm6000/tm6000-i2c.c b/drivers/media/usb/tm6000/tm6000-i2c.c
index c9a62bbff27a..659b63febf85 100644
--- a/drivers/media/usb/tm6000/tm6000-i2c.c
+++ b/drivers/media/usb/tm6000/tm6000-i2c.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
// tm6000-i2c.c - driver for TM5600/TM6000/TM6010 USB video capture devices
//
-// Copyright (c) 2006-2007 Mauro Carvalho Chehab <mchehab@infradead.org>
+// Copyright (c) 2006-2007 Mauro Carvalho Chehab <mchehab@kernel.org>
//
// Copyright (c) 2007 Michel Ludwig <michel.ludwig@gmail.com>
// - Fix SMBus Read Byte command
diff --git a/drivers/media/usb/tm6000/tm6000-regs.h b/drivers/media/usb/tm6000/tm6000-regs.h
index 21587fcf11e3..d10424673db9 100644
--- a/drivers/media/usb/tm6000/tm6000-regs.h
+++ b/drivers/media/usb/tm6000/tm6000-regs.h
@@ -2,7 +2,7 @@
* SPDX-License-Identifier: GPL-2.0
* tm6000-regs.h - driver for TM5600/TM6000/TM6010 USB video capture devices
*
- * Copyright (c) 2006-2007 Mauro Carvalho Chehab <mchehab@infradead.org>
+ * Copyright (c) 2006-2007 Mauro Carvalho Chehab <mchehab@kernel.org>
*/
/*
diff --git a/drivers/media/usb/tm6000/tm6000-usb-isoc.h b/drivers/media/usb/tm6000/tm6000-usb-isoc.h
index 5c615b0a7a46..b275dbce3a1b 100644
--- a/drivers/media/usb/tm6000/tm6000-usb-isoc.h
+++ b/drivers/media/usb/tm6000/tm6000-usb-isoc.h
@@ -2,7 +2,7 @@
* SPDX-License-Identifier: GPL-2.0
* tm6000-buf.c - driver for TM5600/TM6000/TM6010 USB video capture devices
*
- * Copyright (c) 2006-2007 Mauro Carvalho Chehab <mchehab@infradead.org>
+ * Copyright (c) 2006-2007 Mauro Carvalho Chehab <mchehab@kernel.org>
*/
#include <linux/videodev2.h>
diff --git a/drivers/media/usb/tm6000/tm6000-video.c b/drivers/media/usb/tm6000/tm6000-video.c
index b2399d4266da..aa85fe31c835 100644
--- a/drivers/media/usb/tm6000/tm6000-video.c
+++ b/drivers/media/usb/tm6000/tm6000-video.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
// tm6000-video.c - driver for TM5600/TM6000/TM6010 USB video capture devices
//
-// Copyright (c) 2006-2007 Mauro Carvalho Chehab <mchehab@infradead.org>
+// Copyright (c) 2006-2007 Mauro Carvalho Chehab <mchehab@kernel.org>
//
// Copyright (c) 2007 Michel Ludwig <michel.ludwig@gmail.com>
// - Fixed module load/unload
diff --git a/drivers/media/usb/tm6000/tm6000.h b/drivers/media/usb/tm6000/tm6000.h
index e1e45770e28d..0864ed7314eb 100644
--- a/drivers/media/usb/tm6000/tm6000.h
+++ b/drivers/media/usb/tm6000/tm6000.h
@@ -2,7 +2,7 @@
* SPDX-License-Identifier: GPL-2.0
* tm6000.h - driver for TM5600/TM6000/TM6010 USB video capture devices
*
- * Copyright (c) 2006-2007 Mauro Carvalho Chehab <mchehab@infradead.org>
+ * Copyright (c) 2006-2007 Mauro Carvalho Chehab <mchehab@kernel.org>
*
* Copyright (c) 2007 Michel Ludwig <michel.ludwig@gmail.com>
* - DVB-T support
diff --git a/drivers/media/v4l2-core/v4l2-dev.c b/drivers/media/v4l2-core/v4l2-dev.c
index 1d0b2208e8fb..c080dcc75393 100644
--- a/drivers/media/v4l2-core/v4l2-dev.c
+++ b/drivers/media/v4l2-core/v4l2-dev.c
@@ -10,7 +10,7 @@
* 2 of the License, or (at your option) any later version.
*
* Authors: Alan Cox, <alan@lxorguk.ukuu.org.uk> (version 1)
- * Mauro Carvalho Chehab <mchehab@infradead.org> (version 2)
+ * Mauro Carvalho Chehab <mchehab@kernel.org> (version 2)
*
* Fixes: 20000516 Claudio Matsuoka <claudio@conectiva.com>
* - Added procfs support
@@ -1072,7 +1072,7 @@ static void __exit videodev_exit(void)
subsys_initcall(videodev_init);
module_exit(videodev_exit)
-MODULE_AUTHOR("Alan Cox, Mauro Carvalho Chehab <mchehab@infradead.org>");
+MODULE_AUTHOR("Alan Cox, Mauro Carvalho Chehab <mchehab@kernel.org>");
MODULE_DESCRIPTION("Device registrar for Video4Linux drivers v2");
MODULE_LICENSE("GPL");
MODULE_ALIAS_CHARDEV_MAJOR(VIDEO_MAJOR);
diff --git a/drivers/media/v4l2-core/v4l2-ioctl.c b/drivers/media/v4l2-core/v4l2-ioctl.c
index f48c505550e0..de5d96dbe69e 100644
--- a/drivers/media/v4l2-core/v4l2-ioctl.c
+++ b/drivers/media/v4l2-core/v4l2-ioctl.c
@@ -9,7 +9,7 @@
* 2 of the License, or (at your option) any later version.
*
* Authors: Alan Cox, <alan@lxorguk.ukuu.org.uk> (version 1)
- * Mauro Carvalho Chehab <mchehab@infradead.org> (version 2)
+ * Mauro Carvalho Chehab <mchehab@kernel.org> (version 2)
*/
#include <linux/mm.h>
diff --git a/drivers/media/v4l2-core/videobuf-core.c b/drivers/media/v4l2-core/videobuf-core.c
index 2b3981842b4b..7491b337002c 100644
--- a/drivers/media/v4l2-core/videobuf-core.c
+++ b/drivers/media/v4l2-core/videobuf-core.c
@@ -1,11 +1,11 @@
/*
* generic helper functions for handling video4linux capture buffers
*
- * (c) 2007 Mauro Carvalho Chehab, <mchehab@infradead.org>
+ * (c) 2007 Mauro Carvalho Chehab, <mchehab@kernel.org>
*
* Highly based on video-buf written originally by:
* (c) 2001,02 Gerd Knorr <kraxel@bytesex.org>
- * (c) 2006 Mauro Carvalho Chehab, <mchehab@infradead.org>
+ * (c) 2006 Mauro Carvalho Chehab, <mchehab@kernel.org>
* (c) 2006 Ted Walther and John Sokol
*
* This program is free software; you can redistribute it and/or modify
@@ -38,7 +38,7 @@ static int debug;
module_param(debug, int, 0644);
MODULE_DESCRIPTION("helper module to manage video4linux buffers");
-MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@infradead.org>");
+MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@kernel.org>");
MODULE_LICENSE("GPL");
#define dprintk(level, fmt, arg...) \
diff --git a/drivers/media/v4l2-core/videobuf-dma-contig.c b/drivers/media/v4l2-core/videobuf-dma-contig.c
index e02353e340dd..f46132504d88 100644
--- a/drivers/media/v4l2-core/videobuf-dma-contig.c
+++ b/drivers/media/v4l2-core/videobuf-dma-contig.c
@@ -7,7 +7,7 @@
* Copyright (c) 2008 Magnus Damm
*
* Based on videobuf-vmalloc.c,
- * (c) 2007 Mauro Carvalho Chehab, <mchehab@infradead.org>
+ * (c) 2007 Mauro Carvalho Chehab, <mchehab@kernel.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
diff --git a/drivers/media/v4l2-core/videobuf-dma-sg.c b/drivers/media/v4l2-core/videobuf-dma-sg.c
index add2edb23eac..7770034aae28 100644
--- a/drivers/media/v4l2-core/videobuf-dma-sg.c
+++ b/drivers/media/v4l2-core/videobuf-dma-sg.c
@@ -6,11 +6,11 @@
* into PAGE_SIZE chunks). They also assume the driver does not need
* to touch the video data.
*
- * (c) 2007 Mauro Carvalho Chehab, <mchehab@infradead.org>
+ * (c) 2007 Mauro Carvalho Chehab, <mchehab@kernel.org>
*
* Highly based on video-buf written originally by:
* (c) 2001,02 Gerd Knorr <kraxel@bytesex.org>
- * (c) 2006 Mauro Carvalho Chehab, <mchehab@infradead.org>
+ * (c) 2006 Mauro Carvalho Chehab, <mchehab@kernel.org>
* (c) 2006 Ted Walther and John Sokol
*
* This program is free software; you can redistribute it and/or modify
@@ -48,7 +48,7 @@ static int debug;
module_param(debug, int, 0644);
MODULE_DESCRIPTION("helper module to manage video4linux dma sg buffers");
-MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@infradead.org>");
+MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@kernel.org>");
MODULE_LICENSE("GPL");
#define dprintk(level, fmt, arg...) \
diff --git a/drivers/media/v4l2-core/videobuf-vmalloc.c b/drivers/media/v4l2-core/videobuf-vmalloc.c
index 2ff7fcc77b11..45fe781aeeec 100644
--- a/drivers/media/v4l2-core/videobuf-vmalloc.c
+++ b/drivers/media/v4l2-core/videobuf-vmalloc.c
@@ -6,7 +6,7 @@
* into PAGE_SIZE chunks). They also assume the driver does not need
* to touch the video data.
*
- * (c) 2007 Mauro Carvalho Chehab, <mchehab@infradead.org>
+ * (c) 2007 Mauro Carvalho Chehab, <mchehab@kernel.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -41,7 +41,7 @@ static int debug;
module_param(debug, int, 0644);
MODULE_DESCRIPTION("helper module to manage video4linux vmalloc buffers");
-MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@infradead.org>");
+MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@kernel.org>");
MODULE_LICENSE("GPL");
#define dprintk(level, fmt, arg...) \
diff --git a/drivers/memstick/core/ms_block.c b/drivers/memstick/core/ms_block.c
index 57b13dfbd21e..a15181fa45f7 100644
--- a/drivers/memstick/core/ms_block.c
+++ b/drivers/memstick/core/ms_block.c
@@ -2094,14 +2094,9 @@ static const struct block_device_operations msb_bdops = {
static int msb_init_disk(struct memstick_dev *card)
{
struct msb_data *msb = memstick_get_drvdata(card);
- struct memstick_host *host = card->host;
int rc;
- u64 limit = BLK_BOUNCE_HIGH;
unsigned long capacity;
- if (host->dev.dma_mask && *(host->dev.dma_mask))
- limit = *(host->dev.dma_mask);
-
mutex_lock(&msb_disk_lock);
msb->disk_id = idr_alloc(&msb_disk_idr, card, 0, 256, GFP_KERNEL);
mutex_unlock(&msb_disk_lock);
@@ -2123,7 +2118,6 @@ static int msb_init_disk(struct memstick_dev *card)
msb->queue->queuedata = card;
- blk_queue_bounce_limit(msb->queue, limit);
blk_queue_max_hw_sectors(msb->queue, MS_BLOCK_MAX_PAGES);
blk_queue_max_segments(msb->queue, MS_BLOCK_MAX_SEGS);
blk_queue_max_segment_size(msb->queue,
diff --git a/drivers/memstick/core/mspro_block.c b/drivers/memstick/core/mspro_block.c
index 8897962781bb..5ee932631fae 100644
--- a/drivers/memstick/core/mspro_block.c
+++ b/drivers/memstick/core/mspro_block.c
@@ -1170,17 +1170,12 @@ static int mspro_block_init_card(struct memstick_dev *card)
static int mspro_block_init_disk(struct memstick_dev *card)
{
struct mspro_block_data *msb = memstick_get_drvdata(card);
- struct memstick_host *host = card->host;
struct mspro_devinfo *dev_info = NULL;
struct mspro_sys_info *sys_info = NULL;
struct mspro_sys_attr *s_attr = NULL;
int rc, disk_id;
- u64 limit = BLK_BOUNCE_HIGH;
unsigned long capacity;
- if (host->dev.dma_mask && *(host->dev.dma_mask))
- limit = *(host->dev.dma_mask);
-
for (rc = 0; msb->attr_group.attrs[rc]; ++rc) {
s_attr = mspro_from_sysfs_attr(msb->attr_group.attrs[rc]);
@@ -1219,7 +1214,6 @@ static int mspro_block_init_disk(struct memstick_dev *card)
msb->queue->queuedata = card;
- blk_queue_bounce_limit(msb->queue, limit);
blk_queue_max_hw_sectors(msb->queue, MSPRO_BLOCK_MAX_PAGES);
blk_queue_max_segments(msb->queue, MSPRO_BLOCK_MAX_SEGS);
blk_queue_max_segment_size(msb->queue,
diff --git a/drivers/message/fusion/mptbase.c b/drivers/message/fusion/mptbase.c
index 51eb1b027963..a746ccdd630a 100644
--- a/drivers/message/fusion/mptbase.c
+++ b/drivers/message/fusion/mptbase.c
@@ -197,9 +197,9 @@ static int mpt_host_page_access_control(MPT_ADAPTER *ioc, u8 access_control_valu
static int mpt_host_page_alloc(MPT_ADAPTER *ioc, pIOCInit_t ioc_init);
#ifdef CONFIG_PROC_FS
-static const struct file_operations mpt_summary_proc_fops;
-static const struct file_operations mpt_version_proc_fops;
-static const struct file_operations mpt_iocinfo_proc_fops;
+static int mpt_summary_proc_show(struct seq_file *m, void *v);
+static int mpt_version_proc_show(struct seq_file *m, void *v);
+static int mpt_iocinfo_proc_show(struct seq_file *m, void *v);
#endif
static void mpt_get_fw_exp_ver(char *buf, MPT_ADAPTER *ioc);
@@ -2040,8 +2040,10 @@ mpt_attach(struct pci_dev *pdev, const struct pci_device_id *id)
*/
dent = proc_mkdir(ioc->name, mpt_proc_root_dir);
if (dent) {
- proc_create_data("info", S_IRUGO, dent, &mpt_iocinfo_proc_fops, ioc);
- proc_create_data("summary", S_IRUGO, dent, &mpt_summary_proc_fops, ioc);
+ proc_create_single_data("info", S_IRUGO, dent,
+ mpt_iocinfo_proc_show, ioc);
+ proc_create_single_data("summary", S_IRUGO, dent,
+ mpt_summary_proc_show, ioc);
}
#endif
@@ -6606,8 +6608,10 @@ procmpt_create(void)
if (mpt_proc_root_dir == NULL)
return -ENOTDIR;
- proc_create("summary", S_IRUGO, mpt_proc_root_dir, &mpt_summary_proc_fops);
- proc_create("version", S_IRUGO, mpt_proc_root_dir, &mpt_version_proc_fops);
+ proc_create_single("summary", S_IRUGO, mpt_proc_root_dir,
+ mpt_summary_proc_show);
+ proc_create_single("version", S_IRUGO, mpt_proc_root_dir,
+ mpt_version_proc_show);
return 0;
}
@@ -6646,19 +6650,6 @@ static int mpt_summary_proc_show(struct seq_file *m, void *v)
return 0;
}
-static int mpt_summary_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, mpt_summary_proc_show, PDE_DATA(inode));
-}
-
-static const struct file_operations mpt_summary_proc_fops = {
- .owner = THIS_MODULE,
- .open = mpt_summary_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
static int mpt_version_proc_show(struct seq_file *m, void *v)
{
u8 cb_idx;
@@ -6701,19 +6692,6 @@ static int mpt_version_proc_show(struct seq_file *m, void *v)
return 0;
}
-static int mpt_version_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, mpt_version_proc_show, NULL);
-}
-
-static const struct file_operations mpt_version_proc_fops = {
- .owner = THIS_MODULE,
- .open = mpt_version_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
static int mpt_iocinfo_proc_show(struct seq_file *m, void *v)
{
MPT_ADAPTER *ioc = m->private;
@@ -6793,19 +6771,6 @@ static int mpt_iocinfo_proc_show(struct seq_file *m, void *v)
return 0;
}
-
-static int mpt_iocinfo_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, mpt_iocinfo_proc_show, PDE_DATA(inode));
-}
-
-static const struct file_operations mpt_iocinfo_proc_fops = {
- .owner = THIS_MODULE,
- .open = mpt_iocinfo_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
#endif /* CONFIG_PROC_FS } */
/*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
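When the show() callback needs a per-device cookie, the _data variant used above stores it so the callback can fetch it from m->private, replacing the removed single_open(file, show, PDE_DATA(inode)) chain. A hedged sketch (adapter type and names hypothetical):

	static int demo_info_show(struct seq_file *m, void *v)
	{
		struct demo_adapter *a = m->private;	/* cookie from creation */

		seq_printf(m, "%s\n", a->name);
		return 0;
	}

	/* during probe: the last argument lands in m->private */
	proc_create_single_data("info", 0444, parent_dir, demo_info_show, a);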
diff --git a/drivers/message/fusion/mptsas.c b/drivers/message/fusion/mptsas.c
index 86503f60468f..19a5aa70ecda 100644
--- a/drivers/message/fusion/mptsas.c
+++ b/drivers/message/fusion/mptsas.c
@@ -1929,7 +1929,7 @@ static enum blk_eh_timer_return mptsas_eh_timed_out(struct scsi_cmnd *sc)
MPT_SCSI_HOST *hd;
MPT_ADAPTER *ioc;
VirtDevice *vdevice;
- enum blk_eh_timer_return rc = BLK_EH_NOT_HANDLED;
+ enum blk_eh_timer_return rc = BLK_EH_DONE;
hd = shost_priv(sc->device->host);
if (hd == NULL) {
diff --git a/drivers/mfd/cros_ec_spi.c b/drivers/mfd/cros_ec_spi.c
index 1b52b8557034..2060d1483043 100644
--- a/drivers/mfd/cros_ec_spi.c
+++ b/drivers/mfd/cros_ec_spi.c
@@ -419,10 +419,25 @@ static int cros_ec_pkt_xfer_spi(struct cros_ec_device *ec_dev,
/* Verify that EC can process command */
for (i = 0; i < len; i++) {
rx_byte = rx_buf[i];
+ /*
+ * The PAST_END, RX_BAD_DATA and NOT_READY markers
+ * are all signs that the EC didn't fully receive
+ * our command. For example, if the EC is flashing
+ * itself, it can't respond to any commands and instead
+ * clocks out EC_SPI_PAST_END from its SPI hardware
+ * buffer. Similar occurrences can happen if the AP is
+ * too slow to clock out data after asserting CS -- the
+ * EC will abort and fill its buffer with
+ * EC_SPI_RX_BAD_DATA.
+ *
+ * In all cases, these errors should be safe to retry.
+ * Report -EAGAIN and let the caller decide what to do
+ * about that.
+ */
if (rx_byte == EC_SPI_PAST_END ||
rx_byte == EC_SPI_RX_BAD_DATA ||
rx_byte == EC_SPI_NOT_READY) {
- ret = -EREMOTEIO;
+ ret = -EAGAIN;
break;
}
}
@@ -431,7 +446,7 @@ static int cros_ec_pkt_xfer_spi(struct cros_ec_device *ec_dev,
if (!ret)
ret = cros_ec_spi_receive_packet(ec_dev,
ec_msg->insize + sizeof(*response));
- else
+ else if (ret != -EAGAIN)
dev_err(ec_dev->dev, "spi transfer failed: %d\n", ret);
final_ret = terminate_request(ec_dev);
@@ -537,10 +552,11 @@ static int cros_ec_cmd_xfer_spi(struct cros_ec_device *ec_dev,
/* Verify that EC can process command */
for (i = 0; i < len; i++) {
rx_byte = rx_buf[i];
+ /* See comments in cros_ec_pkt_xfer_spi() */
if (rx_byte == EC_SPI_PAST_END ||
rx_byte == EC_SPI_RX_BAD_DATA ||
rx_byte == EC_SPI_NOT_READY) {
- ret = -EREMOTEIO;
+ ret = -EAGAIN;
break;
}
}
@@ -549,7 +565,7 @@ static int cros_ec_cmd_xfer_spi(struct cros_ec_device *ec_dev,
if (!ret)
ret = cros_ec_spi_receive_response(ec_dev,
ec_msg->insize + EC_MSG_TX_PROTO_BYTES);
- else
+ else if (ret != -EAGAIN)
dev_err(ec_dev->dev, "spi transfer failed: %d\n", ret);
final_ret = terminate_request(ec_dev);
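With the switch from -EREMOTEIO to -EAGAIN, callers can distinguish "the EC never latched the command, a retry is safe" from hard transfer failures. A hypothetical caller-side retry loop; the transfer hook follows the pkt_xfer member this driver registers, but the wrapper itself is illustrative, not part of the cros_ec API:

	static int demo_xfer_retry(struct cros_ec_device *ec_dev,
				   struct cros_ec_command *msg)
	{
		int tries = 3;
		int ret;

		do {
			ret = ec_dev->pkt_xfer(ec_dev, msg);
		} while (ret == -EAGAIN && --tries);

		return ret;
	}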
diff --git a/drivers/mfd/mc13xxx-core.c b/drivers/mfd/mc13xxx-core.c
index d7f54e492aa6..c63e331738c1 100644
--- a/drivers/mfd/mc13xxx-core.c
+++ b/drivers/mfd/mc13xxx-core.c
@@ -279,8 +279,21 @@ int mc13xxx_adc_do_conversion(struct mc13xxx *mc13xxx, unsigned int mode,
adc0 = MC13XXX_ADC0_ADINC1 | MC13XXX_ADC0_ADINC2;
adc1 = MC13XXX_ADC1_ADEN | MC13XXX_ADC1_ADTRIGIGN | MC13XXX_ADC1_ASC;
- if (channel > 7)
+ /*
+ * Channels mapped through ADIN7:
+ * 7 - General purpose ADIN7
+ * 16 - UID
+ * 17 - Die temperature
+ */
+ if (channel > 7 && channel < 16) {
adc1 |= MC13XXX_ADC1_ADSEL;
+ } else if (channel == 16) {
+ adc0 |= MC13XXX_ADC0_ADIN7SEL_UID;
+ channel = 7;
+ } else if (channel == 17) {
+ adc0 |= MC13XXX_ADC0_ADIN7SEL_DIE;
+ channel = 7;
+ }
switch (mode) {
case MC13XXX_ADC_MODE_TS:
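After the remapping, UID and die temperature are requested like any other channel; the helper rewrites them to channel 7 plus the matching ADIN7SEL bit internally. Hypothetical call, assuming the upstream prototype of mc13xxx_adc_do_conversion():

	unsigned int sample[4];
	int ret;

	/* channel 17 = die temperature, routed through ADIN7 */
	ret = mc13xxx_adc_do_conversion(mc13xxx, MC13XXX_ADC_MODE_SINGLE_CHAN,
					17, 0, false, sample);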
diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h
index a4c9c8297a6d..918d4fb742d1 100644
--- a/drivers/misc/cxl/cxl.h
+++ b/drivers/misc/cxl/cxl.h
@@ -717,6 +717,7 @@ struct cxl {
bool perst_select_user;
bool perst_same_image;
bool psl_timebase_synced;
+ bool tunneled_ops_supported;
/*
* number of contexts mapped on to this card. Possible values are:
diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c
index 83f1d08058fc..4d6736f9d463 100644
--- a/drivers/misc/cxl/pci.c
+++ b/drivers/misc/cxl/pci.c
@@ -1742,6 +1742,15 @@ static int cxl_configure_adapter(struct cxl *adapter, struct pci_dev *dev)
/* Required for devices using CAPP DMA mode, harmless for others */
pci_set_master(dev);
+ adapter->tunneled_ops_supported = false;
+
+ if (cxl_is_power9()) {
+ if (pnv_pci_set_tunnel_bar(dev, 0x00020000E0000000ull, 1))
+ dev_info(&dev->dev, "Tunneled operations unsupported\n");
+ else
+ adapter->tunneled_ops_supported = true;
+ }
+
if ((rc = pnv_phb_to_cxl_mode(dev, adapter->native->sl_ops->capi_mode)))
goto err;
@@ -1768,6 +1777,9 @@ static void cxl_deconfigure_adapter(struct cxl *adapter)
{
struct pci_dev *pdev = to_pci_dev(adapter->dev.parent);
+ if (cxl_is_power9())
+ pnv_pci_set_tunnel_bar(pdev, 0x00020000E0000000ull, 0);
+
cxl_native_release_psl_err_irq(adapter);
cxl_unmap_adapter_regs(adapter);
diff --git a/drivers/misc/cxl/sysfs.c b/drivers/misc/cxl/sysfs.c
index 95285b7f636f..4b5a4c5d3c01 100644
--- a/drivers/misc/cxl/sysfs.c
+++ b/drivers/misc/cxl/sysfs.c
@@ -78,6 +78,15 @@ static ssize_t psl_timebase_synced_show(struct device *device,
return scnprintf(buf, PAGE_SIZE, "%i\n", adapter->psl_timebase_synced);
}
+static ssize_t tunneled_ops_supported_show(struct device *device,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct cxl *adapter = to_cxl_adapter(device);
+
+ return scnprintf(buf, PAGE_SIZE, "%i\n", adapter->tunneled_ops_supported);
+}
+
static ssize_t reset_adapter_store(struct device *device,
struct device_attribute *attr,
const char *buf, size_t count)
@@ -183,6 +192,7 @@ static struct device_attribute adapter_attrs[] = {
__ATTR_RO(base_image),
__ATTR_RO(image_loaded),
__ATTR_RO(psl_timebase_synced),
+ __ATTR_RO(tunneled_ops_supported),
__ATTR_RW(load_image_on_perst),
__ATTR_RW(perst_reloads_same_image),
__ATTR(reset, S_IWUSR, NULL, reset_adapter_store),
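Userspace can now probe for the feature before attempting tunneled operations. A small hedged example, with the sysfs path assumed from the usual sysfs-class-cxl layout:

	#include <stdio.h>

	int main(void)
	{
		char buf[4] = "";
		FILE *f = fopen("/sys/class/cxl/card0/tunneled_ops_supported", "r");

		if (!f)
			return 1;	/* attribute (or card) not present */
		if (fgets(buf, sizeof(buf), f))
			printf("tunneled ops: %s", buf);
		fclose(f);
		return 0;
	}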
diff --git a/drivers/misc/eeprom/at24.c b/drivers/misc/eeprom/at24.c
index 0c125f207aea..33053b0d1fdf 100644
--- a/drivers/misc/eeprom/at24.c
+++ b/drivers/misc/eeprom/at24.c
@@ -518,7 +518,7 @@ static int at24_get_pdata(struct device *dev, struct at24_platform_data *pdata)
if (of_node && of_match_device(at24_of_match, dev))
cdata = of_device_get_match_data(dev);
else if (id)
- cdata = (void *)&id->driver_data;
+ cdata = (void *)id->driver_data;
else
cdata = acpi_device_get_match_data(dev);
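The one-character change above fixes a pointer-level bug: ->driver_data is a kernel_ulong_t whose value here is the chip-data pointer, so casting the value is correct, while taking its address (the old code) produced a pointer into the id-table entry itself. Illustrative contrast:

	cdata = (void *)id->driver_data;	/* value holds the pointer: correct */
	cdata = (void *)&id->driver_data;	/* address of the table field: wrong */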
diff --git a/drivers/misc/sgi-gru/gruprocfs.c b/drivers/misc/sgi-gru/gruprocfs.c
index 4f7635922394..42ea2eccaee9 100644
--- a/drivers/misc/sgi-gru/gruprocfs.c
+++ b/drivers/misc/sgi-gru/gruprocfs.c
@@ -270,16 +270,6 @@ static int options_open(struct inode *inode, struct file *file)
return single_open(file, options_show, NULL);
}
-static int cch_open(struct inode *inode, struct file *file)
-{
- return seq_open(file, &cch_seq_ops);
-}
-
-static int gru_open(struct inode *inode, struct file *file)
-{
- return seq_open(file, &gru_seq_ops);
-}
-
/* *INDENT-OFF* */
static const struct file_operations statistics_fops = {
.open = statistics_open,
@@ -305,73 +295,30 @@ static const struct file_operations options_fops = {
.release = single_release,
};
-static const struct file_operations cch_fops = {
- .open = cch_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-static const struct file_operations gru_fops = {
- .open = gru_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
-static struct proc_entry {
- char *name;
- umode_t mode;
- const struct file_operations *fops;
- struct proc_dir_entry *entry;
-} proc_files[] = {
- {"statistics", 0644, &statistics_fops},
- {"mcs_statistics", 0644, &mcs_statistics_fops},
- {"debug_options", 0644, &options_fops},
- {"cch_status", 0444, &cch_fops},
- {"gru_status", 0444, &gru_fops},
- {NULL}
-};
-/* *INDENT-ON* */
-
static struct proc_dir_entry *proc_gru __read_mostly;
-static int create_proc_file(struct proc_entry *p)
-{
- p->entry = proc_create(p->name, p->mode, proc_gru, p->fops);
- if (!p->entry)
- return -1;
- return 0;
-}
-
-static void delete_proc_files(void)
-{
- struct proc_entry *p;
-
- if (proc_gru) {
- for (p = proc_files; p->name; p++)
- if (p->entry)
- remove_proc_entry(p->name, proc_gru);
- proc_remove(proc_gru);
- }
-}
-
int gru_proc_init(void)
{
- struct proc_entry *p;
-
proc_gru = proc_mkdir("sgi_uv/gru", NULL);
-
- for (p = proc_files; p->name; p++)
- if (create_proc_file(p))
- goto err;
+ if (!proc_gru)
+ return -1;
+ if (!proc_create("statistics", 0644, proc_gru, &statistics_fops))
+ goto err;
+ if (!proc_create("mcs_statistics", 0644, proc_gru, &mcs_statistics_fops))
+ goto err;
+ if (!proc_create("debug_options", 0644, proc_gru, &options_fops))
+ goto err;
+ if (!proc_create_seq("cch_status", 0444, proc_gru, &cch_seq_ops))
+ goto err;
+ if (!proc_create_seq("gru_status", 0444, proc_gru, &gru_seq_ops))
+ goto err;
return 0;
-
err:
- delete_proc_files();
+ remove_proc_subtree("sgi_uv/gru", NULL);
return -1;
}
void gru_proc_exit(void)
{
- delete_proc_files();
+ remove_proc_subtree("sgi_uv/gru", NULL);
}
diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c
index 9e923cd1d80e..d89e17829527 100644
--- a/drivers/mmc/core/block.c
+++ b/drivers/mmc/core/block.c
@@ -244,7 +244,7 @@ static ssize_t power_ro_lock_store(struct device *dev,
mq = &md->queue;
/* Dispatch locking to the block layer */
- req = blk_get_request(mq->queue, REQ_OP_DRV_OUT, __GFP_RECLAIM);
+ req = blk_get_request(mq->queue, REQ_OP_DRV_OUT, 0);
if (IS_ERR(req)) {
count = PTR_ERR(req);
goto out_put;
@@ -650,8 +650,7 @@ static int mmc_blk_ioctl_cmd(struct mmc_blk_data *md,
*/
mq = &md->queue;
req = blk_get_request(mq->queue,
- idata->ic.write_flag ? REQ_OP_DRV_OUT : REQ_OP_DRV_IN,
- __GFP_RECLAIM);
+ idata->ic.write_flag ? REQ_OP_DRV_OUT : REQ_OP_DRV_IN, 0);
if (IS_ERR(req)) {
err = PTR_ERR(req);
goto cmd_done;
@@ -721,8 +720,7 @@ static int mmc_blk_ioctl_multi_cmd(struct mmc_blk_data *md,
*/
mq = &md->queue;
req = blk_get_request(mq->queue,
- idata[0]->ic.write_flag ? REQ_OP_DRV_OUT : REQ_OP_DRV_IN,
- __GFP_RECLAIM);
+ idata[0]->ic.write_flag ? REQ_OP_DRV_OUT : REQ_OP_DRV_IN, 0);
if (IS_ERR(req)) {
err = PTR_ERR(req);
goto cmd_err;
@@ -2485,7 +2483,7 @@ static long mmc_rpmb_ioctl(struct file *filp, unsigned int cmd,
break;
}
- return 0;
+ return ret;
}
#ifdef CONFIG_COMPAT
@@ -2750,7 +2748,7 @@ static int mmc_dbg_card_status_get(void *data, u64 *val)
int ret;
/* Ask the block layer about the card status */
- req = blk_get_request(mq->queue, REQ_OP_DRV_IN, __GFP_RECLAIM);
+ req = blk_get_request(mq->queue, REQ_OP_DRV_IN, 0);
if (IS_ERR(req))
return PTR_ERR(req);
req_to_mmc_queue_req(req)->drv_op = MMC_DRV_OP_GET_CARD_STATUS;
@@ -2786,7 +2784,7 @@ static int mmc_ext_csd_open(struct inode *inode, struct file *filp)
return -ENOMEM;
/* Ask the block layer for the EXT CSD */
- req = blk_get_request(mq->queue, REQ_OP_DRV_IN, __GFP_RECLAIM);
+ req = blk_get_request(mq->queue, REQ_OP_DRV_IN, 0);
if (IS_ERR(req)) {
err = PTR_ERR(req);
goto out_free;
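Throughout this file the third argument of blk_get_request() changes meaning: it is now a blk_mq_req_flags_t rather than a gfp mask, and 0 requests the default blocking allocation that __GFP_RECLAIM used to express. Minimal sketch of the new call shape, assuming a valid struct request_queue *q:

	struct request *req;

	req = blk_get_request(q, REQ_OP_DRV_IN, 0);	/* 0: may sleep, no flags */
	if (IS_ERR(req))
		return PTR_ERR(req);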
diff --git a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c
index 56e9a803db21..648eb6743ed5 100644
--- a/drivers/mmc/core/queue.c
+++ b/drivers/mmc/core/queue.c
@@ -111,8 +111,9 @@ static enum blk_eh_timer_return mmc_cqe_timed_out(struct request *req)
__mmc_cqe_recovery_notifier(mq);
return BLK_EH_RESET_TIMER;
}
- /* No timeout */
- return BLK_EH_HANDLED;
+ /* The command completed, so finish the request rather than time out */
+ blk_mq_complete_request(req);
+ return BLK_EH_DONE;
default:
/* Timeout is handled by mmc core */
return BLK_EH_RESET_TIMER;
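The rename from BLK_EH_HANDLED to BLK_EH_DONE comes with the contract change visible here: a timeout handler that knows the request finished must now complete it itself before returning. Hedged sketch of a .timeout-style helper (names hypothetical):

	static enum blk_eh_timer_return demo_timed_out(struct request *req)
	{
		if (demo_hw_still_busy(req))
			return BLK_EH_RESET_TIMER;	/* re-arm the timer */

		blk_mq_complete_request(req);		/* driver owns completion */
		return BLK_EH_DONE;
	}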
diff --git a/drivers/mmc/core/sdio_uart.c b/drivers/mmc/core/sdio_uart.c
index d3c91f412b69..25e113001a3c 100644
--- a/drivers/mmc/core/sdio_uart.c
+++ b/drivers/mmc/core/sdio_uart.c
@@ -1008,19 +1008,6 @@ static int sdio_uart_proc_show(struct seq_file *m, void *v)
return 0;
}
-static int sdio_uart_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, sdio_uart_proc_show, NULL);
-}
-
-static const struct file_operations sdio_uart_proc_fops = {
- .owner = THIS_MODULE,
- .open = sdio_uart_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
static const struct tty_port_operations sdio_uart_port_ops = {
.dtr_rts = uart_dtr_rts,
.carrier_raised = uart_carrier_raised,
@@ -1045,7 +1032,7 @@ static const struct tty_operations sdio_uart_ops = {
.tiocmset = sdio_uart_tiocmset,
.install = sdio_uart_install,
.cleanup = sdio_uart_cleanup,
- .proc_fops = &sdio_uart_proc_fops,
+ .proc_show = sdio_uart_proc_show,
};
static struct tty_driver *sdio_uart_tty_driver;
diff --git a/drivers/mmc/host/sdhci-iproc.c b/drivers/mmc/host/sdhci-iproc.c
index 0ef741bc515d..d0e83db42ae5 100644
--- a/drivers/mmc/host/sdhci-iproc.c
+++ b/drivers/mmc/host/sdhci-iproc.c
@@ -33,6 +33,8 @@ struct sdhci_iproc_host {
const struct sdhci_iproc_data *data;
u32 shadow_cmd;
u32 shadow_blk;
+ bool is_cmd_shadowed;
+ bool is_blk_shadowed;
};
#define REG_OFFSET_IN_BITS(reg) ((reg) << 3 & 0x18)
@@ -48,8 +50,22 @@ static inline u32 sdhci_iproc_readl(struct sdhci_host *host, int reg)
static u16 sdhci_iproc_readw(struct sdhci_host *host, int reg)
{
- u32 val = sdhci_iproc_readl(host, (reg & ~3));
- u16 word = val >> REG_OFFSET_IN_BITS(reg) & 0xffff;
+ struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+ struct sdhci_iproc_host *iproc_host = sdhci_pltfm_priv(pltfm_host);
+ u32 val;
+ u16 word;
+
+ if ((reg == SDHCI_TRANSFER_MODE) && iproc_host->is_cmd_shadowed) {
+ /* Get the saved transfer mode */
+ val = iproc_host->shadow_cmd;
+ } else if ((reg == SDHCI_BLOCK_SIZE || reg == SDHCI_BLOCK_COUNT) &&
+ iproc_host->is_blk_shadowed) {
+ /* Get the saved block info */
+ val = iproc_host->shadow_blk;
+ } else {
+ val = sdhci_iproc_readl(host, (reg & ~3));
+ }
+ word = val >> REG_OFFSET_IN_BITS(reg) & 0xffff;
return word;
}
@@ -105,13 +121,15 @@ static void sdhci_iproc_writew(struct sdhci_host *host, u16 val, int reg)
if (reg == SDHCI_COMMAND) {
/* Write the block now as we are issuing a command */
- if (iproc_host->shadow_blk != 0) {
+ if (iproc_host->is_blk_shadowed) {
sdhci_iproc_writel(host, iproc_host->shadow_blk,
SDHCI_BLOCK_SIZE);
- iproc_host->shadow_blk = 0;
+ iproc_host->is_blk_shadowed = false;
}
oldval = iproc_host->shadow_cmd;
- } else if (reg == SDHCI_BLOCK_SIZE || reg == SDHCI_BLOCK_COUNT) {
+ iproc_host->is_cmd_shadowed = false;
+ } else if ((reg == SDHCI_BLOCK_SIZE || reg == SDHCI_BLOCK_COUNT) &&
+ iproc_host->is_blk_shadowed) {
/* Block size and count are stored in shadow reg */
oldval = iproc_host->shadow_blk;
} else {
@@ -123,9 +141,11 @@ static void sdhci_iproc_writew(struct sdhci_host *host, u16 val, int reg)
if (reg == SDHCI_TRANSFER_MODE) {
/* Save the transfer mode until the command is issued */
iproc_host->shadow_cmd = newval;
+ iproc_host->is_cmd_shadowed = true;
} else if (reg == SDHCI_BLOCK_SIZE || reg == SDHCI_BLOCK_COUNT) {
/* Save the block info until the command is issued */
iproc_host->shadow_blk = newval;
+ iproc_host->is_blk_shadowed = true;
} else {
/* Command or other regular 32-bit write */
sdhci_iproc_writel(host, newval, reg & ~3);
@@ -166,7 +186,7 @@ static const struct sdhci_ops sdhci_iproc_32only_ops = {
static const struct sdhci_pltfm_data sdhci_iproc_cygnus_pltfm_data = {
.quirks = SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK,
- .quirks2 = SDHCI_QUIRK2_ACMD23_BROKEN,
+ .quirks2 = SDHCI_QUIRK2_ACMD23_BROKEN | SDHCI_QUIRK2_HOST_OFF_CARD_ON,
.ops = &sdhci_iproc_32only_ops,
};
@@ -206,7 +226,6 @@ static const struct sdhci_iproc_data iproc_data = {
.caps1 = SDHCI_DRIVER_TYPE_C |
SDHCI_DRIVER_TYPE_D |
SDHCI_SUPPORT_DDR50,
- .mmc_caps = MMC_CAP_1_8V_DDR,
};
static const struct sdhci_pltfm_data sdhci_bcm2835_pltfm_data = {
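The extra booleans fix a real corner case: this controller only takes 32-bit accesses, so 16-bit writes to TRANSFER_MODE and BLOCK_SIZE/COUNT are parked in shadow words until SDHCI_COMMAND is written, and the old "shadow_blk != 0" test could not tell a legitimately zero shadowed value from "nothing shadowed". A generic sketch of the pattern (names hypothetical, not the driver's code):

	struct shadow16 {
		u32  word;	/* two 16-bit registers packed together */
		bool valid;	/* distinguishes "shadowed 0" from "empty" */
	};

	static void shadow_write16(struct shadow16 *s, u16 val, unsigned int shift)
	{
		s->word = (s->word & ~(0xffffU << shift)) | ((u32)val << shift);
		s->valid = true;	/* flush and clear when the command goes out */
	}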
diff --git a/drivers/mtd/devices/Kconfig b/drivers/mtd/devices/Kconfig
index 6def5445e03e..57b02c4b3f63 100644
--- a/drivers/mtd/devices/Kconfig
+++ b/drivers/mtd/devices/Kconfig
@@ -81,6 +81,7 @@ config MTD_DATAFLASH_OTP
config MTD_M25P80
tristate "Support most SPI Flash chips (AT26DF, M25P, W25X, ...)"
depends on SPI_MASTER && MTD_SPI_NOR
+ select SPI_MEM
help
This enables access to most modern SPI flash chips, used for
program and data storage. Series supported include Atmel AT26DF,
diff --git a/drivers/mtd/devices/m25p80.c b/drivers/mtd/devices/m25p80.c
index a4e18f6aaa33..e84563d2067f 100644
--- a/drivers/mtd/devices/m25p80.c
+++ b/drivers/mtd/devices/m25p80.c
@@ -24,12 +24,13 @@
#include <linux/mtd/partitions.h>
#include <linux/spi/spi.h>
+#include <linux/spi/spi-mem.h>
#include <linux/spi/flash.h>
#include <linux/mtd/spi-nor.h>
#define MAX_CMD_SIZE 6
struct m25p {
- struct spi_device *spi;
+ struct spi_mem *spimem;
struct spi_nor spi_nor;
u8 command[MAX_CMD_SIZE];
};
@@ -37,97 +38,68 @@ struct m25p {
static int m25p80_read_reg(struct spi_nor *nor, u8 code, u8 *val, int len)
{
struct m25p *flash = nor->priv;
- struct spi_device *spi = flash->spi;
+ struct spi_mem_op op = SPI_MEM_OP(SPI_MEM_OP_CMD(code, 1),
+ SPI_MEM_OP_NO_ADDR,
+ SPI_MEM_OP_NO_DUMMY,
+ SPI_MEM_OP_DATA_IN(len, val, 1));
int ret;
- ret = spi_write_then_read(spi, &code, 1, val, len);
+ ret = spi_mem_exec_op(flash->spimem, &op);
if (ret < 0)
- dev_err(&spi->dev, "error %d reading %x\n", ret, code);
+ dev_err(&flash->spimem->spi->dev, "error %d reading %x\n", ret,
+ code);
return ret;
}
-static void m25p_addr2cmd(struct spi_nor *nor, unsigned int addr, u8 *cmd)
-{
- /* opcode is in cmd[0] */
- cmd[1] = addr >> (nor->addr_width * 8 - 8);
- cmd[2] = addr >> (nor->addr_width * 8 - 16);
- cmd[3] = addr >> (nor->addr_width * 8 - 24);
- cmd[4] = addr >> (nor->addr_width * 8 - 32);
-}
-
-static int m25p_cmdsz(struct spi_nor *nor)
-{
- return 1 + nor->addr_width;
-}
-
static int m25p80_write_reg(struct spi_nor *nor, u8 opcode, u8 *buf, int len)
{
struct m25p *flash = nor->priv;
- struct spi_device *spi = flash->spi;
-
- flash->command[0] = opcode;
- if (buf)
- memcpy(&flash->command[1], buf, len);
+ struct spi_mem_op op = SPI_MEM_OP(SPI_MEM_OP_CMD(opcode, 1),
+ SPI_MEM_OP_NO_ADDR,
+ SPI_MEM_OP_NO_DUMMY,
+ SPI_MEM_OP_DATA_OUT(len, buf, 1));
- return spi_write(spi, flash->command, len + 1);
+ return spi_mem_exec_op(flash->spimem, &op);
}
static ssize_t m25p80_write(struct spi_nor *nor, loff_t to, size_t len,
const u_char *buf)
{
struct m25p *flash = nor->priv;
- struct spi_device *spi = flash->spi;
- unsigned int inst_nbits, addr_nbits, data_nbits, data_idx;
- struct spi_transfer t[3] = {};
- struct spi_message m;
- int cmd_sz = m25p_cmdsz(nor);
- ssize_t ret;
+ struct spi_mem_op op =
+ SPI_MEM_OP(SPI_MEM_OP_CMD(nor->program_opcode, 1),
+ SPI_MEM_OP_ADDR(nor->addr_width, to, 1),
+ SPI_MEM_OP_DUMMY(0, 1),
+ SPI_MEM_OP_DATA_OUT(len, buf, 1));
+ size_t remaining = len;
+ int ret;
/* get transfer protocols. */
- inst_nbits = spi_nor_get_protocol_inst_nbits(nor->write_proto);
- addr_nbits = spi_nor_get_protocol_addr_nbits(nor->write_proto);
- data_nbits = spi_nor_get_protocol_data_nbits(nor->write_proto);
-
- spi_message_init(&m);
+ op.cmd.buswidth = spi_nor_get_protocol_inst_nbits(nor->write_proto);
+ op.addr.buswidth = spi_nor_get_protocol_addr_nbits(nor->write_proto);
+ op.dummy.buswidth = op.addr.buswidth;
+ op.data.buswidth = spi_nor_get_protocol_data_nbits(nor->write_proto);
if (nor->program_opcode == SPINOR_OP_AAI_WP && nor->sst_write_second)
- cmd_sz = 1;
-
- flash->command[0] = nor->program_opcode;
- m25p_addr2cmd(nor, to, flash->command);
+ op.addr.nbytes = 0;
- t[0].tx_buf = flash->command;
- t[0].tx_nbits = inst_nbits;
- t[0].len = cmd_sz;
- spi_message_add_tail(&t[0], &m);
-
- /* split the op code and address bytes into two transfers if needed. */
- data_idx = 1;
- if (addr_nbits != inst_nbits) {
- t[0].len = 1;
+ while (remaining) {
+ op.data.nbytes = remaining < UINT_MAX ? remaining : UINT_MAX;
+ ret = spi_mem_adjust_op_size(flash->spimem, &op);
+ if (ret)
+ return ret;
- t[1].tx_buf = &flash->command[1];
- t[1].tx_nbits = addr_nbits;
- t[1].len = cmd_sz - 1;
- spi_message_add_tail(&t[1], &m);
+ ret = spi_mem_exec_op(flash->spimem, &op);
+ if (ret)
+ return ret;
- data_idx = 2;
+ op.addr.val += op.data.nbytes;
+ remaining -= op.data.nbytes;
+ op.data.buf.out += op.data.nbytes;
}
- t[data_idx].tx_buf = buf;
- t[data_idx].tx_nbits = data_nbits;
- t[data_idx].len = len;
- spi_message_add_tail(&t[data_idx], &m);
-
- ret = spi_sync(spi, &m);
- if (ret)
- return ret;
-
- ret = m.actual_length - cmd_sz;
- if (ret < 0)
- return -EIO;
- return ret;
+ return len;
}
/*
@@ -138,92 +110,39 @@ static ssize_t m25p80_read(struct spi_nor *nor, loff_t from, size_t len,
u_char *buf)
{
struct m25p *flash = nor->priv;
- struct spi_device *spi = flash->spi;
- unsigned int inst_nbits, addr_nbits, data_nbits, data_idx;
- struct spi_transfer t[3];
- struct spi_message m;
- unsigned int dummy = nor->read_dummy;
- ssize_t ret;
- int cmd_sz;
+ struct spi_mem_op op =
+ SPI_MEM_OP(SPI_MEM_OP_CMD(nor->read_opcode, 1),
+ SPI_MEM_OP_ADDR(nor->addr_width, from, 1),
+ SPI_MEM_OP_DUMMY(nor->read_dummy, 1),
+ SPI_MEM_OP_DATA_IN(len, buf, 1));
+ size_t remaining = len;
+ int ret;
/* get transfer protocols. */
- inst_nbits = spi_nor_get_protocol_inst_nbits(nor->read_proto);
- addr_nbits = spi_nor_get_protocol_addr_nbits(nor->read_proto);
- data_nbits = spi_nor_get_protocol_data_nbits(nor->read_proto);
+ op.cmd.buswidth = spi_nor_get_protocol_inst_nbits(nor->read_proto);
+ op.addr.buswidth = spi_nor_get_protocol_addr_nbits(nor->read_proto);
+ op.dummy.buswidth = op.addr.buswidth;
+ op.data.buswidth = spi_nor_get_protocol_data_nbits(nor->read_proto);
/* convert the dummy cycles to the number of bytes */
- dummy = (dummy * addr_nbits) / 8;
-
- if (spi_flash_read_supported(spi)) {
- struct spi_flash_read_message msg;
-
- memset(&msg, 0, sizeof(msg));
+ op.dummy.nbytes = (nor->read_dummy * op.dummy.buswidth) / 8;
- msg.buf = buf;
- msg.from = from;
- msg.len = len;
- msg.read_opcode = nor->read_opcode;
- msg.addr_width = nor->addr_width;
- msg.dummy_bytes = dummy;
- msg.opcode_nbits = inst_nbits;
- msg.addr_nbits = addr_nbits;
- msg.data_nbits = data_nbits;
-
- ret = spi_flash_read(spi, &msg);
- if (ret < 0)
+ while (remaining) {
+ op.data.nbytes = remaining < UINT_MAX ? remaining : UINT_MAX;
+ ret = spi_mem_adjust_op_size(flash->spimem, &op);
+ if (ret)
return ret;
- return msg.retlen;
- }
- spi_message_init(&m);
- memset(t, 0, (sizeof t));
-
- flash->command[0] = nor->read_opcode;
- m25p_addr2cmd(nor, from, flash->command);
-
- t[0].tx_buf = flash->command;
- t[0].tx_nbits = inst_nbits;
- t[0].len = m25p_cmdsz(nor) + dummy;
- spi_message_add_tail(&t[0], &m);
-
- /*
- * Set all dummy/mode cycle bits to avoid sending some manufacturer
- * specific pattern, which might make the memory enter its Continuous
- * Read mode by mistake.
- * Based on the different mode cycle bit patterns listed and described
- * in the JESD216B specification, the 0xff value works for all memories
- * and all manufacturers.
- */
- cmd_sz = t[0].len;
- memset(flash->command + cmd_sz - dummy, 0xff, dummy);
-
- /* split the op code and address bytes into two transfers if needed. */
- data_idx = 1;
- if (addr_nbits != inst_nbits) {
- t[0].len = 1;
-
- t[1].tx_buf = &flash->command[1];
- t[1].tx_nbits = addr_nbits;
- t[1].len = cmd_sz - 1;
- spi_message_add_tail(&t[1], &m);
+ ret = spi_mem_exec_op(flash->spimem, &op);
+ if (ret)
+ return ret;
- data_idx = 2;
+ op.addr.val += op.data.nbytes;
+ remaining -= op.data.nbytes;
+ op.data.buf.in += op.data.nbytes;
}
- t[data_idx].rx_buf = buf;
- t[data_idx].rx_nbits = data_nbits;
- t[data_idx].len = min3(len, spi_max_transfer_size(spi),
- spi_max_message_size(spi) - cmd_sz);
- spi_message_add_tail(&t[data_idx], &m);
-
- ret = spi_sync(spi, &m);
- if (ret)
- return ret;
-
- ret = m.actual_length - cmd_sz;
- if (ret < 0)
- return -EIO;
- return ret;
+ return len;
}
/*
@@ -231,8 +150,9 @@ static ssize_t m25p80_read(struct spi_nor *nor, loff_t from, size_t len,
* matches what the READ command supports, at least until this driver
* understands FAST_READ (for clocks over 25 MHz).
*/
-static int m25p_probe(struct spi_device *spi)
+static int m25p_probe(struct spi_mem *spimem)
{
+ struct spi_device *spi = spimem->spi;
struct flash_platform_data *data;
struct m25p *flash;
struct spi_nor *nor;
@@ -244,9 +164,9 @@ static int m25p_probe(struct spi_device *spi)
char *flash_name;
int ret;
- data = dev_get_platdata(&spi->dev);
+ data = dev_get_platdata(&spimem->spi->dev);
- flash = devm_kzalloc(&spi->dev, sizeof(*flash), GFP_KERNEL);
+ flash = devm_kzalloc(&spimem->spi->dev, sizeof(*flash), GFP_KERNEL);
if (!flash)
return -ENOMEM;
@@ -258,12 +178,12 @@ static int m25p_probe(struct spi_device *spi)
nor->write_reg = m25p80_write_reg;
nor->read_reg = m25p80_read_reg;
- nor->dev = &spi->dev;
+ nor->dev = &spimem->spi->dev;
spi_nor_set_flash_node(nor, spi->dev.of_node);
nor->priv = flash;
- spi_set_drvdata(spi, flash);
- flash->spi = spi;
+ spi_mem_set_drvdata(spimem, flash);
+ flash->spimem = spimem;
if (spi->mode & SPI_RX_QUAD) {
hwcaps.mask |= SNOR_HWCAPS_READ_1_1_4;
@@ -303,9 +223,9 @@ static int m25p_probe(struct spi_device *spi)
}
-static int m25p_remove(struct spi_device *spi)
+static int m25p_remove(struct spi_mem *spimem)
{
- struct m25p *flash = spi_get_drvdata(spi);
+ struct m25p *flash = spi_mem_get_drvdata(spimem);
spi_nor_restore(&flash->spi_nor);
@@ -313,9 +233,9 @@ static int m25p_remove(struct spi_device *spi)
return mtd_device_unregister(&flash->spi_nor.mtd);
}
-static void m25p_shutdown(struct spi_device *spi)
+static void m25p_shutdown(struct spi_mem *spimem)
{
- struct m25p *flash = spi_get_drvdata(spi);
+ struct m25p *flash = spi_mem_get_drvdata(spimem);
spi_nor_restore(&flash->spi_nor);
}
@@ -386,12 +306,14 @@ static const struct of_device_id m25p_of_table[] = {
};
MODULE_DEVICE_TABLE(of, m25p_of_table);
-static struct spi_driver m25p80_driver = {
- .driver = {
- .name = "m25p80",
- .of_match_table = m25p_of_table,
+static struct spi_mem_driver m25p80_driver = {
+ .spidrv = {
+ .driver = {
+ .name = "m25p80",
+ .of_match_table = m25p_of_table,
+ },
+ .id_table = m25p_ids,
},
- .id_table = m25p_ids,
.probe = m25p_probe,
.remove = m25p_remove,
.shutdown = m25p_shutdown,
@@ -402,7 +324,7 @@ static struct spi_driver m25p80_driver = {
*/
};
-module_spi_driver(m25p80_driver);
+module_spi_mem_driver(m25p80_driver);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Mike Lavender");
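The read and write conversions above share one loop shape: clamp the next chunk to what fits in op.data.nbytes, let spi_mem_adjust_op_size() shrink it further to the controller's limit, execute the op, then advance the address, the buffer pointer and the remaining count by the size actually used. A minimal standalone C sketch of that pattern, where adjust_chunk() and do_transfer() are hypothetical stand-ins for the spi-mem calls, not kernel APIs:

#include <stddef.h>
#include <stdint.h>
#include <string.h>
#include <stdio.h>
#include <sys/types.h>

/* Hypothetical controller limit standing in for spi_mem_adjust_op_size(). */
static size_t adjust_chunk(size_t want, size_t hw_max)
{
	return want < hw_max ? want : hw_max;
}

/* Transfer one chunk; here we just copy from a backing array. */
static int do_transfer(const uint8_t *mem, uint64_t addr, uint8_t *buf, size_t n)
{
	memcpy(buf, mem + (size_t)addr, n);
	return 0;
}

/* Same shape as the m25p80 loop: clamp, execute, advance, repeat. */
static ssize_t chunked_read(const uint8_t *mem, uint64_t addr,
			    uint8_t *buf, size_t len, size_t hw_max)
{
	size_t remaining = len;

	while (remaining) {
		size_t n = adjust_chunk(remaining, hw_max);

		if (do_transfer(mem, addr, buf, n))
			return -1;

		addr += n;	/* advance flash offset */
		buf += n;	/* advance destination */
		remaining -= n;
	}
	return (ssize_t)len;	/* full length on success, like the driver */
}

int main(void)
{
	uint8_t mem[64], out[64];

	for (int i = 0; i < 64; i++)
		mem[i] = (uint8_t)i;
	printf("read %zd bytes\n", chunked_read(mem, 0, out, sizeof(out), 7));
	return 0;
}

Returning the full len on success mirrors the converted driver, which treats any failure inside the loop as an error rather than reporting a partial count.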
diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c
index 16ae4ae8e8f9..29c0bfd74e8a 100644
--- a/drivers/mtd/mtd_blkdevs.c
+++ b/drivers/mtd/mtd_blkdevs.c
@@ -82,7 +82,6 @@ static blk_status_t do_blktrans_request(struct mtd_blktrans_ops *tr,
block = blk_rq_pos(req) << 9 >> tr->blkshift;
nsect = blk_rq_cur_bytes(req) >> tr->blkshift;
- buf = bio_data(req->bio);
if (req_op(req) == REQ_OP_FLUSH) {
if (tr->flush(dev))
@@ -100,9 +99,14 @@ static blk_status_t do_blktrans_request(struct mtd_blktrans_ops *tr,
return BLK_STS_IOERR;
return BLK_STS_OK;
case REQ_OP_READ:
- for (; nsect > 0; nsect--, block++, buf += tr->blksize)
- if (tr->readsect(dev, block, buf))
+ buf = kmap(bio_page(req->bio)) + bio_offset(req->bio);
+ for (; nsect > 0; nsect--, block++, buf += tr->blksize) {
+ if (tr->readsect(dev, block, buf)) {
+ kunmap(bio_page(req->bio));
return BLK_STS_IOERR;
+ }
+ }
+ kunmap(bio_page(req->bio));
rq_flush_dcache_pages(req);
return BLK_STS_OK;
case REQ_OP_WRITE:
@@ -110,9 +114,14 @@ static blk_status_t do_blktrans_request(struct mtd_blktrans_ops *tr,
return BLK_STS_IOERR;
rq_flush_dcache_pages(req);
- for (; nsect > 0; nsect--, block++, buf += tr->blksize)
- if (tr->writesect(dev, block, buf))
+ buf = kmap(bio_page(req->bio)) + bio_offset(req->bio);
+ for (; nsect > 0; nsect--, block++, buf += tr->blksize) {
+ if (tr->writesect(dev, block, buf)) {
+ kunmap(bio_page(req->bio));
return BLK_STS_IOERR;
+ }
+ }
+ kunmap(bio_page(req->bio));
return BLK_STS_OK;
default:
return BLK_STS_IOERR;
@@ -418,7 +427,6 @@ int add_mtd_blktrans_dev(struct mtd_blktrans_dev *new)
new->rq->queuedata = new;
blk_queue_logical_block_size(new->rq, tr->blksize);
- blk_queue_bounce_limit(new->rq, BLK_BOUNCE_HIGH);
blk_queue_flag_set(QUEUE_FLAG_NONROT, new->rq);
blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, new->rq);
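The kmap()/kunmap() hunks above must pair the unmap with every early return inside the sector loop, which is why each error branch grew its own kunmap(). The usual C idiom for keeping such a release single-sourced is a goto-based cleanup label; a hedged standalone sketch of that equivalent shape, where acquire()/release() are hypothetical stand-ins for kmap()/kunmap():

#include <stdio.h>
#include <stdlib.h>

static void *acquire(size_t n) { return malloc(n); } /* stands in for kmap() */
static void release(void *p)   { free(p); }          /* stands in for kunmap() */

static int process_sector(unsigned char *buf, int i)
{
	buf[0] = (unsigned char)i;
	return i == 3 ? -1 : 0; /* fail on one sector to exercise cleanup */
}

static int process_all(int nsect)
{
	unsigned char *buf;
	int ret = 0;

	buf = acquire(512);
	if (!buf)
		return -1;

	for (int i = 0; i < nsect; i++) {
		if (process_sector(buf, i)) {
			ret = -1;
			goto out; /* single release path */
		}
	}
out:
	release(buf);
	return ret;
}

int main(void)
{
	printf("%d\n", process_all(8));
	return 0;
}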
diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c
index 807d17d863b3..64a1fcaafd9a 100644
--- a/drivers/mtd/mtdcore.c
+++ b/drivers/mtd/mtdcore.c
@@ -1829,18 +1829,6 @@ static int mtd_proc_show(struct seq_file *m, void *v)
mutex_unlock(&mtd_table_mutex);
return 0;
}
-
-static int mtd_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, mtd_proc_show, NULL);
-}
-
-static const struct file_operations mtd_proc_ops = {
- .open = mtd_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
#endif /* CONFIG_PROC_FS */
/*====================================================================*/
@@ -1883,7 +1871,7 @@ static int __init init_mtd(void)
goto err_bdi;
}
- proc_mtd = proc_create("mtd", 0, NULL, &mtd_proc_ops);
+ proc_mtd = proc_create_single("mtd", 0, NULL, mtd_proc_show);
ret = init_mtdchar();
if (ret)
diff --git a/drivers/mtd/nand/onenand/omap2.c b/drivers/mtd/nand/onenand/omap2.c
index 9c159f0dd9a6..321137158ff3 100644
--- a/drivers/mtd/nand/onenand/omap2.c
+++ b/drivers/mtd/nand/onenand/omap2.c
@@ -375,56 +375,42 @@ static int omap2_onenand_read_bufferram(struct mtd_info *mtd, int area,
{
struct omap2_onenand *c = container_of(mtd, struct omap2_onenand, mtd);
struct onenand_chip *this = mtd->priv;
- dma_addr_t dma_src, dma_dst;
- int bram_offset;
+ struct device *dev = &c->pdev->dev;
void *buf = (void *)buffer;
+ dma_addr_t dma_src, dma_dst;
+ int bram_offset, err;
size_t xtra;
- int ret;
bram_offset = omap2_onenand_bufferram_offset(mtd, area) + area + offset;
- if (bram_offset & 3 || (size_t)buf & 3 || count < 384)
- goto out_copy;
-
- /* panic_write() may be in an interrupt context */
- if (in_interrupt() || oops_in_progress)
+ /*
+ * Fall back to PIO mode if the buffer address is not DMA-able, the
+ * length is too short to make DMA transfers profitable, or
+ * panic_write() may be running in an interrupt context.
+ */
+ if (!virt_addr_valid(buf) || bram_offset & 3 || (size_t)buf & 3 ||
+ count < 384 || in_interrupt() || oops_in_progress)
goto out_copy;
- if (buf >= high_memory) {
- struct page *p1;
-
- if (((size_t)buf & PAGE_MASK) !=
- ((size_t)(buf + count - 1) & PAGE_MASK))
- goto out_copy;
- p1 = vmalloc_to_page(buf);
- if (!p1)
- goto out_copy;
- buf = page_address(p1) + ((size_t)buf & ~PAGE_MASK);
- }
-
xtra = count & 3;
if (xtra) {
count -= xtra;
memcpy(buf + count, this->base + bram_offset + count, xtra);
}
+ dma_dst = dma_map_single(dev, buf, count, DMA_FROM_DEVICE);
dma_src = c->phys_base + bram_offset;
- dma_dst = dma_map_single(&c->pdev->dev, buf, count, DMA_FROM_DEVICE);
- if (dma_mapping_error(&c->pdev->dev, dma_dst)) {
- dev_err(&c->pdev->dev,
- "Couldn't DMA map a %d byte buffer\n",
- count);
- goto out_copy;
- }
- ret = omap2_onenand_dma_transfer(c, dma_src, dma_dst, count);
- dma_unmap_single(&c->pdev->dev, dma_dst, count, DMA_FROM_DEVICE);
-
- if (ret) {
- dev_err(&c->pdev->dev, "timeout waiting for DMA\n");
+ if (dma_mapping_error(dev, dma_dst)) {
+ dev_err(dev, "Couldn't DMA map a %d byte buffer\n", count);
goto out_copy;
}
- return 0;
+ err = omap2_onenand_dma_transfer(c, dma_src, dma_dst, count);
+ dma_unmap_single(dev, dma_dst, count, DMA_FROM_DEVICE);
+ if (!err)
+ return 0;
+
+ dev_err(dev, "timeout waiting for DMA\n");
out_copy:
memcpy(buf, this->base + bram_offset, count);
@@ -437,49 +423,34 @@ static int omap2_onenand_write_bufferram(struct mtd_info *mtd, int area,
{
struct omap2_onenand *c = container_of(mtd, struct omap2_onenand, mtd);
struct onenand_chip *this = mtd->priv;
- dma_addr_t dma_src, dma_dst;
- int bram_offset;
+ struct device *dev = &c->pdev->dev;
void *buf = (void *)buffer;
- int ret;
+ dma_addr_t dma_src, dma_dst;
+ int bram_offset, err;
bram_offset = omap2_onenand_bufferram_offset(mtd, area) + area + offset;
- if (bram_offset & 3 || (size_t)buf & 3 || count < 384)
- goto out_copy;
-
- /* panic_write() may be in an interrupt context */
- if (in_interrupt() || oops_in_progress)
+ /*
+ * Fall back to PIO mode if the buffer address is not DMA-able, the
+ * length is too short to make DMA transfers profitable, or
+ * panic_write() may be running in an interrupt context.
+ */
+ if (!virt_addr_valid(buf) || bram_offset & 3 || (size_t)buf & 3 ||
+ count < 384 || in_interrupt() || oops_in_progress)
goto out_copy;
- if (buf >= high_memory) {
- struct page *p1;
-
- if (((size_t)buf & PAGE_MASK) !=
- ((size_t)(buf + count - 1) & PAGE_MASK))
- goto out_copy;
- p1 = vmalloc_to_page(buf);
- if (!p1)
- goto out_copy;
- buf = page_address(p1) + ((size_t)buf & ~PAGE_MASK);
- }
-
- dma_src = dma_map_single(&c->pdev->dev, buf, count, DMA_TO_DEVICE);
+ dma_src = dma_map_single(dev, buf, count, DMA_TO_DEVICE);
dma_dst = c->phys_base + bram_offset;
- if (dma_mapping_error(&c->pdev->dev, dma_src)) {
- dev_err(&c->pdev->dev,
- "Couldn't DMA map a %d byte buffer\n",
- count);
- return -1;
- }
-
- ret = omap2_onenand_dma_transfer(c, dma_src, dma_dst, count);
- dma_unmap_single(&c->pdev->dev, dma_src, count, DMA_TO_DEVICE);
-
- if (ret) {
- dev_err(&c->pdev->dev, "timeout waiting for DMA\n");
+ if (dma_mapping_error(dev, dma_src)) {
+ dev_err(dev, "Couldn't DMA map a %d byte buffer\n", count);
goto out_copy;
}
- return 0;
+ err = omap2_onenand_dma_transfer(c, dma_src, dma_dst, count);
+ dma_unmap_single(dev, dma_src, count, DMA_TO_DEVICE);
+ if (!err)
+ return 0;
+
+ dev_err(dev, "timeout waiting for DMA\n");
out_copy:
memcpy(this->base + bram_offset, buf, count);
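Both bufferram paths now funnel their DMA-eligibility tests into one condition: valid kernel address, 4-byte alignment of both offset and buffer, a minimum size of 384 bytes, and a non-atomic context. A standalone sketch of the same predicate, where addr_is_dmaable() and the atomic_ctx flag are hypothetical stand-ins for virt_addr_valid() and in_interrupt()/oops_in_progress:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define DMA_MIN_LEN 384 /* below this, DMA setup cost outweighs the copy */

/* Stand-in for virt_addr_valid(): here, any non-NULL pointer. */
static bool addr_is_dmaable(const void *p) { return p != NULL; }

/* Mirror of the driver's test: one predicate, PIO when any check fails. */
static bool use_dma(const void *buf, unsigned offset, size_t count,
		    bool atomic_ctx)
{
	if (!addr_is_dmaable(buf))
		return false;
	if ((offset & 3) || ((uintptr_t)buf & 3)) /* 4-byte alignment */
		return false;
	if (count < DMA_MIN_LEN)
		return false;
	if (atomic_ctx) /* e.g. panic_write() from IRQ context */
		return false;
	return true;
}

int main(void)
{
	char buf[512];

	printf("%d\n", use_dma(buf, 0, sizeof(buf), false));     /* 1: DMA */
	printf("%d\n", use_dma(buf + 1, 0, sizeof(buf), false)); /* 0: unaligned */
	return 0;
}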
diff --git a/drivers/mtd/nand/raw/marvell_nand.c b/drivers/mtd/nand/raw/marvell_nand.c
index 1d779a35ac8e..ebb1d141b900 100644
--- a/drivers/mtd/nand/raw/marvell_nand.c
+++ b/drivers/mtd/nand/raw/marvell_nand.c
@@ -1074,7 +1074,7 @@ static int marvell_nfc_hw_ecc_hmg_do_write_page(struct nand_chip *chip,
return ret;
ret = marvell_nfc_wait_op(chip,
- chip->data_interface.timings.sdr.tPROG_max);
+ PSEC_TO_MSEC(chip->data_interface.timings.sdr.tPROG_max));
return ret;
}
@@ -1194,11 +1194,13 @@ static void marvell_nfc_hw_ecc_bch_read_chunk(struct nand_chip *chip, int chunk,
NDCB0_CMD2(NAND_CMD_READSTART);
/*
- * Trigger the naked read operation only on the last chunk.
- * Otherwise, use monolithic read.
+ * Trigger the monolithic read on the first chunk, then naked read on
+ * intermediate chunks and finally a last naked read on the last chunk.
*/
- if (lt->nchunks == 1 || (chunk < lt->nchunks - 1))
+ if (chunk == 0)
nfc_op.ndcb[0] |= NDCB0_CMD_XTYPE(XTYPE_MONOLITHIC_RW);
+ else if (chunk < lt->nchunks - 1)
+ nfc_op.ndcb[0] |= NDCB0_CMD_XTYPE(XTYPE_NAKED_RW);
else
nfc_op.ndcb[0] |= NDCB0_CMD_XTYPE(XTYPE_LAST_NAKED_RW);
@@ -1408,6 +1410,7 @@ marvell_nfc_hw_ecc_bch_write_chunk(struct nand_chip *chip, int chunk,
struct marvell_nand_chip *marvell_nand = to_marvell_nand(chip);
struct marvell_nfc *nfc = to_marvell_nfc(chip->controller);
const struct marvell_hw_ecc_layout *lt = to_marvell_nand(chip)->layout;
+ u32 xtype;
int ret;
struct marvell_nfc_op nfc_op = {
.ndcb[0] = NDCB0_CMD_TYPE(TYPE_WRITE) | NDCB0_LEN_OVRD,
@@ -1423,7 +1426,12 @@ marvell_nfc_hw_ecc_bch_write_chunk(struct nand_chip *chip, int chunk,
* last naked write.
*/
if (chunk == 0) {
- nfc_op.ndcb[0] |= NDCB0_CMD_XTYPE(XTYPE_WRITE_DISPATCH) |
+ if (lt->nchunks == 1)
+ xtype = XTYPE_MONOLITHIC_RW;
+ else
+ xtype = XTYPE_WRITE_DISPATCH;
+
+ nfc_op.ndcb[0] |= NDCB0_CMD_XTYPE(xtype) |
NDCB0_ADDR_CYC(marvell_nand->addr_cyc) |
NDCB0_CMD1(NAND_CMD_SEQIN);
nfc_op.ndcb[1] |= NDCB1_ADDRS_PAGE(page);
@@ -1494,7 +1502,7 @@ static int marvell_nfc_hw_ecc_bch_write_page(struct mtd_info *mtd,
}
ret = marvell_nfc_wait_op(chip,
- chip->data_interface.timings.sdr.tPROG_max);
+ PSEC_TO_MSEC(chip->data_interface.timings.sdr.tPROG_max));
marvell_nfc_disable_hw_ecc(chip);
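The tPROG_max fixes here, and the tWB wait added to nand_base.c just below, hinge on unit conversion: NAND SDR timings are stored in picoseconds, while the wait helper expects milliseconds and ndelay() expects nanoseconds. A small sketch of the arithmetic, assuming the kernel's round-up semantics (DIV_ROUND_UP) so a wait is never truncated short:

#include <stdint.h>
#include <stdio.h>

/* Round-up division, as the kernel's DIV_ROUND_UP() does. */
#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

/* ps -> ms and ps -> ns, rounding up so we never wait too little. */
#define PSEC_TO_MSEC(x) DIV_ROUND_UP((x), 1000000000ULL)
#define PSEC_TO_NSEC(x) DIV_ROUND_UP((x), 1000ULL)

int main(void)
{
	uint64_t tPROG_max = 700000000ULL; /* 700 us, a typical tPROG, in ps */
	uint64_t tWB_max = 100000ULL;      /* 100 ns in ps */

	/* 700 us rounds up to 1 ms; 100000 ps is exactly 100 ns. */
	printf("tPROG_max: %llu ms\n", (unsigned long long)PSEC_TO_MSEC(tPROG_max));
	printf("tWB_max:   %llu ns\n", (unsigned long long)PSEC_TO_NSEC(tWB_max));
	return 0;
}

Passing the raw picosecond value straight to a millisecond-based wait, as the old code did, yields an absurdly long timeout, which is what the hunks above correct.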
diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c
index 72f3a89da513..f28c3a555861 100644
--- a/drivers/mtd/nand/raw/nand_base.c
+++ b/drivers/mtd/nand/raw/nand_base.c
@@ -706,12 +706,17 @@ static void nand_wait_status_ready(struct mtd_info *mtd, unsigned long timeo)
*/
int nand_soft_waitrdy(struct nand_chip *chip, unsigned long timeout_ms)
{
+ const struct nand_sdr_timings *timings;
u8 status = 0;
int ret;
if (!chip->exec_op)
return -ENOTSUPP;
+ /* Wait tWB before polling the STATUS reg. */
+ timings = nand_get_sdr_timings(&chip->data_interface);
+ ndelay(PSEC_TO_NSEC(timings->tWB_max));
+
ret = nand_status_op(chip, NULL);
if (ret)
return ret;
diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c
index 1ed9529e7bd1..5eb0df2e5464 100644
--- a/drivers/net/bonding/bond_alb.c
+++ b/drivers/net/bonding/bond_alb.c
@@ -450,7 +450,7 @@ static void rlb_update_client(struct rlb_client_info *client_info)
{
int i;
- if (!client_info->slave)
+ if (!client_info->slave || !is_valid_ether_addr(client_info->mac_dst))
return;
for (i = 0; i < RLB_ARP_BURST_SIZE; i++) {
@@ -943,6 +943,10 @@ static void alb_send_lp_vid(struct slave *slave, u8 mac_addr[],
skb->priority = TC_PRIO_CONTROL;
skb->dev = slave->dev;
+ netdev_dbg(slave->bond->dev,
+ "Send learning packet: dev %s mac %pM vlan %d\n",
+ slave->dev->name, mac_addr, vid);
+
if (vid)
__vlan_hwaccel_put_tag(skb, vlan_proto, vid);
@@ -965,14 +969,13 @@ static int alb_upper_dev_walk(struct net_device *upper, void *_data)
u8 *mac_addr = data->mac_addr;
struct bond_vlan_tag *tags;
- if (is_vlan_dev(upper) && vlan_get_encap_level(upper) == 0) {
- if (strict_match &&
- ether_addr_equal_64bits(mac_addr,
- upper->dev_addr)) {
+ if (is_vlan_dev(upper) &&
+ bond->nest_level == vlan_get_encap_level(upper) - 1) {
+ if (upper->addr_assign_type == NET_ADDR_STOLEN) {
alb_send_lp_vid(slave, mac_addr,
vlan_dev_vlan_proto(upper),
vlan_dev_vlan_id(upper));
- } else if (!strict_match) {
+ } else {
alb_send_lp_vid(slave, upper->dev_addr,
vlan_dev_vlan_proto(upper),
vlan_dev_vlan_id(upper));
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 718e4914e3a0..1f1e97b26f95 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -1738,6 +1738,8 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev,
if (bond_mode_uses_xmit_hash(bond))
bond_update_slave_arr(bond, NULL);
+ bond->nest_level = dev_get_nest_level(bond_dev);
+
netdev_info(bond_dev, "Enslaving %s as %s interface with %s link\n",
slave_dev->name,
bond_is_active_slave(new_slave) ? "an active" : "a backup",
diff --git a/drivers/net/bonding/bond_procfs.c b/drivers/net/bonding/bond_procfs.c
index 01059f1a7bca..9f7d83e827c3 100644
--- a/drivers/net/bonding/bond_procfs.c
+++ b/drivers/net/bonding/bond_procfs.c
@@ -10,7 +10,7 @@
static void *bond_info_seq_start(struct seq_file *seq, loff_t *pos)
__acquires(RCU)
{
- struct bonding *bond = seq->private;
+ struct bonding *bond = PDE_DATA(file_inode(seq->file));
struct list_head *iter;
struct slave *slave;
loff_t off = 0;
@@ -29,7 +29,7 @@ static void *bond_info_seq_start(struct seq_file *seq, loff_t *pos)
static void *bond_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
- struct bonding *bond = seq->private;
+ struct bonding *bond = PDE_DATA(file_inode(seq->file));
struct list_head *iter;
struct slave *slave;
bool found = false;
@@ -56,7 +56,7 @@ static void bond_info_seq_stop(struct seq_file *seq, void *v)
static void bond_info_show_master(struct seq_file *seq)
{
- struct bonding *bond = seq->private;
+ struct bonding *bond = PDE_DATA(file_inode(seq->file));
const struct bond_opt_value *optval;
struct slave *curr, *primary;
int i;
@@ -167,7 +167,7 @@ static void bond_info_show_master(struct seq_file *seq)
static void bond_info_show_slave(struct seq_file *seq,
const struct slave *slave)
{
- struct bonding *bond = seq->private;
+ struct bonding *bond = PDE_DATA(file_inode(seq->file));
seq_printf(seq, "\nSlave Interface: %s\n", slave->dev->name);
seq_printf(seq, "MII Status: %s\n", bond_slave_link_status(slave->link));
@@ -257,38 +257,14 @@ static const struct seq_operations bond_info_seq_ops = {
.show = bond_info_seq_show,
};
-static int bond_info_open(struct inode *inode, struct file *file)
-{
- struct seq_file *seq;
- int res;
-
- res = seq_open(file, &bond_info_seq_ops);
- if (!res) {
- /* recover the pointer buried in proc_dir_entry data */
- seq = file->private_data;
- seq->private = PDE_DATA(inode);
- }
-
- return res;
-}
-
-static const struct file_operations bond_info_fops = {
- .owner = THIS_MODULE,
- .open = bond_info_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
void bond_create_proc_entry(struct bonding *bond)
{
struct net_device *bond_dev = bond->dev;
struct bond_net *bn = net_generic(dev_net(bond_dev), bond_net_id);
if (bn->proc_dir) {
- bond->proc_entry = proc_create_data(bond_dev->name,
- 0444, bn->proc_dir,
- &bond_info_fops, bond);
+ bond->proc_entry = proc_create_seq_data(bond_dev->name, 0444,
+ bn->proc_dir, &bond_info_seq_ops, bond);
if (bond->proc_entry == NULL)
netdev_warn(bond_dev, "Cannot create /proc/net/%s/%s\n",
DRV_NAME, bond_dev->name);
diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c
index b1779566c5bb..3c71f1cb205f 100644
--- a/drivers/net/can/dev.c
+++ b/drivers/net/can/dev.c
@@ -605,7 +605,7 @@ void can_bus_off(struct net_device *dev)
{
struct can_priv *priv = netdev_priv(dev);
- netdev_dbg(dev, "bus-off\n");
+ netdev_info(dev, "bus-off\n");
netif_carrier_off(dev);
diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c
index 634c51e6b8ae..d53a45bf2a72 100644
--- a/drivers/net/can/flexcan.c
+++ b/drivers/net/can/flexcan.c
@@ -200,6 +200,7 @@
#define FLEXCAN_QUIRK_DISABLE_MECR BIT(4) /* Disable Memory error detection */
#define FLEXCAN_QUIRK_USE_OFF_TIMESTAMP BIT(5) /* Use timestamp based offloading */
#define FLEXCAN_QUIRK_BROKEN_PERR_STATE BIT(6) /* No interrupt for error passive */
+#define FLEXCAN_QUIRK_DEFAULT_BIG_ENDIAN BIT(7) /* default to BE register access */
/* Structure of the message buffer */
struct flexcan_mb {
@@ -288,6 +289,12 @@ struct flexcan_priv {
static const struct flexcan_devtype_data fsl_p1010_devtype_data = {
.quirks = FLEXCAN_QUIRK_BROKEN_WERR_STATE |
+ FLEXCAN_QUIRK_BROKEN_PERR_STATE |
+ FLEXCAN_QUIRK_DEFAULT_BIG_ENDIAN,
+};
+
+static const struct flexcan_devtype_data fsl_imx25_devtype_data = {
+ .quirks = FLEXCAN_QUIRK_BROKEN_WERR_STATE |
FLEXCAN_QUIRK_BROKEN_PERR_STATE,
};
@@ -1251,9 +1258,9 @@ static void unregister_flexcandev(struct net_device *dev)
static const struct of_device_id flexcan_of_match[] = {
{ .compatible = "fsl,imx6q-flexcan", .data = &fsl_imx6q_devtype_data, },
{ .compatible = "fsl,imx28-flexcan", .data = &fsl_imx28_devtype_data, },
- { .compatible = "fsl,imx53-flexcan", .data = &fsl_p1010_devtype_data, },
- { .compatible = "fsl,imx35-flexcan", .data = &fsl_p1010_devtype_data, },
- { .compatible = "fsl,imx25-flexcan", .data = &fsl_p1010_devtype_data, },
+ { .compatible = "fsl,imx53-flexcan", .data = &fsl_imx25_devtype_data, },
+ { .compatible = "fsl,imx35-flexcan", .data = &fsl_imx25_devtype_data, },
+ { .compatible = "fsl,imx25-flexcan", .data = &fsl_imx25_devtype_data, },
{ .compatible = "fsl,p1010-flexcan", .data = &fsl_p1010_devtype_data, },
{ .compatible = "fsl,vf610-flexcan", .data = &fsl_vf610_devtype_data, },
{ .compatible = "fsl,ls1021ar2-flexcan", .data = &fsl_ls1021a_r2_devtype_data, },
@@ -1337,18 +1344,13 @@ static int flexcan_probe(struct platform_device *pdev)
priv = netdev_priv(dev);
- if (of_property_read_bool(pdev->dev.of_node, "big-endian")) {
+ if (of_property_read_bool(pdev->dev.of_node, "big-endian") ||
+ devtype_data->quirks & FLEXCAN_QUIRK_DEFAULT_BIG_ENDIAN) {
priv->read = flexcan_read_be;
priv->write = flexcan_write_be;
} else {
- if (of_device_is_compatible(pdev->dev.of_node,
- "fsl,p1010-flexcan")) {
- priv->read = flexcan_read_be;
- priv->write = flexcan_write_be;
- } else {
- priv->read = flexcan_read_le;
- priv->write = flexcan_write_le;
- }
+ priv->read = flexcan_read_le;
+ priv->write = flexcan_write_le;
}
priv->can.clock.freq = clock_freq;
diff --git a/drivers/net/can/spi/hi311x.c b/drivers/net/can/spi/hi311x.c
index 5590c559a8ca..53e320c92a8b 100644
--- a/drivers/net/can/spi/hi311x.c
+++ b/drivers/net/can/spi/hi311x.c
@@ -91,6 +91,7 @@
#define HI3110_STAT_BUSOFF BIT(2)
#define HI3110_STAT_ERRP BIT(3)
#define HI3110_STAT_ERRW BIT(4)
+#define HI3110_STAT_TXMTY BIT(7)
#define HI3110_BTR0_SJW_SHIFT 6
#define HI3110_BTR0_BRP_SHIFT 0
@@ -427,8 +428,10 @@ static int hi3110_get_berr_counter(const struct net_device *net,
struct hi3110_priv *priv = netdev_priv(net);
struct spi_device *spi = priv->spi;
+ mutex_lock(&priv->hi3110_lock);
bec->txerr = hi3110_read(spi, HI3110_READ_TEC);
bec->rxerr = hi3110_read(spi, HI3110_READ_REC);
+ mutex_unlock(&priv->hi3110_lock);
return 0;
}
@@ -735,10 +738,7 @@ static irqreturn_t hi3110_can_ist(int irq, void *dev_id)
}
}
- if (intf == 0)
- break;
-
- if (intf & HI3110_INT_TXCPLT) {
+ if (priv->tx_len && statf & HI3110_STAT_TXMTY) {
net->stats.tx_packets++;
net->stats.tx_bytes += priv->tx_len - 1;
can_led_event(net, CAN_LED_EVENT_TX);
@@ -748,6 +748,9 @@ static irqreturn_t hi3110_can_ist(int irq, void *dev_id)
}
netif_wake_queue(net);
}
+
+ if (intf == 0)
+ break;
}
mutex_unlock(&priv->hi3110_lock);
return IRQ_HANDLED;
diff --git a/drivers/net/can/usb/kvaser_usb.c b/drivers/net/can/usb/kvaser_usb.c
index 63587b8e6825..daed57d3d209 100644
--- a/drivers/net/can/usb/kvaser_usb.c
+++ b/drivers/net/can/usb/kvaser_usb.c
@@ -1179,7 +1179,7 @@ static void kvaser_usb_rx_can_msg(const struct kvaser_usb *dev,
skb = alloc_can_skb(priv->netdev, &cf);
if (!skb) {
- stats->tx_dropped++;
+ stats->rx_dropped++;
return;
}
diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c
index 78616787f2a3..3da5fca77cbd 100644
--- a/drivers/net/dsa/b53/b53_common.c
+++ b/drivers/net/dsa/b53/b53_common.c
@@ -1712,6 +1712,18 @@ static const struct b53_chip_data b53_switch_chips[] = {
.duplex_reg = B53_DUPLEX_STAT_FE,
},
{
+ .chip_id = BCM5389_DEVICE_ID,
+ .dev_name = "BCM5389",
+ .vlans = 4096,
+ .enabled_ports = 0x1f,
+ .arl_entries = 4,
+ .cpu_port = B53_CPU_PORT,
+ .vta_regs = B53_VTA_REGS,
+ .duplex_reg = B53_DUPLEX_STAT_GE,
+ .jumbo_pm_reg = B53_JUMBO_PORT_MASK,
+ .jumbo_size_reg = B53_JUMBO_MAX_SIZE,
+ },
+ {
.chip_id = BCM5395_DEVICE_ID,
.dev_name = "BCM5395",
.vlans = 4096,
@@ -2034,6 +2046,7 @@ int b53_switch_detect(struct b53_device *dev)
else
dev->chip_id = BCM5365_DEVICE_ID;
break;
+ case BCM5389_DEVICE_ID:
case BCM5395_DEVICE_ID:
case BCM5397_DEVICE_ID:
case BCM5398_DEVICE_ID:
diff --git a/drivers/net/dsa/b53/b53_mdio.c b/drivers/net/dsa/b53/b53_mdio.c
index fa7556f5d4fb..a533a90e3904 100644
--- a/drivers/net/dsa/b53/b53_mdio.c
+++ b/drivers/net/dsa/b53/b53_mdio.c
@@ -285,6 +285,7 @@ static const struct b53_io_ops b53_mdio_ops = {
#define B53_BRCM_OUI_1 0x0143bc00
#define B53_BRCM_OUI_2 0x03625c00
#define B53_BRCM_OUI_3 0x00406000
+#define B53_BRCM_OUI_4 0x01410c00
static int b53_mdio_probe(struct mdio_device *mdiodev)
{
@@ -311,7 +312,8 @@ static int b53_mdio_probe(struct mdio_device *mdiodev)
*/
if ((phy_id & 0xfffffc00) != B53_BRCM_OUI_1 &&
(phy_id & 0xfffffc00) != B53_BRCM_OUI_2 &&
- (phy_id & 0xfffffc00) != B53_BRCM_OUI_3) {
+ (phy_id & 0xfffffc00) != B53_BRCM_OUI_3 &&
+ (phy_id & 0xfffffc00) != B53_BRCM_OUI_4) {
dev_err(&mdiodev->dev, "Unsupported device: 0x%08x\n", phy_id);
return -ENODEV;
}
@@ -360,6 +362,7 @@ static const struct of_device_id b53_of_match[] = {
{ .compatible = "brcm,bcm53125" },
{ .compatible = "brcm,bcm53128" },
{ .compatible = "brcm,bcm5365" },
+ { .compatible = "brcm,bcm5389" },
{ .compatible = "brcm,bcm5395" },
{ .compatible = "brcm,bcm5397" },
{ .compatible = "brcm,bcm5398" },
diff --git a/drivers/net/dsa/b53/b53_priv.h b/drivers/net/dsa/b53/b53_priv.h
index 1187ebd79287..3b57f47d0e79 100644
--- a/drivers/net/dsa/b53/b53_priv.h
+++ b/drivers/net/dsa/b53/b53_priv.h
@@ -48,6 +48,7 @@ struct b53_io_ops {
enum {
BCM5325_DEVICE_ID = 0x25,
BCM5365_DEVICE_ID = 0x65,
+ BCM5389_DEVICE_ID = 0x89,
BCM5395_DEVICE_ID = 0x95,
BCM5397_DEVICE_ID = 0x97,
BCM5398_DEVICE_ID = 0x98,
diff --git a/drivers/net/dsa/bcm_sf2_cfp.c b/drivers/net/dsa/bcm_sf2_cfp.c
index 23b45da784cb..b89acaee12d4 100644
--- a/drivers/net/dsa/bcm_sf2_cfp.c
+++ b/drivers/net/dsa/bcm_sf2_cfp.c
@@ -354,10 +354,13 @@ static int bcm_sf2_cfp_ipv4_rule_set(struct bcm_sf2_priv *priv, int port,
/* Locate the first rule available */
if (fs->location == RX_CLS_LOC_ANY)
rule_index = find_first_zero_bit(priv->cfp.used,
- bcm_sf2_cfp_rule_size(priv));
+ priv->num_cfp_rules);
else
rule_index = fs->location;
+ if (rule_index > bcm_sf2_cfp_rule_size(priv))
+ return -ENOSPC;
+
layout = &udf_tcpip4_layout;
/* We only use one UDF slice for now */
slice_num = bcm_sf2_get_slice_number(layout, 0);
@@ -562,19 +565,21 @@ static int bcm_sf2_cfp_ipv6_rule_set(struct bcm_sf2_priv *priv, int port,
* first half because the HW search is by incrementing addresses.
*/
if (fs->location == RX_CLS_LOC_ANY)
- rule_index[0] = find_first_zero_bit(priv->cfp.used,
- bcm_sf2_cfp_rule_size(priv));
+ rule_index[1] = find_first_zero_bit(priv->cfp.used,
+ priv->num_cfp_rules);
else
- rule_index[0] = fs->location;
+ rule_index[1] = fs->location;
+ if (rule_index[1] > bcm_sf2_cfp_rule_size(priv))
+ return -ENOSPC;
/* Flag it as used (cleared on error path) such that we can immediately
* obtain a second one to chain from.
*/
- set_bit(rule_index[0], priv->cfp.used);
+ set_bit(rule_index[1], priv->cfp.used);
- rule_index[1] = find_first_zero_bit(priv->cfp.used,
- bcm_sf2_cfp_rule_size(priv));
- if (rule_index[1] > bcm_sf2_cfp_rule_size(priv)) {
+ rule_index[0] = find_first_zero_bit(priv->cfp.used,
+ priv->num_cfp_rules);
+ if (rule_index[0] > bcm_sf2_cfp_rule_size(priv)) {
ret = -ENOSPC;
goto out_err;
}
@@ -712,14 +717,14 @@ static int bcm_sf2_cfp_ipv6_rule_set(struct bcm_sf2_priv *priv, int port,
/* Flag the second half rule as being used now, return it as the
* location, and flag it as unique while dumping rules
*/
- set_bit(rule_index[1], priv->cfp.used);
+ set_bit(rule_index[0], priv->cfp.used);
set_bit(rule_index[1], priv->cfp.unique);
fs->location = rule_index[1];
return ret;
out_err:
- clear_bit(rule_index[0], priv->cfp.used);
+ clear_bit(rule_index[1], priv->cfp.used);
return ret;
}
@@ -785,10 +790,6 @@ static int bcm_sf2_cfp_rule_del_one(struct bcm_sf2_priv *priv, int port,
int ret;
u32 reg;
- /* Refuse deletion of unused rules, and the default reserved rule */
- if (!test_bit(loc, priv->cfp.used) || loc == 0)
- return -EINVAL;
-
/* Indicate which rule we want to read */
bcm_sf2_cfp_rule_addr_set(priv, loc);
@@ -826,6 +827,13 @@ static int bcm_sf2_cfp_rule_del(struct bcm_sf2_priv *priv, int port,
u32 next_loc = 0;
int ret;
+ /* Refuse deleting unused rules, and those that are not unique since
+ * that could leave IPv6 rules with one of the chained rule in the
+ * table.
+ */
+ if (!test_bit(loc, priv->cfp.unique) || loc == 0)
+ return -EINVAL;
+
ret = bcm_sf2_cfp_rule_del_one(priv, port, loc, &next_loc);
if (ret)
return ret;
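The CFP changes above revolve around one resource pattern: scan the used bitmap over the full priv->num_cfp_rules range, reject an index past the usable size with -ENOSPC, set the bit before searching for the chained second slot, and clear it again on the error path. A standalone sketch over a plain 64-bit bitmap, where NUM_RULES and find_first_zero() are simplified stand-ins for the kernel bitmap API:

#include <stdint.h>
#include <stdio.h>

#define NUM_RULES 16 /* stands in for priv->num_cfp_rules */

/* Like find_first_zero_bit(): returns NUM_RULES when the map is full. */
static unsigned find_first_zero(uint64_t map)
{
	for (unsigned i = 0; i < NUM_RULES; i++)
		if (!(map & (1ULL << i)))
			return i;
	return NUM_RULES;
}

int main(void)
{
	uint64_t used = 0;

	/* Grab two slots the way the IPv6 path chains a pair of rules. */
	unsigned a = find_first_zero(used);
	if (a >= NUM_RULES)
		return 1;          /* -ENOSPC in the driver */
	used |= 1ULL << a;         /* set_bit() before the second search */

	unsigned b = find_first_zero(used);
	if (b >= NUM_RULES) {
		used &= ~(1ULL << a); /* clear_bit() on the error path */
		return 1;
	}
	used |= 1ULL << b;

	printf("rules %u and %u\n", a, b);
	return 0;
}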
diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index 3d2091099f7f..5b4374f21d76 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -3370,6 +3370,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.num_internal_phys = 5,
.max_vid = 4095,
.port_base_addr = 0x10,
+ .phy_base_addr = 0x0,
.global1_addr = 0x1b,
.global2_addr = 0x1c,
.age_time_coeff = 15000,
@@ -3391,6 +3392,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.num_internal_phys = 0,
.max_vid = 4095,
.port_base_addr = 0x10,
+ .phy_base_addr = 0x0,
.global1_addr = 0x1b,
.global2_addr = 0x1c,
.age_time_coeff = 15000,
@@ -3410,6 +3412,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.num_internal_phys = 8,
.max_vid = 4095,
.port_base_addr = 0x10,
+ .phy_base_addr = 0x0,
.global1_addr = 0x1b,
.global2_addr = 0x1c,
.age_time_coeff = 15000,
@@ -3431,6 +3434,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.num_internal_phys = 5,
.max_vid = 4095,
.port_base_addr = 0x10,
+ .phy_base_addr = 0x0,
.global1_addr = 0x1b,
.global2_addr = 0x1c,
.age_time_coeff = 15000,
@@ -3452,6 +3456,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.num_internal_phys = 0,
.max_vid = 4095,
.port_base_addr = 0x10,
+ .phy_base_addr = 0x0,
.global1_addr = 0x1b,
.global2_addr = 0x1c,
.age_time_coeff = 15000,
@@ -3472,6 +3477,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.num_gpio = 11,
.max_vid = 4095,
.port_base_addr = 0x10,
+ .phy_base_addr = 0x10,
.global1_addr = 0x1b,
.global2_addr = 0x1c,
.age_time_coeff = 3750,
@@ -3493,6 +3499,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.num_internal_phys = 5,
.max_vid = 4095,
.port_base_addr = 0x10,
+ .phy_base_addr = 0x0,
.global1_addr = 0x1b,
.global2_addr = 0x1c,
.age_time_coeff = 15000,
@@ -3514,6 +3521,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.num_internal_phys = 0,
.max_vid = 4095,
.port_base_addr = 0x10,
+ .phy_base_addr = 0x0,
.global1_addr = 0x1b,
.global2_addr = 0x1c,
.age_time_coeff = 15000,
@@ -3535,6 +3543,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.num_internal_phys = 5,
.max_vid = 4095,
.port_base_addr = 0x10,
+ .phy_base_addr = 0x0,
.global1_addr = 0x1b,
.global2_addr = 0x1c,
.age_time_coeff = 15000,
@@ -3557,6 +3566,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.num_gpio = 15,
.max_vid = 4095,
.port_base_addr = 0x10,
+ .phy_base_addr = 0x0,
.global1_addr = 0x1b,
.global2_addr = 0x1c,
.age_time_coeff = 15000,
@@ -3578,6 +3588,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.num_internal_phys = 5,
.max_vid = 4095,
.port_base_addr = 0x10,
+ .phy_base_addr = 0x0,
.global1_addr = 0x1b,
.global2_addr = 0x1c,
.age_time_coeff = 15000,
@@ -3600,6 +3611,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.num_gpio = 15,
.max_vid = 4095,
.port_base_addr = 0x10,
+ .phy_base_addr = 0x0,
.global1_addr = 0x1b,
.global2_addr = 0x1c,
.age_time_coeff = 15000,
@@ -3621,6 +3633,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.num_internal_phys = 0,
.max_vid = 4095,
.port_base_addr = 0x10,
+ .phy_base_addr = 0x0,
.global1_addr = 0x1b,
.global2_addr = 0x1c,
.age_time_coeff = 15000,
@@ -3641,6 +3654,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.num_gpio = 16,
.max_vid = 8191,
.port_base_addr = 0x0,
+ .phy_base_addr = 0x0,
.global1_addr = 0x1b,
.global2_addr = 0x1c,
.tag_protocol = DSA_TAG_PROTO_DSA,
@@ -3663,6 +3677,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.num_gpio = 16,
.max_vid = 8191,
.port_base_addr = 0x0,
+ .phy_base_addr = 0x0,
.global1_addr = 0x1b,
.global2_addr = 0x1c,
.age_time_coeff = 3750,
@@ -3684,6 +3699,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.num_internal_phys = 11,
.max_vid = 8191,
.port_base_addr = 0x0,
+ .phy_base_addr = 0x0,
.global1_addr = 0x1b,
.global2_addr = 0x1c,
.age_time_coeff = 3750,
@@ -3707,6 +3723,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.num_gpio = 15,
.max_vid = 4095,
.port_base_addr = 0x10,
+ .phy_base_addr = 0x0,
.global1_addr = 0x1b,
.global2_addr = 0x1c,
.age_time_coeff = 15000,
@@ -3730,6 +3747,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.num_gpio = 16,
.max_vid = 8191,
.port_base_addr = 0x0,
+ .phy_base_addr = 0x0,
.global1_addr = 0x1b,
.global2_addr = 0x1c,
.age_time_coeff = 3750,
@@ -3753,6 +3771,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.num_gpio = 15,
.max_vid = 4095,
.port_base_addr = 0x10,
+ .phy_base_addr = 0x0,
.global1_addr = 0x1b,
.global2_addr = 0x1c,
.age_time_coeff = 15000,
@@ -3776,6 +3795,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.num_gpio = 15,
.max_vid = 4095,
.port_base_addr = 0x10,
+ .phy_base_addr = 0x0,
.global1_addr = 0x1b,
.global2_addr = 0x1c,
.age_time_coeff = 15000,
@@ -3798,6 +3818,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.num_gpio = 11,
.max_vid = 4095,
.port_base_addr = 0x10,
+ .phy_base_addr = 0x10,
.global1_addr = 0x1b,
.global2_addr = 0x1c,
.age_time_coeff = 3750,
@@ -3820,6 +3841,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.num_internal_phys = 5,
.max_vid = 4095,
.port_base_addr = 0x10,
+ .phy_base_addr = 0x0,
.global1_addr = 0x1b,
.global2_addr = 0x1c,
.age_time_coeff = 15000,
@@ -3841,6 +3863,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.num_internal_phys = 5,
.max_vid = 4095,
.port_base_addr = 0x10,
+ .phy_base_addr = 0x0,
.global1_addr = 0x1b,
.global2_addr = 0x1c,
.age_time_coeff = 15000,
@@ -3863,6 +3886,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.num_gpio = 15,
.max_vid = 4095,
.port_base_addr = 0x10,
+ .phy_base_addr = 0x0,
.global1_addr = 0x1b,
.global2_addr = 0x1c,
.age_time_coeff = 15000,
@@ -3885,6 +3909,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.num_gpio = 16,
.max_vid = 8191,
.port_base_addr = 0x0,
+ .phy_base_addr = 0x0,
.global1_addr = 0x1b,
.global2_addr = 0x1c,
.age_time_coeff = 3750,
@@ -3907,6 +3932,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.num_gpio = 16,
.max_vid = 8191,
.port_base_addr = 0x0,
+ .phy_base_addr = 0x0,
.global1_addr = 0x1b,
.global2_addr = 0x1c,
.age_time_coeff = 3750,
diff --git a/drivers/net/dsa/mv88e6xxx/chip.h b/drivers/net/dsa/mv88e6xxx/chip.h
index 80490f66bc06..12b7f4649b25 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.h
+++ b/drivers/net/dsa/mv88e6xxx/chip.h
@@ -114,6 +114,7 @@ struct mv88e6xxx_info {
unsigned int num_gpio;
unsigned int max_vid;
unsigned int port_base_addr;
+ unsigned int phy_base_addr;
unsigned int global1_addr;
unsigned int global2_addr;
unsigned int age_time_coeff;
diff --git a/drivers/net/dsa/mv88e6xxx/global2.c b/drivers/net/dsa/mv88e6xxx/global2.c
index 0ce627fded48..8d22d66d84b7 100644
--- a/drivers/net/dsa/mv88e6xxx/global2.c
+++ b/drivers/net/dsa/mv88e6xxx/global2.c
@@ -1118,7 +1118,7 @@ int mv88e6xxx_g2_irq_mdio_setup(struct mv88e6xxx_chip *chip,
err = irq;
goto out;
}
- bus->irq[chip->info->port_base_addr + phy] = irq;
+ bus->irq[chip->info->phy_base_addr + phy] = irq;
}
return 0;
out:
diff --git a/drivers/net/ethernet/3com/3c59x.c b/drivers/net/ethernet/3com/3c59x.c
index 36c8950dbd2d..176861bd2252 100644
--- a/drivers/net/ethernet/3com/3c59x.c
+++ b/drivers/net/ethernet/3com/3c59x.c
@@ -1212,9 +1212,9 @@ static int vortex_probe1(struct device *gendev, void __iomem *ioaddr, int irq,
vp->mii.reg_num_mask = 0x1f;
/* Makes sure rings are at least 16 byte aligned. */
- vp->rx_ring = pci_alloc_consistent(pdev, sizeof(struct boom_rx_desc) * RX_RING_SIZE
+ vp->rx_ring = dma_alloc_coherent(gendev, sizeof(struct boom_rx_desc) * RX_RING_SIZE
+ sizeof(struct boom_tx_desc) * TX_RING_SIZE,
- &vp->rx_ring_dma);
+ &vp->rx_ring_dma, GFP_KERNEL);
retval = -ENOMEM;
if (!vp->rx_ring)
goto free_device;
@@ -1476,11 +1476,10 @@ static int vortex_probe1(struct device *gendev, void __iomem *ioaddr, int irq,
return 0;
free_ring:
- pci_free_consistent(pdev,
- sizeof(struct boom_rx_desc) * RX_RING_SIZE
- + sizeof(struct boom_tx_desc) * TX_RING_SIZE,
- vp->rx_ring,
- vp->rx_ring_dma);
+ dma_free_coherent(&pdev->dev,
+ sizeof(struct boom_rx_desc) * RX_RING_SIZE +
+ sizeof(struct boom_tx_desc) * TX_RING_SIZE,
+ vp->rx_ring, vp->rx_ring_dma);
free_device:
free_netdev(dev);
pr_err(PFX "vortex_probe1 fails. Returns %d\n", retval);
@@ -1751,9 +1750,9 @@ vortex_open(struct net_device *dev)
break; /* Bad news! */
skb_reserve(skb, NET_IP_ALIGN); /* Align IP on 16 byte boundaries */
- dma = pci_map_single(VORTEX_PCI(vp), skb->data,
- PKT_BUF_SZ, PCI_DMA_FROMDEVICE);
- if (dma_mapping_error(&VORTEX_PCI(vp)->dev, dma))
+ dma = dma_map_single(vp->gendev, skb->data,
+ PKT_BUF_SZ, DMA_FROM_DEVICE);
+ if (dma_mapping_error(vp->gendev, dma))
break;
vp->rx_ring[i].addr = cpu_to_le32(dma);
}
@@ -2067,9 +2066,9 @@ vortex_start_xmit(struct sk_buff *skb, struct net_device *dev)
if (vp->bus_master) {
/* Set the bus-master controller to transfer the packet. */
int len = (skb->len + 3) & ~3;
- vp->tx_skb_dma = pci_map_single(VORTEX_PCI(vp), skb->data, len,
- PCI_DMA_TODEVICE);
- if (dma_mapping_error(&VORTEX_PCI(vp)->dev, vp->tx_skb_dma)) {
+ vp->tx_skb_dma = dma_map_single(vp->gendev, skb->data, len,
+ DMA_TO_DEVICE);
+ if (dma_mapping_error(vp->gendev, vp->tx_skb_dma)) {
dev_kfree_skb_any(skb);
dev->stats.tx_dropped++;
return NETDEV_TX_OK;
@@ -2168,9 +2167,9 @@ boomerang_start_xmit(struct sk_buff *skb, struct net_device *dev)
vp->tx_ring[entry].status = cpu_to_le32(skb->len | TxIntrUploaded | AddTCPChksum | AddUDPChksum);
if (!skb_shinfo(skb)->nr_frags) {
- dma_addr = pci_map_single(VORTEX_PCI(vp), skb->data, skb->len,
- PCI_DMA_TODEVICE);
- if (dma_mapping_error(&VORTEX_PCI(vp)->dev, dma_addr))
+ dma_addr = dma_map_single(vp->gendev, skb->data, skb->len,
+ DMA_TO_DEVICE);
+ if (dma_mapping_error(vp->gendev, dma_addr))
goto out_dma_err;
vp->tx_ring[entry].frag[0].addr = cpu_to_le32(dma_addr);
@@ -2178,9 +2177,9 @@ boomerang_start_xmit(struct sk_buff *skb, struct net_device *dev)
} else {
int i;
- dma_addr = pci_map_single(VORTEX_PCI(vp), skb->data,
- skb_headlen(skb), PCI_DMA_TODEVICE);
- if (dma_mapping_error(&VORTEX_PCI(vp)->dev, dma_addr))
+ dma_addr = dma_map_single(vp->gendev, skb->data,
+ skb_headlen(skb), DMA_TO_DEVICE);
+ if (dma_mapping_error(vp->gendev, dma_addr))
goto out_dma_err;
vp->tx_ring[entry].frag[0].addr = cpu_to_le32(dma_addr);
@@ -2189,21 +2188,21 @@ boomerang_start_xmit(struct sk_buff *skb, struct net_device *dev)
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
- dma_addr = skb_frag_dma_map(&VORTEX_PCI(vp)->dev, frag,
+ dma_addr = skb_frag_dma_map(vp->gendev, frag,
0,
frag->size,
DMA_TO_DEVICE);
- if (dma_mapping_error(&VORTEX_PCI(vp)->dev, dma_addr)) {
+ if (dma_mapping_error(vp->gendev, dma_addr)) {
for(i = i-1; i >= 0; i--)
- dma_unmap_page(&VORTEX_PCI(vp)->dev,
+ dma_unmap_page(vp->gendev,
le32_to_cpu(vp->tx_ring[entry].frag[i+1].addr),
le32_to_cpu(vp->tx_ring[entry].frag[i+1].length),
DMA_TO_DEVICE);
- pci_unmap_single(VORTEX_PCI(vp),
+ dma_unmap_single(vp->gendev,
le32_to_cpu(vp->tx_ring[entry].frag[0].addr),
le32_to_cpu(vp->tx_ring[entry].frag[0].length),
- PCI_DMA_TODEVICE);
+ DMA_TO_DEVICE);
goto out_dma_err;
}
@@ -2218,8 +2217,8 @@ boomerang_start_xmit(struct sk_buff *skb, struct net_device *dev)
}
}
#else
- dma_addr = pci_map_single(VORTEX_PCI(vp), skb->data, skb->len, PCI_DMA_TODEVICE);
- if (dma_mapping_error(&VORTEX_PCI(vp)->dev, dma_addr))
+ dma_addr = dma_map_single(vp->gendev, skb->data, skb->len, DMA_TO_DEVICE);
+ if (dma_mapping_error(vp->gendev, dma_addr))
goto out_dma_err;
vp->tx_ring[entry].addr = cpu_to_le32(dma_addr);
vp->tx_ring[entry].length = cpu_to_le32(skb->len | LAST_FRAG);
@@ -2254,7 +2253,7 @@ boomerang_start_xmit(struct sk_buff *skb, struct net_device *dev)
out:
return NETDEV_TX_OK;
out_dma_err:
- dev_err(&VORTEX_PCI(vp)->dev, "Error mapping dma buffer\n");
+ dev_err(vp->gendev, "Error mapping dma buffer\n");
goto out;
}
@@ -2322,7 +2321,7 @@ vortex_interrupt(int irq, void *dev_id)
if (status & DMADone) {
if (ioread16(ioaddr + Wn7_MasterStatus) & 0x1000) {
iowrite16(0x1000, ioaddr + Wn7_MasterStatus); /* Ack the event. */
- pci_unmap_single(VORTEX_PCI(vp), vp->tx_skb_dma, (vp->tx_skb->len + 3) & ~3, PCI_DMA_TODEVICE);
+ dma_unmap_single(vp->gendev, vp->tx_skb_dma, (vp->tx_skb->len + 3) & ~3, DMA_TO_DEVICE);
pkts_compl++;
bytes_compl += vp->tx_skb->len;
dev_kfree_skb_irq(vp->tx_skb); /* Release the transferred buffer */
@@ -2459,19 +2458,19 @@ boomerang_interrupt(int irq, void *dev_id)
struct sk_buff *skb = vp->tx_skbuff[entry];
#if DO_ZEROCOPY
int i;
- pci_unmap_single(VORTEX_PCI(vp),
+ dma_unmap_single(vp->gendev,
le32_to_cpu(vp->tx_ring[entry].frag[0].addr),
le32_to_cpu(vp->tx_ring[entry].frag[0].length)&0xFFF,
- PCI_DMA_TODEVICE);
+ DMA_TO_DEVICE);
for (i=1; i<=skb_shinfo(skb)->nr_frags; i++)
- pci_unmap_page(VORTEX_PCI(vp),
+ dma_unmap_page(vp->gendev,
le32_to_cpu(vp->tx_ring[entry].frag[i].addr),
le32_to_cpu(vp->tx_ring[entry].frag[i].length)&0xFFF,
- PCI_DMA_TODEVICE);
+ DMA_TO_DEVICE);
#else
- pci_unmap_single(VORTEX_PCI(vp),
- le32_to_cpu(vp->tx_ring[entry].addr), skb->len, PCI_DMA_TODEVICE);
+ dma_unmap_single(vp->gendev,
+ le32_to_cpu(vp->tx_ring[entry].addr), skb->len, DMA_TO_DEVICE);
#endif
pkts_compl++;
bytes_compl += skb->len;
@@ -2561,14 +2560,14 @@ static int vortex_rx(struct net_device *dev)
/* 'skb_put()' points to the start of sk_buff data area. */
if (vp->bus_master &&
! (ioread16(ioaddr + Wn7_MasterStatus) & 0x8000)) {
- dma_addr_t dma = pci_map_single(VORTEX_PCI(vp), skb_put(skb, pkt_len),
- pkt_len, PCI_DMA_FROMDEVICE);
+ dma_addr_t dma = dma_map_single(vp->gendev, skb_put(skb, pkt_len),
+ pkt_len, DMA_FROM_DEVICE);
iowrite32(dma, ioaddr + Wn7_MasterAddr);
iowrite16((skb->len + 3) & ~3, ioaddr + Wn7_MasterLen);
iowrite16(StartDMAUp, ioaddr + EL3_CMD);
while (ioread16(ioaddr + Wn7_MasterStatus) & 0x8000)
;
- pci_unmap_single(VORTEX_PCI(vp), dma, pkt_len, PCI_DMA_FROMDEVICE);
+ dma_unmap_single(vp->gendev, dma, pkt_len, DMA_FROM_DEVICE);
} else {
ioread32_rep(ioaddr + RX_FIFO,
skb_put(skb, pkt_len),
@@ -2635,11 +2634,11 @@ boomerang_rx(struct net_device *dev)
if (pkt_len < rx_copybreak &&
(skb = netdev_alloc_skb(dev, pkt_len + 2)) != NULL) {
skb_reserve(skb, 2); /* Align IP on 16 byte boundaries */
- pci_dma_sync_single_for_cpu(VORTEX_PCI(vp), dma, PKT_BUF_SZ, PCI_DMA_FROMDEVICE);
+ dma_sync_single_for_cpu(vp->gendev, dma, PKT_BUF_SZ, DMA_FROM_DEVICE);
/* 'skb_put()' points to the start of sk_buff data area. */
skb_put_data(skb, vp->rx_skbuff[entry]->data,
pkt_len);
- pci_dma_sync_single_for_device(VORTEX_PCI(vp), dma, PKT_BUF_SZ, PCI_DMA_FROMDEVICE);
+ dma_sync_single_for_device(vp->gendev, dma, PKT_BUF_SZ, DMA_FROM_DEVICE);
vp->rx_copy++;
} else {
/* Pre-allocate the replacement skb. If it or its
@@ -2651,9 +2650,9 @@ boomerang_rx(struct net_device *dev)
dev->stats.rx_dropped++;
goto clear_complete;
}
- newdma = pci_map_single(VORTEX_PCI(vp), newskb->data,
- PKT_BUF_SZ, PCI_DMA_FROMDEVICE);
- if (dma_mapping_error(&VORTEX_PCI(vp)->dev, newdma)) {
+ newdma = dma_map_single(vp->gendev, newskb->data,
+ PKT_BUF_SZ, DMA_FROM_DEVICE);
+ if (dma_mapping_error(vp->gendev, newdma)) {
dev->stats.rx_dropped++;
consume_skb(newskb);
goto clear_complete;
@@ -2664,7 +2663,7 @@ boomerang_rx(struct net_device *dev)
vp->rx_skbuff[entry] = newskb;
vp->rx_ring[entry].addr = cpu_to_le32(newdma);
skb_put(skb, pkt_len);
- pci_unmap_single(VORTEX_PCI(vp), dma, PKT_BUF_SZ, PCI_DMA_FROMDEVICE);
+ dma_unmap_single(vp->gendev, dma, PKT_BUF_SZ, DMA_FROM_DEVICE);
vp->rx_nocopy++;
}
skb->protocol = eth_type_trans(skb, dev);
@@ -2761,8 +2760,8 @@ vortex_close(struct net_device *dev)
if (vp->full_bus_master_rx) { /* Free Boomerang bus master Rx buffers. */
for (i = 0; i < RX_RING_SIZE; i++)
if (vp->rx_skbuff[i]) {
- pci_unmap_single( VORTEX_PCI(vp), le32_to_cpu(vp->rx_ring[i].addr),
- PKT_BUF_SZ, PCI_DMA_FROMDEVICE);
+ dma_unmap_single(vp->gendev, le32_to_cpu(vp->rx_ring[i].addr),
+ PKT_BUF_SZ, DMA_FROM_DEVICE);
dev_kfree_skb(vp->rx_skbuff[i]);
vp->rx_skbuff[i] = NULL;
}
@@ -2775,12 +2774,12 @@ vortex_close(struct net_device *dev)
int k;
for (k=0; k<=skb_shinfo(skb)->nr_frags; k++)
- pci_unmap_single(VORTEX_PCI(vp),
+ dma_unmap_single(vp->gendev,
le32_to_cpu(vp->tx_ring[i].frag[k].addr),
le32_to_cpu(vp->tx_ring[i].frag[k].length)&0xFFF,
- PCI_DMA_TODEVICE);
+ DMA_TO_DEVICE);
#else
- pci_unmap_single(VORTEX_PCI(vp), le32_to_cpu(vp->tx_ring[i].addr), skb->len, PCI_DMA_TODEVICE);
+ dma_unmap_single(vp->gendev, le32_to_cpu(vp->tx_ring[i].addr), skb->len, DMA_TO_DEVICE);
#endif
dev_kfree_skb(skb);
vp->tx_skbuff[i] = NULL;
@@ -3288,11 +3287,10 @@ static void vortex_remove_one(struct pci_dev *pdev)
pci_iounmap(pdev, vp->ioaddr);
- pci_free_consistent(pdev,
- sizeof(struct boom_rx_desc) * RX_RING_SIZE
- + sizeof(struct boom_tx_desc) * TX_RING_SIZE,
- vp->rx_ring,
- vp->rx_ring_dma);
+ dma_free_coherent(&pdev->dev,
+ sizeof(struct boom_rx_desc) * RX_RING_SIZE +
+ sizeof(struct boom_tx_desc) * TX_RING_SIZE,
+ vp->rx_ring, vp->rx_ring_dma);
pci_release_regions(pdev);
diff --git a/drivers/net/ethernet/8390/ne.c b/drivers/net/ethernet/8390/ne.c
index ac99d089ac72..1c97e39b478e 100644
--- a/drivers/net/ethernet/8390/ne.c
+++ b/drivers/net/ethernet/8390/ne.c
@@ -164,7 +164,9 @@ bad_clone_list[] __initdata = {
#define NESM_START_PG 0x40 /* First page of TX buffer */
#define NESM_STOP_PG 0x80 /* Last page +1 of RX ring */
-#if defined(CONFIG_ATARI) /* 8-bit mode on Atari, normal on Q40 */
+#if defined(CONFIG_MACH_TX49XX)
+# define DCR_VAL 0x48 /* 8-bit mode */
+#elif defined(CONFIG_ATARI) /* 8-bit mode on Atari, normal on Q40 */
# define DCR_VAL (MACH_IS_ATARI ? 0x48 : 0x49)
#else
# define DCR_VAL 0x49
diff --git a/drivers/net/ethernet/amd/pcnet32.c b/drivers/net/ethernet/amd/pcnet32.c
index a561705f232c..be198cc0b10c 100644
--- a/drivers/net/ethernet/amd/pcnet32.c
+++ b/drivers/net/ethernet/amd/pcnet32.c
@@ -1552,22 +1552,26 @@ pcnet32_probe_pci(struct pci_dev *pdev, const struct pci_device_id *ent)
if (!ioaddr) {
if (pcnet32_debug & NETIF_MSG_PROBE)
pr_err("card has no PCI IO resources, aborting\n");
- return -ENODEV;
+ err = -ENODEV;
+ goto err_disable_dev;
}
err = pci_set_dma_mask(pdev, PCNET32_DMA_MASK);
if (err) {
if (pcnet32_debug & NETIF_MSG_PROBE)
pr_err("architecture does not support 32bit PCI busmaster DMA\n");
- return err;
+ goto err_disable_dev;
}
if (!request_region(ioaddr, PCNET32_TOTAL_SIZE, "pcnet32_probe_pci")) {
if (pcnet32_debug & NETIF_MSG_PROBE)
pr_err("io address range already allocated\n");
- return -EBUSY;
+ err = -EBUSY;
+ goto err_disable_dev;
}
err = pcnet32_probe1(ioaddr, 1, pdev);
+
+err_disable_dev:
if (err < 0)
pci_disable_device(pdev);
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
index 32f6d2e24d66..1a1a6380c128 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
@@ -95,6 +95,7 @@ void aq_nic_cfg_start(struct aq_nic_s *self)
/*rss rings */
cfg->vecs = min(cfg->aq_hw_caps->vecs, AQ_CFG_VECS_DEF);
cfg->vecs = min(cfg->vecs, num_online_cpus());
+ cfg->vecs = min(cfg->vecs, self->irqvecs);
/* cfg->vecs should be power of 2 for RSS */
if (cfg->vecs >= 8U)
cfg->vecs = 8U;
@@ -246,6 +247,8 @@ void aq_nic_ndev_init(struct aq_nic_s *self)
self->ndev->hw_features |= aq_hw_caps->hw_features;
self->ndev->features = aq_hw_caps->hw_features;
+ self->ndev->vlan_features |= NETIF_F_HW_CSUM | NETIF_F_RXCSUM |
+ NETIF_F_RXHASH | NETIF_F_SG | NETIF_F_LRO;
self->ndev->priv_flags = aq_hw_caps->hw_priv_flags;
self->ndev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.h b/drivers/net/ethernet/aquantia/atlantic/aq_nic.h
index 219b550d1665..faa533a0ec47 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.h
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.h
@@ -80,6 +80,7 @@ struct aq_nic_s {
struct pci_dev *pdev;
unsigned int msix_entry_mask;
+ u32 irqvecs;
};
static inline struct device *aq_nic_get_dev(struct aq_nic_s *self)
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c
index ecc6306f940f..a50e08bb4748 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c
@@ -267,16 +267,16 @@ static int aq_pci_probe(struct pci_dev *pdev,
numvecs = min(numvecs, num_online_cpus());
/*enable interrupts */
#if !AQ_CFG_FORCE_LEGACY_INT
- err = pci_alloc_irq_vectors(self->pdev, numvecs, numvecs,
- PCI_IRQ_MSIX);
-
- if (err < 0) {
- err = pci_alloc_irq_vectors(self->pdev, 1, 1,
- PCI_IRQ_MSI | PCI_IRQ_LEGACY);
- if (err < 0)
- goto err_hwinit;
+ numvecs = pci_alloc_irq_vectors(self->pdev, 1, numvecs,
+ PCI_IRQ_MSIX | PCI_IRQ_MSI |
+ PCI_IRQ_LEGACY);
+
+ if (numvecs < 0) {
+ err = numvecs;
+ goto err_hwinit;
}
#endif
+ self->irqvecs = numvecs;
/* net device init */
aq_nic_cfg_start(self);
@@ -298,9 +298,9 @@ err_free_aq_hw:
kfree(self->aq_hw);
err_ioremap:
free_netdev(ndev);
-err_pci_func:
- pci_release_regions(pdev);
err_ndev:
+ pci_release_regions(pdev);
+err_pci_func:
pci_disable_device(pdev);
return err;
}
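The probe change collapses a two-step "try MSI-X, then retry with one MSI/legacy vector" sequence into a single ranged allocation whose return value is both the error code and the vector count that aq_nic_cfg_start() later clamps against. A toy model of that contract, where alloc_vectors() is a hypothetical stand-in for pci_alloc_irq_vectors():

#include <stdio.h>

/* Stand-in for pci_alloc_irq_vectors(): grants what the platform has. */
static int alloc_vectors(int min, int max, int available)
{
	if (available < min)
		return -1; /* nothing acceptable: error */
	return available < max ? available : max;
}

int main(void)
{
	int want = 8;

	/* One ranged call replaces "try MSI-X, then retry with 1 vector". */
	int got = alloc_vectors(1, want, 4);
	if (got < 0)
		return 1;
	printf("using %d vectors\n", got); /* stored as self->irqvecs */
	return 0;
}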
diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c
index f9a3c1a76d5d..f33b25fbca63 100644
--- a/drivers/net/ethernet/broadcom/bcmsysport.c
+++ b/drivers/net/ethernet/broadcom/bcmsysport.c
@@ -2144,14 +2144,21 @@ static const struct net_device_ops bcm_sysport_netdev_ops = {
.ndo_select_queue = bcm_sysport_select_queue,
};
-static int bcm_sysport_map_queues(struct net_device *dev,
+static int bcm_sysport_map_queues(struct notifier_block *nb,
struct dsa_notifier_register_info *info)
{
- struct bcm_sysport_priv *priv = netdev_priv(dev);
struct bcm_sysport_tx_ring *ring;
+ struct bcm_sysport_priv *priv;
struct net_device *slave_dev;
unsigned int num_tx_queues;
unsigned int q, start, port;
+ struct net_device *dev;
+
+ priv = container_of(nb, struct bcm_sysport_priv, dsa_notifier);
+ if (priv->netdev != info->master)
+ return 0;
+
+ dev = info->master;
/* We can't be setting up queue inspection for non directly attached
* switches
@@ -2174,11 +2181,12 @@ static int bcm_sysport_map_queues(struct net_device *dev,
if (priv->is_lite)
netif_set_real_num_tx_queues(slave_dev,
slave_dev->num_tx_queues / 2);
+
num_tx_queues = slave_dev->real_num_tx_queues;
if (priv->per_port_num_tx_queues &&
priv->per_port_num_tx_queues != num_tx_queues)
- netdev_warn(slave_dev, "asymetric number of per-port queues\n");
+ netdev_warn(slave_dev, "asymmetric number of per-port queues\n");
priv->per_port_num_tx_queues = num_tx_queues;
@@ -2201,7 +2209,7 @@ static int bcm_sysport_map_queues(struct net_device *dev,
return 0;
}
-static int bcm_sysport_dsa_notifier(struct notifier_block *unused,
+static int bcm_sysport_dsa_notifier(struct notifier_block *nb,
unsigned long event, void *ptr)
{
struct dsa_notifier_register_info *info;
@@ -2211,7 +2219,7 @@ static int bcm_sysport_dsa_notifier(struct notifier_block *unused,
info = ptr;
- return notifier_from_errno(bcm_sysport_map_queues(info->master, info));
+ return notifier_from_errno(bcm_sysport_map_queues(nb, info));
}
#define REV_FMT "v%2x.%02x"
diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c
index 08bbb639be1a..9f59b1270a7c 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -8733,14 +8733,15 @@ static void tg3_free_consistent(struct tg3 *tp)
tg3_mem_rx_release(tp);
tg3_mem_tx_release(tp);
- /* Protect tg3_get_stats64() from reading freed tp->hw_stats. */
- tg3_full_lock(tp, 0);
+ /* tp->hw_stats can be referenced safely:
+ * 1. under rtnl_lock
+ * 2. or under tp->lock if TG3_FLAG_INIT_COMPLETE is set.
+ */
if (tp->hw_stats) {
dma_free_coherent(&tp->pdev->dev, sizeof(struct tg3_hw_stats),
tp->hw_stats, tp->stats_mapping);
tp->hw_stats = NULL;
}
- tg3_full_unlock(tp);
}
/*
@@ -14178,7 +14179,7 @@ static void tg3_get_stats64(struct net_device *dev,
struct tg3 *tp = netdev_priv(dev);
spin_lock_bh(&tp->lock);
- if (!tp->hw_stats) {
+ if (!tp->hw_stats || !tg3_flag(tp, INIT_COMPLETE)) {
*stats = tp->net_stats_prev;
spin_unlock_bh(&tp->lock);
return;
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h b/drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h
index b57acb8dc35b..dc25066c59a1 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h
@@ -419,15 +419,15 @@ static const u32 t6_up_cim_reg_array[][IREG_NUM_ELEM + 1] = {
{0x7b50, 0x7b54, 0x280, 0x20, 0}, /* up_cim_280_to_2fc */
{0x7b50, 0x7b54, 0x300, 0x20, 0}, /* up_cim_300_to_37c */
{0x7b50, 0x7b54, 0x380, 0x14, 0}, /* up_cim_380_to_3cc */
- {0x7b50, 0x7b54, 0x2900, 0x4, 0x4}, /* up_cim_2900_to_3d40 */
- {0x7b50, 0x7b54, 0x2904, 0x4, 0x4}, /* up_cim_2904_to_3d44 */
- {0x7b50, 0x7b54, 0x2908, 0x4, 0x4}, /* up_cim_2908_to_3d48 */
- {0x7b50, 0x7b54, 0x2910, 0x4, 0x4}, /* up_cim_2910_to_3d4c */
- {0x7b50, 0x7b54, 0x2914, 0x4, 0x4}, /* up_cim_2914_to_3d50 */
- {0x7b50, 0x7b54, 0x2920, 0x10, 0x10}, /* up_cim_2920_to_2a10 */
- {0x7b50, 0x7b54, 0x2924, 0x10, 0x10}, /* up_cim_2924_to_2a14 */
- {0x7b50, 0x7b54, 0x2928, 0x10, 0x10}, /* up_cim_2928_to_2a18 */
- {0x7b50, 0x7b54, 0x292c, 0x10, 0x10}, /* up_cim_292c_to_2a1c */
+ {0x7b50, 0x7b54, 0x4900, 0x4, 0x4}, /* up_cim_4900_to_4c60 */
+ {0x7b50, 0x7b54, 0x4904, 0x4, 0x4}, /* up_cim_4904_to_4c64 */
+ {0x7b50, 0x7b54, 0x4908, 0x4, 0x4}, /* up_cim_4908_to_4c68 */
+ {0x7b50, 0x7b54, 0x4910, 0x4, 0x4}, /* up_cim_4910_to_4c70 */
+ {0x7b50, 0x7b54, 0x4914, 0x4, 0x4}, /* up_cim_4914_to_4c74 */
+ {0x7b50, 0x7b54, 0x4920, 0x10, 0x10}, /* up_cim_4920_to_4a10 */
+ {0x7b50, 0x7b54, 0x4924, 0x10, 0x10}, /* up_cim_4924_to_4a14 */
+ {0x7b50, 0x7b54, 0x4928, 0x10, 0x10}, /* up_cim_4928_to_4a18 */
+ {0x7b50, 0x7b54, 0x492c, 0x10, 0x10}, /* up_cim_492c_to_4a1c */
};
static const u32 t5_up_cim_reg_array[][IREG_NUM_ELEM + 1] = {
@@ -444,16 +444,6 @@ static const u32 t5_up_cim_reg_array[][IREG_NUM_ELEM + 1] = {
{0x7b50, 0x7b54, 0x280, 0x20, 0}, /* up_cim_280_to_2fc */
{0x7b50, 0x7b54, 0x300, 0x20, 0}, /* up_cim_300_to_37c */
{0x7b50, 0x7b54, 0x380, 0x14, 0}, /* up_cim_380_to_3cc */
- {0x7b50, 0x7b54, 0x2900, 0x4, 0x4}, /* up_cim_2900_to_3d40 */
- {0x7b50, 0x7b54, 0x2904, 0x4, 0x4}, /* up_cim_2904_to_3d44 */
- {0x7b50, 0x7b54, 0x2908, 0x4, 0x4}, /* up_cim_2908_to_3d48 */
- {0x7b50, 0x7b54, 0x2910, 0x4, 0x4}, /* up_cim_2910_to_3d4c */
- {0x7b50, 0x7b54, 0x2914, 0x4, 0x4}, /* up_cim_2914_to_3d50 */
- {0x7b50, 0x7b54, 0x2918, 0x4, 0x4}, /* up_cim_2918_to_3d54 */
- {0x7b50, 0x7b54, 0x291c, 0x4, 0x4}, /* up_cim_291c_to_3d58 */
- {0x7b50, 0x7b54, 0x2924, 0x10, 0x10}, /* up_cim_2924_to_2914 */
- {0x7b50, 0x7b54, 0x2928, 0x10, 0x10}, /* up_cim_2928_to_2a18 */
- {0x7b50, 0x7b54, 0x292c, 0x10, 0x10}, /* up_cim_292c_to_2a1c */
};
static const u32 t6_hma_ireg_array[][IREG_NUM_ELEM] = {
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c
index db92f1858060..b76447baccaf 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c
@@ -836,7 +836,7 @@ bool is_filter_exact_match(struct adapter *adap,
{
struct tp_params *tp = &adap->params.tp;
u64 hash_filter_mask = tp->hash_filter_mask;
- u32 mask;
+ u64 ntuple_mask = 0;
if (!is_hashfilter(adap))
return false;
@@ -865,73 +865,45 @@ bool is_filter_exact_match(struct adapter *adap,
if (!fs->val.fport || fs->mask.fport != 0xffff)
return false;
- if (tp->fcoe_shift >= 0) {
- mask = (hash_filter_mask >> tp->fcoe_shift) & FT_FCOE_W;
- if (mask && !fs->mask.fcoe)
- return false;
- }
+ /* calculate tuple mask and compare with mask configured in hw */
+ if (tp->fcoe_shift >= 0)
+ ntuple_mask |= (u64)fs->mask.fcoe << tp->fcoe_shift;
- if (tp->port_shift >= 0) {
- mask = (hash_filter_mask >> tp->port_shift) & FT_PORT_W;
- if (mask && !fs->mask.iport)
- return false;
- }
+ if (tp->port_shift >= 0)
+ ntuple_mask |= (u64)fs->mask.iport << tp->port_shift;
if (tp->vnic_shift >= 0) {
- mask = (hash_filter_mask >> tp->vnic_shift) & FT_VNIC_ID_W;
-
- if ((adap->params.tp.ingress_config & VNIC_F)) {
- if (mask && !fs->mask.pfvf_vld)
- return false;
- } else {
- if (mask && !fs->mask.ovlan_vld)
- return false;
- }
+ if ((adap->params.tp.ingress_config & VNIC_F))
+ ntuple_mask |= (u64)fs->mask.pfvf_vld << tp->vnic_shift;
+ else
+ ntuple_mask |= (u64)fs->mask.ovlan_vld <<
+ tp->vnic_shift;
}
- if (tp->vlan_shift >= 0) {
- mask = (hash_filter_mask >> tp->vlan_shift) & FT_VLAN_W;
- if (mask && !fs->mask.ivlan)
- return false;
- }
+ if (tp->vlan_shift >= 0)
+ ntuple_mask |= (u64)fs->mask.ivlan << tp->vlan_shift;
- if (tp->tos_shift >= 0) {
- mask = (hash_filter_mask >> tp->tos_shift) & FT_TOS_W;
- if (mask && !fs->mask.tos)
- return false;
- }
+ if (tp->tos_shift >= 0)
+ ntuple_mask |= (u64)fs->mask.tos << tp->tos_shift;
- if (tp->protocol_shift >= 0) {
- mask = (hash_filter_mask >> tp->protocol_shift) & FT_PROTOCOL_W;
- if (mask && !fs->mask.proto)
- return false;
- }
+ if (tp->protocol_shift >= 0)
+ ntuple_mask |= (u64)fs->mask.proto << tp->protocol_shift;
- if (tp->ethertype_shift >= 0) {
- mask = (hash_filter_mask >> tp->ethertype_shift) &
- FT_ETHERTYPE_W;
- if (mask && !fs->mask.ethtype)
- return false;
- }
+ if (tp->ethertype_shift >= 0)
+ ntuple_mask |= (u64)fs->mask.ethtype << tp->ethertype_shift;
- if (tp->macmatch_shift >= 0) {
- mask = (hash_filter_mask >> tp->macmatch_shift) & FT_MACMATCH_W;
- if (mask && !fs->mask.macidx)
- return false;
- }
+ if (tp->macmatch_shift >= 0)
+ ntuple_mask |= (u64)fs->mask.macidx << tp->macmatch_shift;
+
+ if (tp->matchtype_shift >= 0)
+ ntuple_mask |= (u64)fs->mask.matchtype << tp->matchtype_shift;
+
+ if (tp->frag_shift >= 0)
+ ntuple_mask |= (u64)fs->mask.frag << tp->frag_shift;
+
+ if (ntuple_mask != hash_filter_mask)
+ return false;
- if (tp->matchtype_shift >= 0) {
- mask = (hash_filter_mask >> tp->matchtype_shift) &
- FT_MPSHITTYPE_W;
- if (mask && !fs->mask.matchtype)
- return false;
- }
- if (tp->frag_shift >= 0) {
- mask = (hash_filter_mask >> tp->frag_shift) &
- FT_FRAGMENTATION_W;
- if (mask && !fs->mask.frag)
- return false;
- }
return true;
}
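
The rework above replaces a chain of per-field early returns with a single accumulate-and-compare: each field mask is shifted to its hardware bit position, OR'd into ntuple_mask, and the result must match the mask programmed in hardware bit-for-bit. A minimal standalone sketch of the same idea; the field layout and shifts are illustrative, not the real TP register layout:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Illustrative fields; shift < 0 means "not present in this config". */
    struct field { int shift; uint64_t mask; };

    static bool tuple_is_exact_match(const struct field *fields, int nfields,
                                     uint64_t hw_mask)
    {
        uint64_t ntuple_mask = 0;
        int i;

        /* Accumulate every per-field mask at its hardware bit position. */
        for (i = 0; i < nfields; i++)
            if (fields[i].shift >= 0)
                ntuple_mask |= fields[i].mask << fields[i].shift;

        /* Exact match only if the masks agree bit-for-bit. */
        return ntuple_mask == hw_mask;
    }

    int main(void)
    {
        struct field f[] = { { 0, 0xff }, { 8, 0xf }, { -1, 0x3 } };

        printf("%d\n", tuple_is_exact_match(f, 3, (0xfULL << 8) | 0xff));
        return 0;
    }
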
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index 24d2865b8806..005283c7cdfe 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -3433,8 +3433,8 @@ static int adap_config_hma(struct adapter *adapter)
sgl = adapter->hma.sgt->sgl;
node = dev_to_node(adapter->pdev_dev);
for_each_sg(sgl, iter, sgt->orig_nents, i) {
- newpage = alloc_pages_node(node, __GFP_NOWARN | GFP_KERNEL,
- page_order);
+ newpage = alloc_pages_node(node, __GFP_NOWARN | GFP_KERNEL |
+ __GFP_ZERO, page_order);
if (!newpage) {
dev_err(adapter->pdev_dev,
"Not enough memory for HMA page allocation\n");
@@ -5474,6 +5474,7 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
}
spin_lock_init(&adapter->mbox_lock);
INIT_LIST_HEAD(&adapter->mlist.list);
+ adapter->mbox_log->size = T4_OS_LOG_MBOX_CMDS;
pci_set_drvdata(pdev, adapter);
if (func != ent->driver_data) {
@@ -5508,8 +5509,6 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
goto out_free_adapter;
}
- adapter->mbox_log->size = T4_OS_LOG_MBOX_CMDS;
-
/* PCI device has been enabled */
adapter->flags |= DEV_ENABLED;
memset(adapter->chan_map, 0xff, sizeof(adapter->chan_map));
diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c
index 81684acf52af..8a8b12b720ef 100644
--- a/drivers/net/ethernet/cisco/enic/enic_main.c
+++ b/drivers/net/ethernet/cisco/enic/enic_main.c
@@ -2747,11 +2747,11 @@ static int enic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
pci_set_master(pdev);
/* Query PCI controller on system for DMA addressing
- * limitation for the device. Try 64-bit first, and
+ * limitation for the device. Try 47-bit first, and
* fail to 32-bit.
*/
- err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
+ err = pci_set_dma_mask(pdev, DMA_BIT_MASK(47));
if (err) {
err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
if (err) {
@@ -2765,10 +2765,10 @@ static int enic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
goto err_out_release_regions;
}
} else {
- err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
+ err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(47));
if (err) {
dev_err(dev, "Unable to obtain %u-bit DMA "
- "for consistent allocations, aborting\n", 64);
+ "for consistent allocations, aborting\n", 47);
goto err_out_release_regions;
}
using_dac = 1;
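
The enic change caps both the streaming and coherent DMA masks at the device's true 47-bit addressing limit, falling back to 32-bit when the platform refuses. A condensed sketch of the pattern, assuming the 4.17-era pci_set_*_dma_mask API and abbreviating the driver's error messages and cleanup:

    #include <linux/dma-mapping.h>
    #include <linux/pci.h>

    static int example_set_dma_masks(struct pci_dev *pdev)
    {
        int err;

        err = pci_set_dma_mask(pdev, DMA_BIT_MASK(47));
        if (err) {
            /* 47-bit refused: fall back to 32-bit for both masks. */
            err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
            if (err)
                return err;
            return pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
        }

        /* Streaming mask OK: coherent allocations get the same width. */
        return pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(47));
    }
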
diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c
index c697e79e491e..8f755009ff38 100644
--- a/drivers/net/ethernet/emulex/benet/be_main.c
+++ b/drivers/net/ethernet/emulex/benet/be_main.c
@@ -3309,7 +3309,9 @@ void be_detect_error(struct be_adapter *adapter)
if ((val & POST_STAGE_FAT_LOG_START)
!= POST_STAGE_FAT_LOG_START &&
(val & POST_STAGE_ARMFW_UE)
- != POST_STAGE_ARMFW_UE)
+ != POST_STAGE_ARMFW_UE &&
+ (val & POST_STAGE_RECOVERABLE_ERR)
+ != POST_STAGE_RECOVERABLE_ERR)
return;
}
diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index d4604bc8eb5b..9d3eed46830d 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0+
/*
* Fast Ethernet Controller (FEC) driver for Motorola MPC8xx.
* Copyright (c) 1997 Dan Malek (dmalek@jlc.net)
diff --git a/drivers/net/ethernet/freescale/fec_ptp.c b/drivers/net/ethernet/freescale/fec_ptp.c
index f81439796ac7..43d973215040 100644
--- a/drivers/net/ethernet/freescale/fec_ptp.c
+++ b/drivers/net/ethernet/freescale/fec_ptp.c
@@ -1,20 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Fast Ethernet Controller (ENET) PTP driver for MX6x.
*
* Copyright (C) 2012 Freescale Semiconductor, Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
diff --git a/drivers/net/ethernet/freescale/ucc_geth_ethtool.c b/drivers/net/ethernet/freescale/ucc_geth_ethtool.c
index 4df282ed22c7..0beee2cc2ddd 100644
--- a/drivers/net/ethernet/freescale/ucc_geth_ethtool.c
+++ b/drivers/net/ethernet/freescale/ucc_geth_ethtool.c
@@ -61,7 +61,7 @@ static const char hw_stat_gstrings[][ETH_GSTRING_LEN] = {
static const char tx_fw_stat_gstrings[][ETH_GSTRING_LEN] = {
"tx-single-collision",
"tx-multiple-collision",
- "tx-late-collsion",
+ "tx-late-collision",
"tx-aborted-frames",
"tx-lost-frames",
"tx-carrier-sense-errors",
diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 6e8d6a6f6aaf..5ec1185808e5 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -192,6 +192,7 @@ static int alloc_long_term_buff(struct ibmvnic_adapter *adapter,
if (adapter->fw_done_rc) {
dev_err(dev, "Couldn't map long term buffer,rc = %d\n",
adapter->fw_done_rc);
+ dma_free_coherent(dev, ltb->size, ltb->buff, ltb->addr);
return -1;
}
return 0;
@@ -795,9 +796,11 @@ static int ibmvnic_login(struct net_device *netdev)
struct ibmvnic_adapter *adapter = netdev_priv(netdev);
unsigned long timeout = msecs_to_jiffies(30000);
int retry_count = 0;
+ bool retry;
int rc;
do {
+ retry = false;
if (retry_count > IBMVNIC_MAX_QUEUES) {
netdev_warn(netdev, "Login attempts exceeded\n");
return -1;
@@ -821,6 +824,9 @@ static int ibmvnic_login(struct net_device *netdev)
retry_count++;
release_sub_crqs(adapter, 1);
+ retry = true;
+ netdev_dbg(netdev,
+ "Received partial success, retrying...\n");
adapter->init_done_rc = 0;
reinit_completion(&adapter->init_done);
send_cap_queries(adapter);
@@ -848,7 +854,7 @@ static int ibmvnic_login(struct net_device *netdev)
netdev_warn(netdev, "Adapter login failed\n");
return -1;
}
- } while (adapter->init_done_rc == PARTIALSUCCESS);
+ } while (retry);
/* handle pending MAC address changes after successful login */
if (adapter->mac_change_pending) {
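
The login loop above used to test adapter->init_done_rc in its while condition, but the loop body rewrites that field before the test runs; the fix latches the decision into a local retry flag at the one place the partial-success path is taken. A minimal userspace sketch of the latch-a-local-flag pattern; flaky_op() and the rc convention are illustrative:

    #include <stdbool.h>
    #include <stdio.h>

    static int do_with_retry(int (*op)(void), int max_retries)
    {
        int retries = 0;
        bool retry;
        int rc;

        do {
            retry = false;
            rc = op();
            /* Latch the decision in a local flag; later code in the
             * loop body is free to rewrite whatever state op() set.
             */
            if (rc == 1 && retries++ < max_retries)
                retry = true;
        } while (retry);

        return rc;
    }

    static int attempts;
    static int flaky_op(void) { return ++attempts < 3 ? 1 : 0; } /* stand-in */

    int main(void)
    {
        printf("rc=%d after %d attempts\n",
               do_with_retry(flaky_op, 5), attempts);
        return 0;
    }
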
@@ -1821,9 +1827,8 @@ static int do_reset(struct ibmvnic_adapter *adapter,
if (rc)
return rc;
}
+ ibmvnic_disable_irqs(adapter);
}
-
- ibmvnic_disable_irqs(adapter);
adapter->state = VNIC_CLOSED;
if (reset_state == VNIC_CLOSED)
@@ -2617,18 +2622,21 @@ static int enable_scrq_irq(struct ibmvnic_adapter *adapter,
{
struct device *dev = &adapter->vdev->dev;
unsigned long rc;
- u64 val;
if (scrq->hw_irq > 0x100000000ULL) {
dev_err(dev, "bad hw_irq = %lx\n", scrq->hw_irq);
return 1;
}
- val = (0xff000000) | scrq->hw_irq;
- rc = plpar_hcall_norets(H_EOI, val);
- if (rc)
- dev_err(dev, "H_EOI FAILED irq 0x%llx. rc=%ld\n",
- val, rc);
+ if (adapter->resetting &&
+ adapter->reset_reason == VNIC_RESET_MOBILITY) {
+ u64 val = (0xff000000) | scrq->hw_irq;
+
+ rc = plpar_hcall_norets(H_EOI, val);
+ if (rc)
+ dev_err(dev, "H_EOI FAILED irq 0x%llx. rc=%ld\n",
+ val, rc);
+ }
rc = plpar_hcall_norets(H_VIOCTL, adapter->vdev->unit_address,
H_ENABLE_VIO_INTERRUPT, scrq->hw_irq, 0, 0);
@@ -4586,14 +4594,6 @@ static int ibmvnic_init(struct ibmvnic_adapter *adapter)
release_crq_queue(adapter);
}
- rc = init_stats_buffers(adapter);
- if (rc)
- return rc;
-
- rc = init_stats_token(adapter);
- if (rc)
- return rc;
-
return rc;
}
@@ -4662,13 +4662,21 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id)
goto ibmvnic_init_fail;
} while (rc == EAGAIN);
+ rc = init_stats_buffers(adapter);
+ if (rc)
+ goto ibmvnic_init_fail;
+
+ rc = init_stats_token(adapter);
+ if (rc)
+ goto ibmvnic_stats_fail;
+
netdev->mtu = adapter->req_mtu - ETH_HLEN;
netdev->min_mtu = adapter->min_mtu - ETH_HLEN;
netdev->max_mtu = adapter->max_mtu - ETH_HLEN;
rc = device_create_file(&dev->dev, &dev_attr_failover);
if (rc)
- goto ibmvnic_init_fail;
+ goto ibmvnic_dev_file_err;
netif_carrier_off(netdev);
rc = register_netdev(netdev);
@@ -4687,6 +4695,12 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id)
ibmvnic_register_fail:
device_remove_file(&dev->dev, &dev_attr_failover);
+ibmvnic_dev_file_err:
+ release_stats_token(adapter);
+
+ibmvnic_stats_fail:
+ release_stats_buffers(adapter);
+
ibmvnic_init_fail:
release_sub_crqs(adapter, 1);
release_crq_queue(adapter);
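
Moving the stats allocations out of ibmvnic_init() and into probe gives each step its own unwind label, so a failure releases exactly what was already set up, in reverse order. A small runnable sketch of the label discipline; every function name is illustrative, and register_x() is forced to fail so the unwind prints in reverse order:

    #include <stdio.h>

    static int alloc_a(void)    { return 0; }
    static int alloc_b(void)    { return 0; }
    static int register_x(void) { return -1; }  /* force the error path */
    static void release_a(void) { puts("release_a"); }
    static void release_b(void) { puts("release_b"); }

    static int example_probe(void)
    {
        int rc;

        rc = alloc_a();
        if (rc)
            goto err_out;
        rc = alloc_b();
        if (rc)
            goto err_free_a;        /* undo only what already succeeded */
        rc = register_x();
        if (rc)
            goto err_free_b;
        return 0;

    err_free_b:
        release_b();                /* teardown runs in reverse order */
    err_free_a:
        release_a();
    err_out:
        return rc;
    }

    int main(void) { return example_probe() ? 1 : 0; }
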
diff --git a/drivers/net/ethernet/intel/ice/ice_controlq.c b/drivers/net/ethernet/intel/ice/ice_controlq.c
index 5909a4407e38..7c511f144ed6 100644
--- a/drivers/net/ethernet/intel/ice/ice_controlq.c
+++ b/drivers/net/ethernet/intel/ice/ice_controlq.c
@@ -1014,10 +1014,10 @@ ice_clean_rq_elem(struct ice_hw *hw, struct ice_ctl_q_info *cq,
desc = ICE_CTL_Q_DESC(cq->rq, ntc);
desc_idx = ntc;
+ cq->rq_last_status = (enum ice_aq_err)le16_to_cpu(desc->retval);
flags = le16_to_cpu(desc->flags);
if (flags & ICE_AQ_FLAG_ERR) {
ret_code = ICE_ERR_AQ_ERROR;
- cq->rq_last_status = (enum ice_aq_err)le16_to_cpu(desc->retval);
ice_debug(hw, ICE_DBG_AQ_MSG,
"Control Receive Queue Event received with error 0x%x\n",
cq->rq_last_status);
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
index 68af127987bc..cead23e3db0c 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
@@ -943,8 +943,8 @@ err2:
kfree(ipsec->ip_tbl);
kfree(ipsec->rx_tbl);
kfree(ipsec->tx_tbl);
+ kfree(ipsec);
err1:
- kfree(adapter->ipsec);
netdev_err(adapter->netdev, "Unable to allocate memory for SA tables");
}
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index afadba99f7b8..2ecd55856c50 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -9054,7 +9054,6 @@ static int parse_tc_actions(struct ixgbe_adapter *adapter,
{
const struct tc_action *a;
LIST_HEAD(actions);
- int err;
if (!tcf_exts_has_actions(exts))
return -EINVAL;
@@ -9075,11 +9074,11 @@ static int parse_tc_actions(struct ixgbe_adapter *adapter,
if (!dev)
return -EINVAL;
- err = handle_redirect_action(adapter, dev->ifindex, queue,
- action);
- if (err == 0)
- return err;
+ return handle_redirect_action(adapter, dev->ifindex,
+ queue, action);
}
+
+ return -EINVAL;
}
return -EINVAL;
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
index 3123267dfba9..9592f3e3e42e 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
@@ -3427,6 +3427,9 @@ static s32 ixgbe_reset_hw_X550em(struct ixgbe_hw *hw)
hw->phy.sfp_setup_needed = false;
}
+ if (status == IXGBE_ERR_SFP_NOT_SUPPORTED)
+ return status;
+
/* Reset PHY */
if (!hw->phy.reset_disable && hw->phy.ops.reset)
hw->phy.ops.reset(hw);
diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
index e3d04f226d57..850f8af95e49 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
@@ -4137,7 +4137,7 @@ out_drop:
return NETDEV_TX_OK;
}
-static int ixgbevf_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
+static netdev_tx_t ixgbevf_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
{
struct ixgbevf_adapter *adapter = netdev_priv(netdev);
struct ixgbevf_ring *tx_ring;
diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c
index 4202f9b5b966..6f410235987c 100644
--- a/drivers/net/ethernet/marvell/mvpp2.c
+++ b/drivers/net/ethernet/marvell/mvpp2.c
@@ -942,6 +942,7 @@ struct mvpp2 {
struct clk *pp_clk;
struct clk *gop_clk;
struct clk *mg_clk;
+ struct clk *mg_core_clk;
struct clk *axi_clk;
/* List of pointers to port structures */
@@ -8768,18 +8769,27 @@ static int mvpp2_probe(struct platform_device *pdev)
err = clk_prepare_enable(priv->mg_clk);
if (err < 0)
goto err_gop_clk;
+
+ priv->mg_core_clk = devm_clk_get(&pdev->dev, "mg_core_clk");
+ if (IS_ERR(priv->mg_core_clk)) {
+ priv->mg_core_clk = NULL;
+ } else {
+ err = clk_prepare_enable(priv->mg_core_clk);
+ if (err < 0)
+ goto err_mg_clk;
+ }
}
priv->axi_clk = devm_clk_get(&pdev->dev, "axi_clk");
if (IS_ERR(priv->axi_clk)) {
err = PTR_ERR(priv->axi_clk);
if (err == -EPROBE_DEFER)
- goto err_gop_clk;
+ goto err_mg_core_clk;
priv->axi_clk = NULL;
} else {
err = clk_prepare_enable(priv->axi_clk);
if (err < 0)
- goto err_gop_clk;
+ goto err_mg_core_clk;
}
/* Get system's tclk rate */
@@ -8793,7 +8803,7 @@ static int mvpp2_probe(struct platform_device *pdev)
if (priv->hw_version == MVPP22) {
err = dma_set_mask(&pdev->dev, MVPP2_DESC_DMA_MASK);
if (err)
- goto err_mg_clk;
+ goto err_axi_clk;
/* Sadly, the BM pools all share the same register to
* store the high 32 bits of their address. So they
* must all have the same high 32 bits, which forces
@@ -8801,14 +8811,14 @@ static int mvpp2_probe(struct platform_device *pdev)
*/
err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
if (err)
- goto err_mg_clk;
+ goto err_axi_clk;
}
/* Initialize network controller */
err = mvpp2_init(pdev, priv);
if (err < 0) {
dev_err(&pdev->dev, "failed to initialize controller\n");
- goto err_mg_clk;
+ goto err_axi_clk;
}
/* Initialize ports */
@@ -8821,7 +8831,7 @@ static int mvpp2_probe(struct platform_device *pdev)
if (priv->port_count == 0) {
dev_err(&pdev->dev, "no ports enabled\n");
err = -ENODEV;
- goto err_mg_clk;
+ goto err_axi_clk;
}
/* Statistics must be gathered regularly because some of them (like
@@ -8849,8 +8859,13 @@ err_port_probe:
mvpp2_port_remove(priv->port_list[i]);
i++;
}
-err_mg_clk:
+err_axi_clk:
clk_disable_unprepare(priv->axi_clk);
+
+err_mg_core_clk:
+ if (priv->hw_version == MVPP22)
+ clk_disable_unprepare(priv->mg_core_clk);
+err_mg_clk:
if (priv->hw_version == MVPP22)
clk_disable_unprepare(priv->mg_clk);
err_gop_clk:
@@ -8897,6 +8912,7 @@ static int mvpp2_remove(struct platform_device *pdev)
return 0;
clk_disable_unprepare(priv->axi_clk);
+ clk_disable_unprepare(priv->mg_core_clk);
clk_disable_unprepare(priv->mg_clk);
clk_disable_unprepare(priv->pp_clk);
clk_disable_unprepare(priv->gop_clk);
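
The new mg_core_clk is treated as optional: a failed devm_clk_get() leaves the pointer NULL and probing continues, while the remove path may call clk_disable_unprepare() unconditionally because the clk API ignores a NULL clock. A hedged sketch of the optional-clock idiom; note the hunk above does not special-case -EPROBE_DEFER for this clock, which a stricter version would:

    #include <linux/clk.h>
    #include <linux/err.h>

    static int example_get_optional_clk(struct device *dev, struct clk **out)
    {
        struct clk *clk;
        int err;

        clk = devm_clk_get(dev, "mg_core_clk");
        if (IS_ERR(clk)) {
            *out = NULL;    /* clock not described: skip it */
            return 0;
        }

        err = clk_prepare_enable(clk);
        if (err < 0)
            return err;

        *out = clk;
        return 0;
    }
    /* Teardown can call clk_disable_unprepare(*out) unconditionally: the
     * clk framework treats a NULL clock as a no-op.
     */
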
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
index a30a2e95d13f..f11b45001cad 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
@@ -1027,6 +1027,22 @@ static int mlx4_en_set_coalesce(struct net_device *dev,
if (!coal->tx_max_coalesced_frames_irq)
return -EINVAL;
+ if (coal->tx_coalesce_usecs > MLX4_EN_MAX_COAL_TIME ||
+ coal->rx_coalesce_usecs > MLX4_EN_MAX_COAL_TIME ||
+ coal->rx_coalesce_usecs_low > MLX4_EN_MAX_COAL_TIME ||
+ coal->rx_coalesce_usecs_high > MLX4_EN_MAX_COAL_TIME) {
+ netdev_info(dev, "%s: maximum coalesce time supported is %d usecs\n",
+ __func__, MLX4_EN_MAX_COAL_TIME);
+ return -ERANGE;
+ }
+
+ if (coal->tx_max_coalesced_frames > MLX4_EN_MAX_COAL_PKTS ||
+ coal->rx_max_coalesced_frames > MLX4_EN_MAX_COAL_PKTS) {
+ netdev_info(dev, "%s: maximum coalesced frames supported is %d\n",
+ __func__, MLX4_EN_MAX_COAL_PKTS);
+ return -ERANGE;
+ }
+
priv->rx_frames = (coal->rx_max_coalesced_frames ==
MLX4_EN_AUTO_CONF) ?
MLX4_EN_RX_COAL_TARGET :
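
The new checks reject any request whose 32-bit ethtool values would not fit the 16-bit moderation fields (both limits are U16_MAX, per the mlx4_en.h hunk below), returning -ERANGE before any driver state changes. A standalone sketch of validating wide user input against narrow storage:

    #include <errno.h>
    #include <limits.h>
    #include <stdint.h>
    #include <stdio.h>

    #define MAX_COAL_TIME USHRT_MAX /* mirrors MLX4_EN_MAX_COAL_TIME (U16_MAX) */
    #define MAX_COAL_PKTS USHRT_MAX /* mirrors MLX4_EN_MAX_COAL_PKTS (U16_MAX) */

    /* Reject 32-bit user input that cannot fit the 16-bit hardware fields. */
    static int check_coalesce(uint32_t usecs, uint32_t frames)
    {
        if (usecs > MAX_COAL_TIME || frames > MAX_COAL_PKTS)
            return -ERANGE;
        return 0;
    }

    int main(void)
    {
        printf("%d\n", check_coalesce(70000, 16)); /* 70000 > 65535: rejected */
        return 0;
    }
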
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index e0adac4a9a19..9670b33fc9b1 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -3324,12 +3324,11 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
MAX_TX_RINGS, GFP_KERNEL);
if (!priv->tx_ring[t]) {
err = -ENOMEM;
- goto err_free_tx;
+ goto out;
}
priv->tx_cq[t] = kzalloc(sizeof(struct mlx4_en_cq *) *
MAX_TX_RINGS, GFP_KERNEL);
if (!priv->tx_cq[t]) {
- kfree(priv->tx_ring[t]);
err = -ENOMEM;
goto out;
}
@@ -3582,11 +3581,6 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
return 0;
-err_free_tx:
- while (t--) {
- kfree(priv->tx_ring[t]);
- kfree(priv->tx_cq[t]);
- }
out:
mlx4_en_destroy_netdev(dev);
return err;
diff --git a/drivers/net/ethernet/mellanox/mlx4/icm.c b/drivers/net/ethernet/mellanox/mlx4/icm.c
index a822f7a56bc5..685337d58276 100644
--- a/drivers/net/ethernet/mellanox/mlx4/icm.c
+++ b/drivers/net/ethernet/mellanox/mlx4/icm.c
@@ -43,12 +43,12 @@
#include "fw.h"
/*
- * We allocate in as big chunks as we can, up to a maximum of 256 KB
- * per chunk.
+ * We allocate in page-size chunks (default 4KB on many archs) to avoid
+ * high-order memory allocations in fragmented / high-usage memory situations.
*/
enum {
- MLX4_ICM_ALLOC_SIZE = 1 << 18,
- MLX4_TABLE_CHUNK_SIZE = 1 << 18
+ MLX4_ICM_ALLOC_SIZE = PAGE_SIZE,
+ MLX4_TABLE_CHUNK_SIZE = PAGE_SIZE,
};
static void mlx4_free_icm_pages(struct mlx4_dev *dev, struct mlx4_icm_chunk *chunk)
@@ -398,9 +398,11 @@ int mlx4_init_icm_table(struct mlx4_dev *dev, struct mlx4_icm_table *table,
u64 size;
obj_per_chunk = MLX4_TABLE_CHUNK_SIZE / obj_size;
+ if (WARN_ON(!obj_per_chunk))
+ return -EINVAL;
num_icm = (nobj + obj_per_chunk - 1) / obj_per_chunk;
- table->icm = kcalloc(num_icm, sizeof(*table->icm), GFP_KERNEL);
+ table->icm = kvzalloc(num_icm * sizeof(*table->icm), GFP_KERNEL);
if (!table->icm)
return -ENOMEM;
table->virt = virt;
@@ -446,7 +448,7 @@ err:
mlx4_free_icm(dev, table->icm[i], use_coherent);
}
- kfree(table->icm);
+ kvfree(table->icm);
return -ENOMEM;
}
@@ -462,5 +464,5 @@ void mlx4_cleanup_icm_table(struct mlx4_dev *dev, struct mlx4_icm_table *table)
mlx4_free_icm(dev, table->icm[i], table->coherent);
}
- kfree(table->icm);
+ kvfree(table->icm);
}
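
Shrinking the chunk size to PAGE_SIZE multiplies num_icm, so the table of chunk pointers itself can now be too large for a physically contiguous kcalloc() to be reliable; kvzalloc()/kvfree() transparently fall back to vmalloc memory. A hedged sketch of the pairing, with an illustrative struct and sizes:

    #include <linux/mm.h>
    #include <linux/slab.h>

    struct chunk_ref { void *ptr; };   /* illustrative */

    /* kvzalloc() tries kmalloc first and falls back to vmalloc, so a
     * large table no longer needs physically contiguous memory.
     */
    static struct chunk_ref *alloc_chunk_table(size_t num)
    {
        return kvzalloc(num * sizeof(struct chunk_ref), GFP_KERNEL);
    }

    static void free_chunk_table(struct chunk_ref *table)
    {
        kvfree(table);   /* correct for either backing allocator */
    }
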
diff --git a/drivers/net/ethernet/mellanox/mlx4/intf.c b/drivers/net/ethernet/mellanox/mlx4/intf.c
index 2edcce98ab2d..65482f004e50 100644
--- a/drivers/net/ethernet/mellanox/mlx4/intf.c
+++ b/drivers/net/ethernet/mellanox/mlx4/intf.c
@@ -172,7 +172,7 @@ int mlx4_do_bond(struct mlx4_dev *dev, bool enable)
list_add_tail(&dev_ctx->list, &priv->ctx_list);
spin_unlock_irqrestore(&priv->ctx_lock, flags);
- mlx4_dbg(dev, "Inrerface for protocol %d restarted with when bonded mode is %s\n",
+ mlx4_dbg(dev, "Interface for protocol %d restarted with bonded mode %s\n",
dev_ctx->intf->protocol, enable ?
"enabled" : "disabled");
}
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index bfef69235d71..60172a38c4a4 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -1317,7 +1317,7 @@ static int mlx4_mf_unbond(struct mlx4_dev *dev)
ret = mlx4_unbond_fs_rules(dev);
if (ret)
- mlx4_warn(dev, "multifunction unbond for flow rules failedi (%d)\n", ret);
+ mlx4_warn(dev, "multifunction unbond for flow rules failed (%d)\n", ret);
ret1 = mlx4_unbond_mac_table(dev);
if (ret1) {
mlx4_warn(dev, "multifunction unbond for MAC table failed (%d)\n", ret1);
@@ -2929,6 +2929,7 @@ static int mlx4_init_port_info(struct mlx4_dev *dev, int port)
mlx4_err(dev, "Failed to create file for port %d\n", port);
devlink_port_unregister(&info->devlink_port);
info->port = -1;
+ return err;
}
sprintf(info->dev_mtu_name, "mlx4_port%d_mtu", port);
@@ -2950,9 +2951,10 @@ static int mlx4_init_port_info(struct mlx4_dev *dev, int port)
&info->port_attr);
devlink_port_unregister(&info->devlink_port);
info->port = -1;
+ return err;
}
- return err;
+ return 0;
}
static void mlx4_cleanup_port_info(struct mlx4_port_info *info)
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
index f7c81133594f..ace6545f82e6 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
@@ -132,6 +132,9 @@
#define MLX4_EN_TX_COAL_PKTS 16
#define MLX4_EN_TX_COAL_TIME 0x10
+#define MLX4_EN_MAX_COAL_PKTS U16_MAX
+#define MLX4_EN_MAX_COAL_TIME U16_MAX
+
#define MLX4_EN_RX_RATE_LOW 400000
#define MLX4_EN_RX_COAL_TIME_LOW 0
#define MLX4_EN_RX_RATE_HIGH 450000
@@ -552,8 +555,8 @@ struct mlx4_en_priv {
u16 rx_usecs_low;
u32 pkt_rate_high;
u16 rx_usecs_high;
- u16 sample_interval;
- u16 adaptive_rx_coal;
+ u32 sample_interval;
+ u32 adaptive_rx_coal;
u32 msg_enable;
u32 loopback_ok;
u32 validate_loopback;
diff --git a/drivers/net/ethernet/mellanox/mlx4/qp.c b/drivers/net/ethernet/mellanox/mlx4/qp.c
index 3aaf4bad6c5a..427e7a31862c 100644
--- a/drivers/net/ethernet/mellanox/mlx4/qp.c
+++ b/drivers/net/ethernet/mellanox/mlx4/qp.c
@@ -393,11 +393,11 @@ struct mlx4_qp *mlx4_qp_lookup(struct mlx4_dev *dev, u32 qpn)
struct mlx4_qp_table *qp_table = &mlx4_priv(dev)->qp_table;
struct mlx4_qp *qp;
- spin_lock(&qp_table->lock);
+ spin_lock_irq(&qp_table->lock);
qp = __mlx4_qp_lookup(dev, qpn);
- spin_unlock(&qp_table->lock);
+ spin_unlock_irq(&qp_table->lock);
return qp;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
index 3d46ef48d5b8..c641d5656b2d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
@@ -1007,12 +1007,14 @@ static void mlx5e_trust_update_sq_inline_mode(struct mlx5e_priv *priv)
mutex_lock(&priv->state_lock);
- if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
- goto out;
-
new_channels.params = priv->channels.params;
mlx5e_trust_update_tx_min_inline_mode(priv, &new_channels.params);
+ if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
+ priv->channels.params = new_channels.params;
+ goto out;
+ }
+
/* Skip if tx_min_inline is the same */
if (new_channels.params.tx_min_inline_mode ==
priv->channels.params.tx_min_inline_mode)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index d8f68e4d1018..876c3e4c6193 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -877,13 +877,14 @@ static const struct net_device_ops mlx5e_netdev_ops_rep = {
};
static void mlx5e_build_rep_params(struct mlx5_core_dev *mdev,
- struct mlx5e_params *params)
+ struct mlx5e_params *params, u16 mtu)
{
u8 cq_period_mode = MLX5_CAP_GEN(mdev, cq_period_start_from_cqe) ?
MLX5_CQ_PERIOD_MODE_START_FROM_CQE :
MLX5_CQ_PERIOD_MODE_START_FROM_EQE;
params->hard_mtu = MLX5E_ETH_HARD_MTU;
+ params->sw_mtu = mtu;
params->log_sq_size = MLX5E_REP_PARAMS_LOG_SQ_SIZE;
params->rq_wq_type = MLX5_WQ_TYPE_LINKED_LIST;
params->log_rq_mtu_frames = MLX5E_REP_PARAMS_LOG_RQ_SIZE;
@@ -931,7 +932,7 @@ static void mlx5e_init_rep(struct mlx5_core_dev *mdev,
priv->channels.params.num_channels = profile->max_nch(mdev);
- mlx5e_build_rep_params(mdev, &priv->channels.params);
+ mlx5e_build_rep_params(mdev, &priv->channels.params, netdev->mtu);
mlx5e_build_rep_netdev(netdev);
mlx5e_timestamp_init(priv);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index 176645762e49..1ff0b0e93804 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -615,6 +615,45 @@ static inline bool is_last_ethertype_ip(struct sk_buff *skb, int *network_depth)
return (ethertype == htons(ETH_P_IP) || ethertype == htons(ETH_P_IPV6));
}
+static __be32 mlx5e_get_fcs(struct sk_buff *skb)
+{
+ int last_frag_sz, bytes_in_prev, nr_frags;
+ u8 *fcs_p1, *fcs_p2;
+ skb_frag_t *last_frag;
+ __be32 fcs_bytes;
+
+ if (!skb_is_nonlinear(skb))
+ return *(__be32 *)(skb->data + skb->len - ETH_FCS_LEN);
+
+ nr_frags = skb_shinfo(skb)->nr_frags;
+ last_frag = &skb_shinfo(skb)->frags[nr_frags - 1];
+ last_frag_sz = skb_frag_size(last_frag);
+
+ /* If all FCS data is in last frag */
+ if (last_frag_sz >= ETH_FCS_LEN)
+ return *(__be32 *)(skb_frag_address(last_frag) +
+ last_frag_sz - ETH_FCS_LEN);
+
+ fcs_p2 = (u8 *)skb_frag_address(last_frag);
+ bytes_in_prev = ETH_FCS_LEN - last_frag_sz;
+
+ /* Find where the other part of the FCS is: the linear area or another frag */
+ if (nr_frags == 1) {
+ fcs_p1 = skb_tail_pointer(skb);
+ } else {
+ skb_frag_t *prev_frag = &skb_shinfo(skb)->frags[nr_frags - 2];
+
+ fcs_p1 = skb_frag_address(prev_frag) +
+ skb_frag_size(prev_frag);
+ }
+ fcs_p1 -= bytes_in_prev;
+
+ memcpy(&fcs_bytes, fcs_p1, bytes_in_prev);
+ memcpy(((u8 *)&fcs_bytes) + bytes_in_prev, fcs_p2, last_frag_sz);
+
+ return fcs_bytes;
+}
+
static inline void mlx5e_handle_csum(struct net_device *netdev,
struct mlx5_cqe64 *cqe,
struct mlx5e_rq *rq,
@@ -643,6 +682,9 @@ static inline void mlx5e_handle_csum(struct net_device *netdev,
skb->csum = csum_partial(skb->data + ETH_HLEN,
network_depth - ETH_HLEN,
skb->csum);
+ if (unlikely(netdev->features & NETIF_F_RXFCS))
+ skb->csum = csum_add(skb->csum,
+ (__force __wsum)mlx5e_get_fcs(skb));
rq->stats.csum_complete++;
return;
}
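
The hardware-reported CHECKSUM_COMPLETE value does not cover the trailing FCS, but with NETIF_F_RXFCS the FCS stays on the skb and the stack expects skb->csum to cover everything the skb contains, so the driver folds those four bytes back in with csum_add(); mlx5e_get_fcs() handles the awkward case where they straddle the last two buffers. A standalone sketch of extracting a 4-byte trailer that may be split across two buffers:

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    #define FCS_LEN 4

    /* Copy the last FCS_LEN bytes of a frame split across two buffers
     * (prev followed by last); mirrors the straddling case above.
     */
    static uint32_t get_trailing_fcs(const uint8_t *prev, size_t prev_len,
                                     const uint8_t *last, size_t last_len)
    {
        uint8_t buf[FCS_LEN];
        uint32_t fcs;

        if (last_len >= FCS_LEN) {
            memcpy(buf, last + last_len - FCS_LEN, FCS_LEN);
        } else {
            size_t from_prev = FCS_LEN - last_len;

            memcpy(buf, prev + prev_len - from_prev, from_prev);
            memcpy(buf + from_prev, last, last_len);
        }
        memcpy(&fcs, buf, FCS_LEN);
        return fcs;
    }

    int main(void)
    {
        uint8_t a[] = { 0, 0, 0xde, 0xad }, b[] = { 0xbe, 0xef };

        printf("0x%08x\n", get_trailing_fcs(a, sizeof(a), b, sizeof(b)));
        return 0;
    }
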
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c
index 707976482c09..027f54ac1ca2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c
@@ -290,7 +290,7 @@ static int mlx5e_test_loopback(struct mlx5e_priv *priv)
if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
netdev_err(priv->netdev,
- "\tCan't perform loobpack test while device is down\n");
+ "\tCan't perform loopback test while device is down\n");
return -ENODEV;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 4197001f9801..b94276db3ce9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -1261,6 +1261,10 @@ static int __parse_cls_flower(struct mlx5e_priv *priv,
f->mask);
addr_type = key->addr_type;
+ /* the HW doesn't support frag first/later */
+ if (mask->flags & FLOW_DIS_FIRST_FRAG)
+ return -EOPNOTSUPP;
+
if (mask->flags & FLOW_DIS_IS_FRAGMENT) {
MLX5_SET(fte_match_set_lyr_2_4, headers_c, frag, 1);
MLX5_SET(fte_match_set_lyr_2_4, headers_v, frag,
@@ -1864,7 +1868,8 @@ static bool modify_header_match_supported(struct mlx5_flow_spec *spec,
}
ip_proto = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ip_protocol);
- if (modify_ip_header && ip_proto != IPPROTO_TCP && ip_proto != IPPROTO_UDP) {
+ if (modify_ip_header && ip_proto != IPPROTO_TCP &&
+ ip_proto != IPPROTO_UDP && ip_proto != IPPROTO_ICMP) {
pr_info("can't offload re-write of ip proto %d\n", ip_proto);
return false;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
index 20297108528a..5532aa3675c7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
@@ -255,7 +255,7 @@ mlx5e_txwqe_build_dsegs(struct mlx5e_txqsq *sq, struct sk_buff *skb,
dma_addr = dma_map_single(sq->pdev, skb_data, headlen,
DMA_TO_DEVICE);
if (unlikely(dma_mapping_error(sq->pdev, dma_addr)))
- return -ENOMEM;
+ goto dma_unmap_wqe_err;
dseg->addr = cpu_to_be64(dma_addr);
dseg->lkey = sq->mkey_be;
@@ -273,7 +273,7 @@ mlx5e_txwqe_build_dsegs(struct mlx5e_txqsq *sq, struct sk_buff *skb,
dma_addr = skb_frag_dma_map(sq->pdev, frag, 0, fsz,
DMA_TO_DEVICE);
if (unlikely(dma_mapping_error(sq->pdev, dma_addr)))
- return -ENOMEM;
+ goto dma_unmap_wqe_err;
dseg->addr = cpu_to_be64(dma_addr);
dseg->lkey = sq->mkey_be;
@@ -285,6 +285,10 @@ mlx5e_txwqe_build_dsegs(struct mlx5e_txqsq *sq, struct sk_buff *skb,
}
return num_dma;
+
+dma_unmap_wqe_err:
+ mlx5e_dma_unmap_wqe_err(sq, num_dma);
+ return -ENOMEM;
}
static inline void
@@ -380,17 +384,15 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
num_dma = mlx5e_txwqe_build_dsegs(sq, skb, skb_data, headlen,
(struct mlx5_wqe_data_seg *)cseg + ds_cnt);
if (unlikely(num_dma < 0))
- goto dma_unmap_wqe_err;
+ goto err_drop;
mlx5e_txwqe_complete(sq, skb, opcode, ds_cnt + num_dma,
num_bytes, num_dma, wi, cseg);
return NETDEV_TX_OK;
-dma_unmap_wqe_err:
+err_drop:
sq->stats.dropped++;
- mlx5e_dma_unmap_wqe_err(sq, wi->num_dma);
-
dev_kfree_skb_any(skb);
return NETDEV_TX_OK;
@@ -645,17 +647,15 @@ netdev_tx_t mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
num_dma = mlx5e_txwqe_build_dsegs(sq, skb, skb_data, headlen,
(struct mlx5_wqe_data_seg *)cseg + ds_cnt);
if (unlikely(num_dma < 0))
- goto dma_unmap_wqe_err;
+ goto err_drop;
mlx5e_txwqe_complete(sq, skb, opcode, ds_cnt + num_dma,
num_bytes, num_dma, wi, cseg);
return NETDEV_TX_OK;
-dma_unmap_wqe_err:
+err_drop:
sq->stats.dropped++;
- mlx5e_dma_unmap_wqe_err(sq, wi->num_dma);
-
dev_kfree_skb_any(skb);
return NETDEV_TX_OK;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index c1c94974e16b..1814f803bd2c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -34,6 +34,9 @@
#include <linux/module.h>
#include <linux/mlx5/driver.h>
#include <linux/mlx5/cmd.h>
+#ifdef CONFIG_RFS_ACCEL
+#include <linux/cpu_rmap.h>
+#endif
#include "mlx5_core.h"
#include "fpga/core.h"
#include "eswitch.h"
@@ -923,3 +926,28 @@ int mlx5_core_eq_query(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
MLX5_SET(query_eq_in, in, eq_number, eq->eqn);
return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen);
}
+
+/* This function should only be called after mlx5_cmd_force_teardown_hca */
+void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev)
+{
+ struct mlx5_eq_table *table = &dev->priv.eq_table;
+ struct mlx5_eq *eq;
+
+#ifdef CONFIG_RFS_ACCEL
+ if (dev->rmap) {
+ free_irq_cpu_rmap(dev->rmap);
+ dev->rmap = NULL;
+ }
+#endif
+ list_for_each_entry(eq, &table->comp_eqs_list, list)
+ free_irq(eq->irqn, eq);
+
+ free_irq(table->pages_eq.irqn, &table->pages_eq);
+ free_irq(table->async_eq.irqn, &table->async_eq);
+ free_irq(table->cmd_eq.irqn, &table->cmd_eq);
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+ if (MLX5_CAP_GEN(dev, pg))
+ free_irq(table->pfault_eq.irqn, &table->pfault_eq);
+#endif
+ pci_free_irq_vectors(dev->pdev);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index 332bc56306bf..1352d13eedb3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -2175,26 +2175,35 @@ int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw,
memset(vf_stats, 0, sizeof(*vf_stats));
vf_stats->rx_packets =
MLX5_GET_CTR(out, received_eth_unicast.packets) +
+ MLX5_GET_CTR(out, received_ib_unicast.packets) +
MLX5_GET_CTR(out, received_eth_multicast.packets) +
+ MLX5_GET_CTR(out, received_ib_multicast.packets) +
MLX5_GET_CTR(out, received_eth_broadcast.packets);
vf_stats->rx_bytes =
MLX5_GET_CTR(out, received_eth_unicast.octets) +
+ MLX5_GET_CTR(out, received_ib_unicast.octets) +
MLX5_GET_CTR(out, received_eth_multicast.octets) +
+ MLX5_GET_CTR(out, received_ib_multicast.octets) +
MLX5_GET_CTR(out, received_eth_broadcast.octets);
vf_stats->tx_packets =
MLX5_GET_CTR(out, transmitted_eth_unicast.packets) +
+ MLX5_GET_CTR(out, transmitted_ib_unicast.packets) +
MLX5_GET_CTR(out, transmitted_eth_multicast.packets) +
+ MLX5_GET_CTR(out, transmitted_ib_multicast.packets) +
MLX5_GET_CTR(out, transmitted_eth_broadcast.packets);
vf_stats->tx_bytes =
MLX5_GET_CTR(out, transmitted_eth_unicast.octets) +
+ MLX5_GET_CTR(out, transmitted_ib_unicast.octets) +
MLX5_GET_CTR(out, transmitted_eth_multicast.octets) +
+ MLX5_GET_CTR(out, transmitted_ib_multicast.octets) +
MLX5_GET_CTR(out, transmitted_eth_broadcast.octets);
vf_stats->multicast =
- MLX5_GET_CTR(out, received_eth_multicast.packets);
+ MLX5_GET_CTR(out, received_eth_multicast.packets) +
+ MLX5_GET_CTR(out, received_ib_multicast.packets);
vf_stats->broadcast =
MLX5_GET_CTR(out, received_eth_broadcast.packets);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
index 0f5da499a223..fad8c2e3804e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
@@ -237,19 +237,17 @@ static void *mlx5_fpga_ipsec_cmd_exec(struct mlx5_core_dev *mdev,
context->buf.sg[0].data = &context->command;
spin_lock_irqsave(&fdev->ipsec->pending_cmds_lock, flags);
- list_add_tail(&context->list, &fdev->ipsec->pending_cmds);
+ res = mlx5_fpga_sbu_conn_sendmsg(fdev->ipsec->conn, &context->buf);
+ if (!res)
+ list_add_tail(&context->list, &fdev->ipsec->pending_cmds);
spin_unlock_irqrestore(&fdev->ipsec->pending_cmds_lock, flags);
- res = mlx5_fpga_sbu_conn_sendmsg(fdev->ipsec->conn, &context->buf);
if (res) {
- mlx5_fpga_warn(fdev, "Failure sending IPSec command: %d\n",
- res);
- spin_lock_irqsave(&fdev->ipsec->pending_cmds_lock, flags);
- list_del(&context->list);
- spin_unlock_irqrestore(&fdev->ipsec->pending_cmds_lock, flags);
+ mlx5_fpga_warn(fdev, "Failed to send IPSec command: %d\n", res);
kfree(context);
return ERR_PTR(res);
}
+
/* Context will be freed by wait func after completion */
return context;
}
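
The original flow queued the command on pending_cmds, dropped the lock, sent, and un-queued on failure, leaving a window where the completion handler could observe a command that was never actually sent. The fix performs the (non-sleeping) send under the same lock and only queues on success. A hedged sketch of the ordering; send_msg() stands in for mlx5_fpga_sbu_conn_sendmsg():

    #include <linux/list.h>
    #include <linux/spinlock.h>

    int send_msg(struct list_head *entry);   /* illustrative stand-in */

    static int example_submit(spinlock_t *lock, struct list_head *pending,
                              struct list_head *entry)
    {
        unsigned long flags;
        int res;

        spin_lock_irqsave(lock, flags);
        res = send_msg(entry);              /* must not sleep under the lock */
        if (!res)
            list_add_tail(entry, pending);  /* visible only once sent */
        spin_unlock_irqrestore(lock, flags);

        return res;
    }
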
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index de51e7c39bc8..c39c1692e674 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -187,6 +187,7 @@ static void del_sw_ns(struct fs_node *node);
static void del_sw_hw_rule(struct fs_node *node);
static bool mlx5_flow_dests_cmp(struct mlx5_flow_destination *d1,
struct mlx5_flow_destination *d2);
+static void cleanup_root_ns(struct mlx5_flow_root_namespace *root_ns);
static struct mlx5_flow_rule *
find_flow_rule(struct fs_fte *fte,
struct mlx5_flow_destination *dest);
@@ -481,7 +482,8 @@ static void del_sw_hw_rule(struct fs_node *node)
if (rule->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_COUNTER &&
--fte->dests_size) {
- modify_mask = BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_ACTION);
+ modify_mask = BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_ACTION) |
+ BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_FLOW_COUNTERS);
fte->action.action &= ~MLX5_FLOW_CONTEXT_ACTION_COUNT;
update_fte = true;
goto out;
@@ -2351,23 +2353,27 @@ static int create_anchor_flow_table(struct mlx5_flow_steering *steering)
static int init_root_ns(struct mlx5_flow_steering *steering)
{
+ int err;
+
steering->root_ns = create_root_ns(steering, FS_FT_NIC_RX);
if (!steering->root_ns)
- goto cleanup;
+ return -ENOMEM;
- if (init_root_tree(steering, &root_fs, &steering->root_ns->ns.node))
- goto cleanup;
+ err = init_root_tree(steering, &root_fs, &steering->root_ns->ns.node);
+ if (err)
+ goto out_err;
set_prio_attrs(steering->root_ns);
-
- if (create_anchor_flow_table(steering))
- goto cleanup;
+ err = create_anchor_flow_table(steering);
+ if (err)
+ goto out_err;
return 0;
-cleanup:
- mlx5_cleanup_fs(steering->dev);
- return -ENOMEM;
+out_err:
+ cleanup_root_ns(steering->root_ns);
+ steering->root_ns = NULL;
+ return err;
}
static void clean_tree(struct fs_node *node)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 63a8ea31601c..e2c465b0b3f8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -1587,6 +1587,14 @@ static int mlx5_try_fast_unload(struct mlx5_core_dev *dev)
mlx5_enter_error_state(dev, true);
+ /* Some platforms require freeing the IRQs in the shutdown
+ * flow. If they aren't freed they can't be allocated after
+ * kexec. There is no need to clean up the mlx5_core software
+ * contexts.
+ */
+ mlx5_irq_clear_affinity_hints(dev);
+ mlx5_core_eq_free_irqs(dev);
+
return 0;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
index 7d001fe6e631..023882d9a22e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
@@ -128,6 +128,8 @@ int mlx5_core_eq_query(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
u32 *out, int outlen);
int mlx5_start_eqs(struct mlx5_core_dev *dev);
void mlx5_stop_eqs(struct mlx5_core_dev *dev);
+/* This function should only be called after mlx5_cmd_force_teardown_hca */
+void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev);
struct mlx5_eq *mlx5_eqn2eq(struct mlx5_core_dev *dev, int eqn);
u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq *eq);
void mlx5_cq_tasklet_cb(unsigned long data);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c
index 93ea56620a24..e13ac3b8dff7 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core.c
@@ -1100,11 +1100,11 @@ err_emad_init:
err_alloc_lag_mapping:
mlxsw_ports_fini(mlxsw_core);
err_ports_init:
- mlxsw_bus->fini(bus_priv);
-err_bus_init:
if (!reload)
devlink_resources_unregister(devlink, NULL);
err_register_resources:
+ mlxsw_bus->fini(bus_priv);
+err_bus_init:
if (!reload)
devlink_free(devlink);
err_devlink_alloc:
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index ca38a30fbe91..adc6ab2cf429 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -4433,6 +4433,11 @@ static int mlxsw_sp_netdevice_port_upper_event(struct net_device *lower_dev,
NL_SET_ERR_MSG_MOD(extack, "Can not put a VLAN on an OVS port");
return -EINVAL;
}
+ if (is_vlan_dev(upper_dev) &&
+ vlan_dev_vlan_id(upper_dev) == 1) {
+ NL_SET_ERR_MSG_MOD(extack, "Creating a VLAN device with VID 1 is unsupported: VLAN 1 carries untagged traffic");
+ return -EINVAL;
+ }
break;
case NETDEV_CHANGEUPPER:
upper_dev = info->upper_dev;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
index c11c9a635866..4ed01182a82c 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
@@ -1718,13 +1718,11 @@ __mlxsw_sp_port_mdb_del(struct mlxsw_sp_port *mlxsw_sp_port,
struct net_device *dev = mlxsw_sp_port->dev;
int err;
- if (bridge_port->bridge_device->multicast_enabled) {
- if (bridge_port->bridge_device->multicast_enabled) {
- err = mlxsw_sp_port_smid_set(mlxsw_sp_port, mid->mid,
- false);
- if (err)
- netdev_err(dev, "Unable to remove port from SMID\n");
- }
+ if (bridge_port->bridge_device->multicast_enabled &&
+ !bridge_port->mrouter) {
+ err = mlxsw_sp_port_smid_set(mlxsw_sp_port, mid->mid, false);
+ if (err)
+ netdev_err(dev, "Unable to remove port from SMID\n");
}
err = mlxsw_sp_port_remove_from_mid(mlxsw_sp_port, mid);
diff --git a/drivers/net/ethernet/natsemi/sonic.c b/drivers/net/ethernet/natsemi/sonic.c
index 7ed08486ae23..c805dcbebd02 100644
--- a/drivers/net/ethernet/natsemi/sonic.c
+++ b/drivers/net/ethernet/natsemi/sonic.c
@@ -84,7 +84,7 @@ static int sonic_open(struct net_device *dev)
for (i = 0; i < SONIC_NUM_RRS; i++) {
dma_addr_t laddr = dma_map_single(lp->device, skb_put(lp->rx_skb[i], SONIC_RBSIZE),
SONIC_RBSIZE, DMA_FROM_DEVICE);
- if (!laddr) {
+ if (dma_mapping_error(lp->device, laddr)) {
while(i > 0) { /* free any that were mapped successfully */
i--;
dma_unmap_single(lp->device, lp->rx_laddr[i], SONIC_RBSIZE, DMA_FROM_DEVICE);
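
The sonic fix replaces the `!laddr` test with dma_mapping_error(): zero can be a perfectly valid bus address on some platforms, and mapping failures are signalled by a platform-specific cookie that only dma_mapping_error() knows how to recognize. The canonical pattern, as a hedged sketch:

    #include <linux/dma-mapping.h>
    #include <linux/errno.h>

    /* Map-and-check; on failure nothing is unmapped because nothing was
     * mapped. A sketch, not the driver's exact code.
     */
    static int example_map_rx(struct device *dev, void *buf, size_t len,
                              dma_addr_t *out)
    {
        dma_addr_t addr = dma_map_single(dev, buf, len, DMA_FROM_DEVICE);

        if (dma_mapping_error(dev, addr))
            return -ENOMEM;     /* never test the address against 0 */

        *out = addr;
        return 0;
    }
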
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.c b/drivers/net/ethernet/netronome/nfp/bpf/main.c
index 1dc424685f4e..35fb31f682af 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/main.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/main.c
@@ -335,7 +335,7 @@ static int nfp_bpf_parse_capabilities(struct nfp_app *app)
return PTR_ERR(mem) == -ENOENT ? 0 : PTR_ERR(mem);
start = mem;
- while (mem - start + 8 < nfp_cpp_area_size(area)) {
+ while (mem - start + 8 <= nfp_cpp_area_size(area)) {
u8 __iomem *value;
u32 type, length;
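
The capability parser reads an 8-byte TLV header per iteration; with `<` it wrongly rejected a final entry whose header ends exactly at the area size, so the bound becomes `<=`. A standalone sketch of the inclusive bound:

    #include <stddef.h>
    #include <stdio.h>

    #define HDR_LEN 8

    /* Count how many 8-byte headers fit; an entry ending exactly at
     * `size` is valid, hence `<=`.
     */
    static int count_headers(size_t size)
    {
        size_t off = 0;
        int n = 0;

        while (off + HDR_LEN <= size) {
            n++;
            off += HDR_LEN;
        }
        return n;
    }

    int main(void)
    {
        printf("%d\n", count_headers(16)); /* 2: the second header ends at 16 */
        return 0;
    }
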
diff --git a/drivers/net/ethernet/netronome/nfp/flower/action.c b/drivers/net/ethernet/netronome/nfp/flower/action.c
index b3567a596fc1..80df9a5d4217 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/action.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/action.c
@@ -183,17 +183,21 @@ static int
nfp_fl_set_ipv4_udp_tun(struct nfp_fl_set_ipv4_udp_tun *set_tun,
const struct tc_action *action,
struct nfp_fl_pre_tunnel *pre_tun,
- enum nfp_flower_tun_type tun_type)
+ enum nfp_flower_tun_type tun_type,
+ struct net_device *netdev)
{
size_t act_size = sizeof(struct nfp_fl_set_ipv4_udp_tun);
struct ip_tunnel_info *ip_tun = tcf_tunnel_info(action);
u32 tmp_set_ip_tun_type_index = 0;
/* Currently support one pre-tunnel so index is always 0. */
int pretun_idx = 0;
+ struct net *net;
if (ip_tun->options_len)
return -EOPNOTSUPP;
+ net = dev_net(netdev);
+
set_tun->head.jump_id = NFP_FL_ACTION_OPCODE_SET_IPV4_TUNNEL;
set_tun->head.len_lw = act_size >> NFP_FL_LW_SIZ;
@@ -204,6 +208,7 @@ nfp_fl_set_ipv4_udp_tun(struct nfp_fl_set_ipv4_udp_tun *set_tun,
set_tun->tun_type_index = cpu_to_be32(tmp_set_ip_tun_type_index);
set_tun->tun_id = ip_tun->key.tun_id;
+ set_tun->ttl = net->ipv4.sysctl_ip_default_ttl;
/* Complete pre_tunnel action. */
pre_tun->ipv4_dst = ip_tun->key.u.ipv4.dst;
@@ -511,7 +516,8 @@ nfp_flower_loop_action(const struct tc_action *a,
*a_len += sizeof(struct nfp_fl_pre_tunnel);
set_tun = (void *)&nfp_fl->action_data[*a_len];
- err = nfp_fl_set_ipv4_udp_tun(set_tun, a, pre_tun, *tun_type);
+ err = nfp_fl_set_ipv4_udp_tun(set_tun, a, pre_tun, *tun_type,
+ netdev);
if (err)
return err;
*a_len += sizeof(struct nfp_fl_set_ipv4_udp_tun);
diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h
index b6c0fd053a50..bee4367a2c38 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h
+++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h
@@ -190,7 +190,10 @@ struct nfp_fl_set_ipv4_udp_tun {
__be16 reserved;
__be64 tun_id __packed;
__be32 tun_type_index;
- __be32 extra[3];
+ __be16 reserved2;
+ u8 ttl;
+ u8 reserved3;
+ __be32 extra[2];
};
/* Metadata with L2 (1W/4B)
diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.c b/drivers/net/ethernet/netronome/nfp/flower/main.c
index ad02592a82b7..84e3b9f5abb1 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/main.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/main.c
@@ -52,8 +52,6 @@
#define NFP_FLOWER_ALLOWED_VER 0x0001000000010000UL
-#define NFP_FLOWER_FRAME_HEADROOM 158
-
static const char *nfp_flower_extra_cap(struct nfp_app *app, struct nfp_net *nn)
{
return "FLOWER";
@@ -360,7 +358,7 @@ nfp_flower_spawn_phy_reprs(struct nfp_app *app, struct nfp_flower_priv *priv)
}
SET_NETDEV_DEV(repr, &priv->nn->pdev->dev);
- nfp_net_get_mac_addr(app->pf, port);
+ nfp_net_get_mac_addr(app->pf, repr, port);
cmsg_port_id = nfp_flower_cmsg_phys_port(phys_port);
err = nfp_repr_init(app, repr,
@@ -559,22 +557,6 @@ static void nfp_flower_clean(struct nfp_app *app)
app->priv = NULL;
}
-static int
-nfp_flower_check_mtu(struct nfp_app *app, struct net_device *netdev,
- int new_mtu)
-{
- /* The flower fw reserves NFP_FLOWER_FRAME_HEADROOM bytes of the
- * supported max MTU to allow for appending tunnel headers. To prevent
- * unexpected behaviour this needs to be accounted for.
- */
- if (new_mtu > netdev->max_mtu - NFP_FLOWER_FRAME_HEADROOM) {
- nfp_err(app->cpp, "New MTU (%d) is not valid\n", new_mtu);
- return -EINVAL;
- }
-
- return 0;
-}
-
static bool nfp_flower_check_ack(struct nfp_flower_priv *app_priv)
{
bool ret;
@@ -656,7 +638,6 @@ const struct nfp_app_type app_flower = {
.init = nfp_flower_init,
.clean = nfp_flower_clean,
- .check_mtu = nfp_flower_check_mtu,
.repr_change_mtu = nfp_flower_repr_change_mtu,
.vnic_alloc = nfp_flower_vnic_alloc,
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_app_nic.c b/drivers/net/ethernet/netronome/nfp/nfp_app_nic.c
index 2a2f2fbc8850..b9618c37403f 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_app_nic.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_app_nic.c
@@ -69,7 +69,7 @@ int nfp_app_nic_vnic_alloc(struct nfp_app *app, struct nfp_net *nn,
if (err)
return err < 0 ? err : 0;
- nfp_net_get_mac_addr(app->pf, nn->port);
+ nfp_net_get_mac_addr(app->pf, nn->dp.netdev, nn->port);
return 0;
}
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_main.h b/drivers/net/ethernet/netronome/nfp/nfp_main.h
index add46e28212b..42211083b51f 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_main.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_main.h
@@ -171,7 +171,9 @@ void nfp_net_pci_remove(struct nfp_pf *pf);
int nfp_hwmon_register(struct nfp_pf *pf);
void nfp_hwmon_unregister(struct nfp_pf *pf);
-void nfp_net_get_mac_addr(struct nfp_pf *pf, struct nfp_port *port);
+void
+nfp_net_get_mac_addr(struct nfp_pf *pf, struct net_device *netdev,
+ struct nfp_port *port);
bool nfp_ctrl_tx(struct nfp_net *nn, struct sk_buff *skb);
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_main.c b/drivers/net/ethernet/netronome/nfp/nfp_net_main.c
index 15fa47f622aa..45cd2092e498 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_main.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_main.c
@@ -67,23 +67,26 @@
/**
* nfp_net_get_mac_addr() - Get the MAC address.
* @pf: NFP PF handle
+ * @netdev: net_device to set MAC address on
* @port: NFP port structure
*
* First try to get the MAC address from NSP ETH table. If that
* fails generate a random address.
*/
-void nfp_net_get_mac_addr(struct nfp_pf *pf, struct nfp_port *port)
+void
+nfp_net_get_mac_addr(struct nfp_pf *pf, struct net_device *netdev,
+ struct nfp_port *port)
{
struct nfp_eth_table_port *eth_port;
eth_port = __nfp_port_get_eth_port(port);
if (!eth_port) {
- eth_hw_addr_random(port->netdev);
+ eth_hw_addr_random(netdev);
return;
}
- ether_addr_copy(port->netdev->dev_addr, eth_port->mac_addr);
- ether_addr_copy(port->netdev->perm_addr, eth_port->mac_addr);
+ ether_addr_copy(netdev->dev_addr, eth_port->mac_addr);
+ ether_addr_copy(netdev->perm_addr, eth_port->mac_addr);
}
static struct nfp_eth_table_port *
@@ -511,16 +514,18 @@ static int nfp_net_pci_map_mem(struct nfp_pf *pf)
return PTR_ERR(mem);
}
- min_size = NFP_MAC_STATS_SIZE * (pf->eth_tbl->max_index + 1);
- pf->mac_stats_mem = nfp_rtsym_map(pf->rtbl, "_mac_stats",
- "net.macstats", min_size,
- &pf->mac_stats_bar);
- if (IS_ERR(pf->mac_stats_mem)) {
- if (PTR_ERR(pf->mac_stats_mem) != -ENOENT) {
- err = PTR_ERR(pf->mac_stats_mem);
- goto err_unmap_ctrl;
+ if (pf->eth_tbl) {
+ min_size = NFP_MAC_STATS_SIZE * (pf->eth_tbl->max_index + 1);
+ pf->mac_stats_mem = nfp_rtsym_map(pf->rtbl, "_mac_stats",
+ "net.macstats", min_size,
+ &pf->mac_stats_bar);
+ if (IS_ERR(pf->mac_stats_mem)) {
+ if (PTR_ERR(pf->mac_stats_mem) != -ENOENT) {
+ err = PTR_ERR(pf->mac_stats_mem);
+ goto err_unmap_ctrl;
+ }
+ pf->mac_stats_mem = NULL;
}
- pf->mac_stats_mem = NULL;
}
pf->vf_cfg_mem = nfp_net_pf_map_rtsym(pf, "net.vfcfg",
diff --git a/drivers/net/ethernet/ni/nixge.c b/drivers/net/ethernet/ni/nixge.c
index 27364b7572fc..b092894dd128 100644
--- a/drivers/net/ethernet/ni/nixge.c
+++ b/drivers/net/ethernet/ni/nixge.c
@@ -1170,7 +1170,7 @@ static void *nixge_get_nvmem_address(struct device *dev)
cell = nvmem_cell_get(dev, "address");
if (IS_ERR(cell))
- return cell;
+ return NULL;
mac = nvmem_cell_read(cell, &cell_size);
nvmem_cell_put(cell);
@@ -1183,7 +1183,7 @@ static int nixge_probe(struct platform_device *pdev)
struct nixge_priv *priv;
struct net_device *ndev;
struct resource *dmares;
- const char *mac_addr;
+ const u8 *mac_addr;
int err;
ndev = alloc_etherdev(sizeof(*priv));
@@ -1202,10 +1202,12 @@ static int nixge_probe(struct platform_device *pdev)
ndev->max_mtu = NIXGE_JUMBO_MTU;
mac_addr = nixge_get_nvmem_address(&pdev->dev);
- if (mac_addr && is_valid_ether_addr(mac_addr))
+ if (mac_addr && is_valid_ether_addr(mac_addr)) {
ether_addr_copy(ndev->dev_addr, mac_addr);
- else
+ kfree(mac_addr);
+ } else {
eth_hw_addr_random(ndev);
+ }
priv = netdev_priv(ndev);
priv->ndev = ndev;
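
Two fixes in one here: nixge_get_nvmem_address() no longer leaks an ERR_PTR to a caller that treats any non-NULL value as a buffer, and the buffer returned by nvmem_cell_read(), which the caller owns, is freed once the address has been copied. A hedged sketch of the whole consume-and-free flow; "address" is the cell name this driver uses:

    #include <linux/err.h>
    #include <linux/etherdevice.h>
    #include <linux/nvmem-consumer.h>
    #include <linux/slab.h>

    static void example_set_mac(struct device *dev, struct net_device *ndev)
    {
        struct nvmem_cell *cell;
        size_t len = 0;
        u8 *mac = NULL;

        cell = nvmem_cell_get(dev, "address");
        if (!IS_ERR(cell)) {
            mac = nvmem_cell_read(cell, &len);
            nvmem_cell_put(cell);
        }

        if (!IS_ERR_OR_NULL(mac) && len == ETH_ALEN &&
            is_valid_ether_addr(mac))
            ether_addr_copy(ndev->dev_addr, mac);
        else
            eth_hw_addr_random(ndev);

        if (!IS_ERR_OR_NULL(mac))
            kfree(mac);         /* the caller owns the read buffer */
    }
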
diff --git a/drivers/net/ethernet/qlogic/qed/qed_cxt.c b/drivers/net/ethernet/qlogic/qed/qed_cxt.c
index 00f41c145d4d..820b226d6ff8 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_cxt.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_cxt.c
@@ -77,7 +77,7 @@
#define ILT_CFG_REG(cli, reg) PSWRQ2_REG_ ## cli ## _ ## reg ## _RT_OFFSET
/* ILT entry structure */
-#define ILT_ENTRY_PHY_ADDR_MASK 0x000FFFFFFFFFFFULL
+#define ILT_ENTRY_PHY_ADDR_MASK (~0ULL >> 12)
#define ILT_ENTRY_PHY_ADDR_SHIFT 0
#define ILT_ENTRY_VALID_MASK 0x1ULL
#define ILT_ENTRY_VALID_SHIFT 52
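
`~0ULL >> 12` keeps the low 64 - 12 = 52 bits set, matching the valid bit at ILT_ENTRY_VALID_SHIFT == 52, whereas the old literal only covered 44 bits and would have truncated ILT physical addresses above that; deriving the mask from the shift keeps the two in sync. A runnable comparison:

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        /* Low 52 bits: the physical-address field below the valid bit. */
        uint64_t new_mask = ~0ULL >> 12;
        uint64_t old_mask = 0x000FFFFFFFFFFFULL;   /* only 44 bits */

        printf("new: 0x%016" PRIx64 "\n", new_mask);
        printf("old: 0x%016" PRIx64 "\n", old_mask);
        return 0;
    }
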
diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c
index e874504e8b28..8667799d0069 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_l2.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c
@@ -115,8 +115,7 @@ int qed_l2_alloc(struct qed_hwfn *p_hwfn)
void qed_l2_setup(struct qed_hwfn *p_hwfn)
{
- if (p_hwfn->hw_info.personality != QED_PCI_ETH &&
- p_hwfn->hw_info.personality != QED_PCI_ETH_ROCE)
+ if (!QED_IS_L2_PERSONALITY(p_hwfn))
return;
mutex_init(&p_hwfn->p_l2_info->lock);
@@ -126,8 +125,7 @@ void qed_l2_free(struct qed_hwfn *p_hwfn)
{
u32 i;
- if (p_hwfn->hw_info.personality != QED_PCI_ETH &&
- p_hwfn->hw_info.personality != QED_PCI_ETH_ROCE)
+ if (!QED_IS_L2_PERSONALITY(p_hwfn))
return;
if (!p_hwfn->p_l2_info)
diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.c b/drivers/net/ethernet/qlogic/qed/qed_ll2.c
index 74fc626b1ec1..468c59d2e491 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_ll2.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.c
@@ -292,6 +292,7 @@ static void qed_ll2_txq_flush(struct qed_hwfn *p_hwfn, u8 connection_handle)
struct qed_ll2_tx_packet *p_pkt = NULL;
struct qed_ll2_info *p_ll2_conn;
struct qed_ll2_tx_queue *p_tx;
+ unsigned long flags = 0;
dma_addr_t tx_frag;
p_ll2_conn = qed_ll2_handle_sanity_inactive(p_hwfn, connection_handle);
@@ -300,6 +301,7 @@ static void qed_ll2_txq_flush(struct qed_hwfn *p_hwfn, u8 connection_handle)
p_tx = &p_ll2_conn->tx_queue;
+ spin_lock_irqsave(&p_tx->lock, flags);
while (!list_empty(&p_tx->active_descq)) {
p_pkt = list_first_entry(&p_tx->active_descq,
struct qed_ll2_tx_packet, list_entry);
@@ -309,6 +311,7 @@ static void qed_ll2_txq_flush(struct qed_hwfn *p_hwfn, u8 connection_handle)
list_del(&p_pkt->list_entry);
b_last_packet = list_empty(&p_tx->active_descq);
list_add_tail(&p_pkt->list_entry, &p_tx->free_descq);
+ spin_unlock_irqrestore(&p_tx->lock, flags);
if (p_ll2_conn->input.conn_type == QED_LL2_TYPE_OOO) {
struct qed_ooo_buffer *p_buffer;
@@ -328,7 +331,9 @@ static void qed_ll2_txq_flush(struct qed_hwfn *p_hwfn, u8 connection_handle)
b_last_frag,
b_last_packet);
}
+ spin_lock_irqsave(&p_tx->lock, flags);
}
+ spin_unlock_irqrestore(&p_tx->lock, flags);
}
static int qed_ll2_txq_completion(struct qed_hwfn *p_hwfn, void *p_cookie)
@@ -556,6 +561,7 @@ static void qed_ll2_rxq_flush(struct qed_hwfn *p_hwfn, u8 connection_handle)
struct qed_ll2_info *p_ll2_conn = NULL;
struct qed_ll2_rx_packet *p_pkt = NULL;
struct qed_ll2_rx_queue *p_rx;
+ unsigned long flags = 0;
p_ll2_conn = qed_ll2_handle_sanity_inactive(p_hwfn, connection_handle);
if (!p_ll2_conn)
@@ -563,13 +569,14 @@ static void qed_ll2_rxq_flush(struct qed_hwfn *p_hwfn, u8 connection_handle)
p_rx = &p_ll2_conn->rx_queue;
+ spin_lock_irqsave(&p_rx->lock, flags);
while (!list_empty(&p_rx->active_descq)) {
p_pkt = list_first_entry(&p_rx->active_descq,
struct qed_ll2_rx_packet, list_entry);
if (!p_pkt)
break;
-
list_move_tail(&p_pkt->list_entry, &p_rx->free_descq);
+ spin_unlock_irqrestore(&p_rx->lock, flags);
if (p_ll2_conn->input.conn_type == QED_LL2_TYPE_OOO) {
struct qed_ooo_buffer *p_buffer;
@@ -588,7 +595,30 @@ static void qed_ll2_rxq_flush(struct qed_hwfn *p_hwfn, u8 connection_handle)
cookie,
rx_buf_addr, b_last);
}
+ spin_lock_irqsave(&p_rx->lock, flags);
}
+ spin_unlock_irqrestore(&p_rx->lock, flags);
+}
+
+static bool
+qed_ll2_lb_rxq_handler_slowpath(struct qed_hwfn *p_hwfn,
+ struct core_rx_slow_path_cqe *p_cqe)
+{
+ struct ooo_opaque *iscsi_ooo;
+ u32 cid;
+
+ if (p_cqe->ramrod_cmd_id != CORE_RAMROD_RX_QUEUE_FLUSH)
+ return false;
+
+ iscsi_ooo = (struct ooo_opaque *)&p_cqe->opaque_data;
+ if (iscsi_ooo->ooo_opcode != TCP_EVENT_DELETE_ISLES)
+ return false;
+
+ /* Need to flush this connection's isles */
+ cid = le32_to_cpu(iscsi_ooo->cid);
+ qed_ooo_release_connection_isles(p_hwfn, p_hwfn->p_ooo_info, cid);
+
+ return true;
}
static int qed_ll2_lb_rxq_handler(struct qed_hwfn *p_hwfn,
@@ -617,6 +647,11 @@ static int qed_ll2_lb_rxq_handler(struct qed_hwfn *p_hwfn,
cq_old_idx = qed_chain_get_cons_idx(&p_rx->rcq_chain);
cqe_type = cqe->rx_cqe_sp.type;
+ if (cqe_type == CORE_RX_CQE_TYPE_SLOW_PATH)
+ if (qed_ll2_lb_rxq_handler_slowpath(p_hwfn,
+ &cqe->rx_cqe_sp))
+ continue;
+
if (cqe_type != CORE_RX_CQE_TYPE_REGULAR) {
DP_NOTICE(p_hwfn,
"Got a non-regular LB LL2 completion [type 0x%02x]\n",
@@ -794,6 +829,9 @@ static int qed_ll2_lb_rxq_completion(struct qed_hwfn *p_hwfn, void *p_cookie)
struct qed_ll2_info *p_ll2_conn = (struct qed_ll2_info *)p_cookie;
int rc;
+ if (!QED_LL2_RX_REGISTERED(p_ll2_conn))
+ return 0;
+
rc = qed_ll2_lb_rxq_handler(p_hwfn, p_ll2_conn);
if (rc)
return rc;
@@ -814,6 +852,9 @@ static int qed_ll2_lb_txq_completion(struct qed_hwfn *p_hwfn, void *p_cookie)
u16 new_idx = 0, num_bds = 0;
int rc;
+ if (!QED_LL2_TX_REGISTERED(p_ll2_conn))
+ return 0;
+
new_idx = le16_to_cpu(*p_tx->p_fw_cons);
num_bds = ((s16)new_idx - (s16)p_tx->bds_idx);
@@ -1867,17 +1908,25 @@ int qed_ll2_terminate_connection(void *cxt, u8 connection_handle)
/* Stop Tx & Rx of connection, if needed */
if (QED_LL2_TX_REGISTERED(p_ll2_conn)) {
+ p_ll2_conn->tx_queue.b_cb_registred = false;
+ smp_wmb(); /* Make sure this is seen by ll2_lb_rxq_completion */
rc = qed_sp_ll2_tx_queue_stop(p_hwfn, p_ll2_conn);
if (rc)
goto out;
+
qed_ll2_txq_flush(p_hwfn, connection_handle);
+ qed_int_unregister_cb(p_hwfn, p_ll2_conn->tx_queue.tx_sb_index);
}
if (QED_LL2_RX_REGISTERED(p_ll2_conn)) {
+ p_ll2_conn->rx_queue.b_cb_registred = false;
+ smp_wmb(); /* Make sure this is seen by ll2_lb_rxq_completion */
rc = qed_sp_ll2_rx_queue_stop(p_hwfn, p_ll2_conn);
if (rc)
goto out;
+
qed_ll2_rxq_flush(p_hwfn, connection_handle);
+ qed_int_unregister_cb(p_hwfn, p_ll2_conn->rx_queue.rx_sb_index);
}
if (p_ll2_conn->input.conn_type == QED_LL2_TYPE_OOO)
@@ -1925,16 +1974,6 @@ void qed_ll2_release_connection(void *cxt, u8 connection_handle)
if (!p_ll2_conn)
return;
- if (QED_LL2_RX_REGISTERED(p_ll2_conn)) {
- p_ll2_conn->rx_queue.b_cb_registred = false;
- qed_int_unregister_cb(p_hwfn, p_ll2_conn->rx_queue.rx_sb_index);
- }
-
- if (QED_LL2_TX_REGISTERED(p_ll2_conn)) {
- p_ll2_conn->tx_queue.b_cb_registred = false;
- qed_int_unregister_cb(p_hwfn, p_ll2_conn->tx_queue.tx_sb_index);
- }
-
kfree(p_ll2_conn->tx_queue.descq_mem);
qed_chain_free(p_hwfn->cdev, &p_ll2_conn->tx_queue.txq_chain);
@@ -2370,7 +2409,7 @@ static int qed_ll2_start_xmit(struct qed_dev *cdev, struct sk_buff *skb)
u8 flags = 0;
if (unlikely(skb->ip_summed != CHECKSUM_NONE)) {
- DP_INFO(cdev, "Cannot transmit a checksumed packet\n");
+ DP_INFO(cdev, "Cannot transmit a checksummed packet\n");
return -EINVAL;
}
diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c
index 9854aa9139af..7870ae2a6f7e 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_main.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_main.c
@@ -680,7 +680,7 @@ static int qed_nic_stop(struct qed_dev *cdev)
tasklet_disable(p_hwfn->sp_dpc);
p_hwfn->b_sp_dpc_enabled = false;
DP_VERBOSE(cdev, NETIF_MSG_IFDOWN,
- "Disabled sp taskelt [hwfn %d] at %p\n",
+ "Disabled sp tasklet [hwfn %d] at %p\n",
i, p_hwfn->sp_dpc);
}
}
diff --git a/drivers/net/ethernet/qlogic/qed/qed_roce.c b/drivers/net/ethernet/qlogic/qed/qed_roce.c
index fb7c2d1562ae..6acfd43c1a4f 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_roce.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_roce.c
@@ -848,7 +848,7 @@ int qed_roce_query_qp(struct qed_hwfn *p_hwfn,
if (!(qp->resp_offloaded)) {
DP_NOTICE(p_hwfn,
- "The responder's qp should be offloded before requester's\n");
+ "The responder's qp should be offloaded before requester's\n");
return -EINVAL;
}
diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c
index a01e7d6e5442..f6655e251bbd 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_main.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_main.c
@@ -1066,13 +1066,12 @@ static void __qede_remove(struct pci_dev *pdev, enum qede_remove_mode mode)
DP_INFO(edev, "Starting qede_remove\n");
+ qede_rdma_dev_remove(edev);
unregister_netdev(ndev);
cancel_delayed_work_sync(&edev->sp_task);
qede_ptp_disable(edev);
- qede_rdma_dev_remove(edev);
-
edev->ops->common->set_power_state(cdev, PCI_D0);
pci_set_drvdata(pdev, NULL);
diff --git a/drivers/net/ethernet/qlogic/qede/qede_rdma.c b/drivers/net/ethernet/qlogic/qede/qede_rdma.c
index 50b142fad6b8..1900bf7e67d1 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_rdma.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_rdma.c
@@ -238,7 +238,7 @@ qede_rdma_get_free_event_node(struct qede_dev *edev)
}
if (!found) {
- event_node = kzalloc(sizeof(*event_node), GFP_KERNEL);
+ event_node = kzalloc(sizeof(*event_node), GFP_ATOMIC);
if (!event_node) {
DP_NOTICE(edev,
"qedr: Could not allocate memory for rdma work\n");
diff --git a/drivers/net/ethernet/realtek/8139too.c b/drivers/net/ethernet/realtek/8139too.c
index d24b47b8e0b2..d118da5a10a2 100644
--- a/drivers/net/ethernet/realtek/8139too.c
+++ b/drivers/net/ethernet/realtek/8139too.c
@@ -2224,7 +2224,7 @@ static void rtl8139_poll_controller(struct net_device *dev)
struct rtl8139_private *tp = netdev_priv(dev);
const int irq = tp->pci_dev->irq;
- disable_irq(irq);
+ disable_irq_nosync(irq);
rtl8139_interrupt(irq, dev);
enable_irq(irq);
}
diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c
index 604ae78381ae..c7aac1fc99e8 100644
--- a/drivers/net/ethernet/realtek/r8169.c
+++ b/drivers/net/ethernet/realtek/r8169.c
@@ -4981,6 +4981,9 @@ static void rtl_pll_power_down(struct rtl8169_private *tp)
static void rtl_pll_power_up(struct rtl8169_private *tp)
{
rtl_generic_op(tp, tp->pll_power_ops.up);
+
+ /* give MAC/PHY some time to resume */
+ msleep(20);
}
static void rtl_init_pll_power_ops(struct rtl8169_private *tp)
diff --git a/drivers/net/ethernet/renesas/sh_eth.h b/drivers/net/ethernet/renesas/sh_eth.h
index a5b792ce2ae7..1bf930d4a1e5 100644
--- a/drivers/net/ethernet/renesas/sh_eth.h
+++ b/drivers/net/ethernet/renesas/sh_eth.h
@@ -163,7 +163,7 @@ enum {
};
/* Driver's parameters */
-#if defined(CONFIG_CPU_SH4) || defined(CONFIG_ARCH_SHMOBILE)
+#if defined(CONFIG_CPU_SH4) || defined(CONFIG_ARCH_RENESAS)
#define SH_ETH_RX_ALIGN 32
#else
#define SH_ETH_RX_ALIGN 2
diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c
index 63036d9bf3e6..d90a7b1f4088 100644
--- a/drivers/net/ethernet/sfc/ef10.c
+++ b/drivers/net/ethernet/sfc/ef10.c
@@ -4784,8 +4784,9 @@ expire:
* will set rule->filter_id to EFX_ARFS_FILTER_ID_PENDING, meaning that
* the rule is not removed by efx_rps_hash_del() below.
*/
- ret = efx_ef10_filter_remove_internal(efx, 1U << spec->priority,
- filter_idx, true) == 0;
+ if (ret)
+ ret = efx_ef10_filter_remove_internal(efx, 1U << spec->priority,
+ filter_idx, true) == 0;
/* While we can't safely dereference rule (we dropped the lock), we can
* still test it for NULL.
*/
diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c
index a4ebd8715494..661828e8fdcf 100644
--- a/drivers/net/ethernet/sfc/efx.c
+++ b/drivers/net/ethernet/sfc/efx.c
@@ -1289,9 +1289,8 @@ static int efx_init_io(struct efx_nic *efx)
pci_set_master(pci_dev);
- /* Set the PCI DMA mask. Try all possibilities from our
- * genuine mask down to 32 bits, because some architectures
- * (e.g. x86_64 with iommu_sac_force set) will allow 40 bit
+ /* Set the PCI DMA mask. Try all possibilities from our genuine mask
+ * down to 32 bits, because some architectures will allow 40 bit
* masks even though they reject 46 bit masks.
*/
while (dma_mask > 0x7fffffffUL) {
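The loop this comment introduces narrows the DMA mask one bit at a time until the platform accepts it. A sketch of the same probing pattern as a stand-alone helper (the helper name is illustrative; dma_set_mask_and_coherent() is the standard API):

/* Walk the mask down from the device's genuine capability to 32 bits,
 * keeping the widest mask the platform will take.
 */
static int set_widest_dma_mask(struct device *dev, u64 dma_mask)
{
	int rc = -EIO;

	while (dma_mask > 0x7fffffffUL) {
		rc = dma_set_mask_and_coherent(dev, dma_mask);
		if (rc == 0)
			break;		/* accepted at this width */
		dma_mask >>= 1;		/* try one bit narrower */
	}
	return rc;
}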
diff --git a/drivers/net/ethernet/sfc/falcon/efx.c b/drivers/net/ethernet/sfc/falcon/efx.c
index 3d6c91e96589..dd5530a4f8c8 100644
--- a/drivers/net/ethernet/sfc/falcon/efx.c
+++ b/drivers/net/ethernet/sfc/falcon/efx.c
@@ -1242,9 +1242,8 @@ static int ef4_init_io(struct ef4_nic *efx)
pci_set_master(pci_dev);
- /* Set the PCI DMA mask. Try all possibilities from our
- * genuine mask down to 32 bits, because some architectures
- * (e.g. x86_64 with iommu_sac_force set) will allow 40 bit
+ /* Set the PCI DMA mask. Try all possibilities from our genuine mask
+ * down to 32 bits, because some architectures will allow 40 bit
* masks even though they reject 46 bit masks.
*/
while (dma_mask > 0x7fffffffUL) {
diff --git a/drivers/net/ethernet/sfc/rx.c b/drivers/net/ethernet/sfc/rx.c
index 64a94f242027..d2e254f2f72b 100644
--- a/drivers/net/ethernet/sfc/rx.c
+++ b/drivers/net/ethernet/sfc/rx.c
@@ -839,6 +839,8 @@ static void efx_filter_rfs_work(struct work_struct *data)
int rc;
rc = efx->type->filter_insert(efx, &req->spec, true);
+ if (rc >= 0)
+ rc %= efx->type->max_rx_ip_filters;
if (efx->rps_hash_table) {
spin_lock_bh(&efx->rps_hash_lock);
rule = efx_rps_hash_find(efx, &req->spec);
diff --git a/drivers/net/ethernet/socionext/netsec.c b/drivers/net/ethernet/socionext/netsec.c
index f4c0b02ddad8..59fbf74dcada 100644
--- a/drivers/net/ethernet/socionext/netsec.c
+++ b/drivers/net/ethernet/socionext/netsec.c
@@ -1674,8 +1674,8 @@ static int netsec_probe(struct platform_device *pdev)
if (ret)
goto unreg_napi;
- if (dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)))
- dev_warn(&pdev->dev, "Failed to enable 64-bit DMA\n");
+ if (dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(40)))
+ dev_warn(&pdev->dev, "Failed to set DMA mask\n");
ret = register_netdev(ndev);
if (ret) {
diff --git a/drivers/net/ethernet/sun/niu.c b/drivers/net/ethernet/sun/niu.c
index f081de4f38d7..88c12474a0c3 100644
--- a/drivers/net/ethernet/sun/niu.c
+++ b/drivers/net/ethernet/sun/niu.c
@@ -3443,7 +3443,7 @@ static int niu_process_rx_pkt(struct napi_struct *napi, struct niu *np,
len = (val & RCR_ENTRY_L2_LEN) >>
RCR_ENTRY_L2_LEN_SHIFT;
- len -= ETH_FCS_LEN;
+ append_size = len + ETH_HLEN + ETH_FCS_LEN;
addr = (val & RCR_ENTRY_PKT_BUF_ADDR) <<
RCR_ENTRY_PKT_BUF_ADDR_SHIFT;
@@ -3453,7 +3453,6 @@ static int niu_process_rx_pkt(struct napi_struct *napi, struct niu *np,
RCR_ENTRY_PKTBUFSZ_SHIFT];
off = addr & ~PAGE_MASK;
- append_size = rcr_size;
if (num_rcr == 1) {
int ptype;
@@ -3466,7 +3465,7 @@ static int niu_process_rx_pkt(struct napi_struct *napi, struct niu *np,
else
skb_checksum_none_assert(skb);
} else if (!(val & RCR_ENTRY_MULTI))
- append_size = len - skb->len;
+ append_size = append_size - skb->len;
niu_rx_skb_append(skb, page, off, append_size, rcr_size);
if ((page->index + rp->rbr_block_size) - rcr_size == addr) {
diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index 74f828412055..28d893b93d30 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -1340,6 +1340,8 @@ static inline void cpsw_add_dual_emac_def_ale_entries(
cpsw_ale_add_ucast(cpsw->ale, priv->mac_addr,
HOST_PORT_NUM, ALE_VLAN |
ALE_SECURE, slave->port_vlan);
+ cpsw_ale_control_set(cpsw->ale, slave_port,
+ ALE_PORT_DROP_UNKNOWN_VLAN, 1);
}
static void soft_reset_slave(struct cpsw_slave *slave)
diff --git a/drivers/net/ethernet/ti/davinci_emac.c b/drivers/net/ethernet/ti/davinci_emac.c
index abceea802ea1..38828ab77eb9 100644
--- a/drivers/net/ethernet/ti/davinci_emac.c
+++ b/drivers/net/ethernet/ti/davinci_emac.c
@@ -1873,7 +1873,7 @@ static int davinci_emac_probe(struct platform_device *pdev)
if (IS_ERR(priv->txchan)) {
dev_err(&pdev->dev, "error initializing tx dma channel\n");
rc = PTR_ERR(priv->txchan);
- goto no_cpdma_chan;
+ goto err_free_dma;
}
priv->rxchan = cpdma_chan_create(priv->dma, EMAC_DEF_RX_CH,
@@ -1881,14 +1881,14 @@ static int davinci_emac_probe(struct platform_device *pdev)
if (IS_ERR(priv->rxchan)) {
dev_err(&pdev->dev, "error initializing rx dma channel\n");
rc = PTR_ERR(priv->rxchan);
- goto no_cpdma_chan;
+ goto err_free_txchan;
}
res = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
if (!res) {
dev_err(&pdev->dev, "error getting irq res\n");
rc = -ENOENT;
- goto no_cpdma_chan;
+ goto err_free_rxchan;
}
ndev->irq = res->start;
@@ -1914,7 +1914,7 @@ static int davinci_emac_probe(struct platform_device *pdev)
pm_runtime_put_noidle(&pdev->dev);
dev_err(&pdev->dev, "%s: failed to get_sync(%d)\n",
__func__, rc);
- goto no_cpdma_chan;
+ goto err_napi_del;
}
/* register the network device */
@@ -1924,7 +1924,7 @@ static int davinci_emac_probe(struct platform_device *pdev)
dev_err(&pdev->dev, "error in register_netdev\n");
rc = -ENODEV;
pm_runtime_put(&pdev->dev);
- goto no_cpdma_chan;
+ goto err_napi_del;
}
@@ -1937,11 +1937,13 @@ static int davinci_emac_probe(struct platform_device *pdev)
return 0;
-no_cpdma_chan:
- if (priv->txchan)
- cpdma_chan_destroy(priv->txchan);
- if (priv->rxchan)
- cpdma_chan_destroy(priv->rxchan);
+err_napi_del:
+ netif_napi_del(&priv->napi);
+err_free_rxchan:
+ cpdma_chan_destroy(priv->rxchan);
+err_free_txchan:
+ cpdma_chan_destroy(priv->txchan);
+err_free_dma:
cpdma_ctlr_destroy(priv->dma);
no_pdata:
if (of_phy_is_fixed_link(np))
diff --git a/drivers/net/hamradio/bpqether.c b/drivers/net/hamradio/bpqether.c
index dfabbae72efd..f347fd9c5b28 100644
--- a/drivers/net/hamradio/bpqether.c
+++ b/drivers/net/hamradio/bpqether.c
@@ -449,20 +449,6 @@ static const struct seq_operations bpq_seqops = {
.show = bpq_seq_show,
};
-static int bpq_info_open(struct inode *inode, struct file *file)
-{
- return seq_open(file, &bpq_seqops);
-}
-
-static const struct file_operations bpq_info_fops = {
- .owner = THIS_MODULE,
- .open = bpq_info_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
-
/* ------------------------------------------------------------------------ */
static const struct net_device_ops bpq_netdev_ops = {
@@ -590,7 +576,7 @@ static int bpq_device_event(struct notifier_block *this,
static int __init bpq_init_driver(void)
{
#ifdef CONFIG_PROC_FS
- if (!proc_create("bpqether", 0444, init_net.proc_net, &bpq_info_fops)) {
+ if (!proc_create_seq("bpqether", 0444, init_net.proc_net, &bpq_seqops)) {
printk(KERN_ERR
"bpq: cannot create /proc/net/bpqether entry.\n");
return -ENOENT;
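This conversion, and the scc and yam ones below, apply the same recipe: drop the per-file open handler and file_operations glue, and hand the seq_operations straight to proc_create_seq() (the pppoe and hostap hunks use the _net/_data variants of the same idea). Before/after, sketched with illustrative names:

/* before: ~15 lines of glue per proc file */
static int foo_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &foo_seqops);
}

static const struct file_operations foo_fops = {
	.owner   = THIS_MODULE,
	.open    = foo_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release,
};

proc_create("foo", 0444, init_net.proc_net, &foo_fops);

/* after: one call, no file_operations */
proc_create_seq("foo", 0444, init_net.proc_net, &foo_seqops);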
diff --git a/drivers/net/hamradio/scc.c b/drivers/net/hamradio/scc.c
index 3de272959090..6c03932d8a6b 100644
--- a/drivers/net/hamradio/scc.c
+++ b/drivers/net/hamradio/scc.c
@@ -2084,21 +2084,6 @@ static const struct seq_operations scc_net_seq_ops = {
.stop = scc_net_seq_stop,
.show = scc_net_seq_show,
};
-
-
-static int scc_net_seq_open(struct inode *inode, struct file *file)
-{
- return seq_open(file, &scc_net_seq_ops);
-}
-
-static const struct file_operations scc_net_seq_fops = {
- .owner = THIS_MODULE,
- .open = scc_net_seq_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release_private,
-};
-
#endif /* CONFIG_PROC_FS */
@@ -2122,7 +2107,7 @@ static int __init scc_init_driver (void)
}
rtnl_unlock();
- proc_create("z8530drv", 0, init_net.proc_net, &scc_net_seq_fops);
+ proc_create_seq("z8530drv", 0, init_net.proc_net, &scc_net_seq_ops);
return 0;
}
diff --git a/drivers/net/hamradio/yam.c b/drivers/net/hamradio/yam.c
index 83034eb7ed4f..16ec7af6ab7b 100644
--- a/drivers/net/hamradio/yam.c
+++ b/drivers/net/hamradio/yam.c
@@ -841,20 +841,6 @@ static const struct seq_operations yam_seqops = {
.stop = yam_seq_stop,
.show = yam_seq_show,
};
-
-static int yam_info_open(struct inode *inode, struct file *file)
-{
- return seq_open(file, &yam_seqops);
-}
-
-static const struct file_operations yam_info_fops = {
- .owner = THIS_MODULE,
- .open = yam_info_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
#endif
@@ -1168,7 +1154,7 @@ static int __init yam_init_driver(void)
yam_timer.expires = jiffies + HZ / 100;
add_timer(&yam_timer);
- proc_create("yam", 0444, init_net.proc_net, &yam_info_fops);
+ proc_create_seq("yam", 0444, init_net.proc_net, &yam_seqops);
return 0;
error:
while (--i >= 0) {
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index ecc84954c511..da07ccdf84bf 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -1840,7 +1840,8 @@ static int netvsc_vf_join(struct net_device *vf_netdev,
goto rx_handler_failed;
}
- ret = netdev_upper_dev_link(vf_netdev, ndev, NULL);
+ ret = netdev_master_upper_dev_link(vf_netdev, ndev,
+ NULL, NULL, NULL);
if (ret != 0) {
netdev_err(vf_netdev,
"can not set master device %s (err = %d)\n",
diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c
index 6b127be781d9..e7ca5b5f39ed 100644
--- a/drivers/net/hyperv/rndis_filter.c
+++ b/drivers/net/hyperv/rndis_filter.c
@@ -1288,7 +1288,7 @@ struct netvsc_device *rndis_filter_device_add(struct hv_device *dev,
rndis_device->link_state ? "down" : "up");
if (net_device->nvsp_version < NVSP_PROTOCOL_VERSION_5)
- return net_device;
+ goto out;
rndis_filter_query_link_speed(rndis_device, net_device);
diff --git a/drivers/net/ieee802154/atusb.c b/drivers/net/ieee802154/atusb.c
index 9fb9b565a002..4f684cbcdc57 100644
--- a/drivers/net/ieee802154/atusb.c
+++ b/drivers/net/ieee802154/atusb.c
@@ -1045,7 +1045,7 @@ static int atusb_probe(struct usb_interface *interface,
atusb->tx_dr.bRequest = ATUSB_TX;
atusb->tx_dr.wValue = cpu_to_le16(0);
- atusb->tx_urb = usb_alloc_urb(0, GFP_ATOMIC);
+ atusb->tx_urb = usb_alloc_urb(0, GFP_KERNEL);
if (!atusb->tx_urb)
goto fail;
diff --git a/drivers/net/ieee802154/mcr20a.c b/drivers/net/ieee802154/mcr20a.c
index 55a22c761808..de0d7f28a181 100644
--- a/drivers/net/ieee802154/mcr20a.c
+++ b/drivers/net/ieee802154/mcr20a.c
@@ -1267,7 +1267,7 @@ mcr20a_probe(struct spi_device *spi)
ret = mcr20a_get_platform_data(spi, pdata);
if (ret < 0) {
dev_crit(&spi->dev, "mcr20a_get_platform_data failed.\n");
- return ret;
+ goto free_pdata;
}
/* init reset gpio */
@@ -1275,7 +1275,7 @@ mcr20a_probe(struct spi_device *spi)
ret = devm_gpio_request_one(&spi->dev, pdata->rst_gpio,
GPIOF_OUT_INIT_HIGH, "reset");
if (ret)
- return ret;
+ goto free_pdata;
}
/* reset mcr20a */
@@ -1291,7 +1291,8 @@ mcr20a_probe(struct spi_device *spi)
hw = ieee802154_alloc_hw(sizeof(*lp), &mcr20a_hw_ops);
if (!hw) {
dev_crit(&spi->dev, "ieee802154_alloc_hw failed\n");
- return -ENOMEM;
+ ret = -ENOMEM;
+ goto free_pdata;
}
/* init mcr20a local data */
@@ -1308,8 +1309,10 @@ mcr20a_probe(struct spi_device *spi)
/* init buf */
lp->buf = devm_kzalloc(&spi->dev, SPI_COMMAND_BUFFER, GFP_KERNEL);
- if (!lp->buf)
- return -ENOMEM;
+ if (!lp->buf) {
+ ret = -ENOMEM;
+ goto free_dev;
+ }
mcr20a_setup_tx_spi_messages(lp);
mcr20a_setup_rx_spi_messages(lp);
@@ -1366,6 +1369,8 @@ mcr20a_probe(struct spi_device *spi)
free_dev:
ieee802154_free_hw(lp->hw);
+free_pdata:
+ kfree(pdata);
return ret;
}
diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c
index 450eec264a5e..4377c26f714d 100644
--- a/drivers/net/ipvlan/ipvlan_main.c
+++ b/drivers/net/ipvlan/ipvlan_main.c
@@ -792,8 +792,10 @@ static int ipvlan_device_event(struct notifier_block *unused,
break;
case NETDEV_CHANGEADDR:
- list_for_each_entry(ipvlan, &port->ipvlans, pnode)
+ list_for_each_entry(ipvlan, &port->ipvlans, pnode) {
ether_addr_copy(ipvlan->dev->dev_addr, dev->dev_addr);
+ call_netdevice_notifiers(NETDEV_CHANGEADDR, ipvlan->dev);
+ }
break;
case NETDEV_PRE_TYPE_CHANGE:
diff --git a/drivers/net/phy/bcm-cygnus.c b/drivers/net/phy/bcm-cygnus.c
index 6838129839ca..e757b09f1889 100644
--- a/drivers/net/phy/bcm-cygnus.c
+++ b/drivers/net/phy/bcm-cygnus.c
@@ -61,17 +61,17 @@ static int bcm_cygnus_afe_config(struct phy_device *phydev)
return rc;
/* make rcal=100, since rdb default is 000 */
- rc = bcm_phy_write_exp(phydev, MII_BRCM_CORE_EXPB1, 0x10);
+ rc = bcm_phy_write_exp_sel(phydev, MII_BRCM_CORE_EXPB1, 0x10);
if (rc < 0)
return rc;
/* CORE_EXPB0, Reset R_CAL/RC_CAL Engine */
- rc = bcm_phy_write_exp(phydev, MII_BRCM_CORE_EXPB0, 0x10);
+ rc = bcm_phy_write_exp_sel(phydev, MII_BRCM_CORE_EXPB0, 0x10);
if (rc < 0)
return rc;
/* CORE_EXPB0, Disable Reset R_CAL/RC_CAL Engine */
- rc = bcm_phy_write_exp(phydev, MII_BRCM_CORE_EXPB0, 0x00);
+ rc = bcm_phy_write_exp_sel(phydev, MII_BRCM_CORE_EXPB0, 0x00);
return 0;
}
diff --git a/drivers/net/phy/bcm-phy-lib.c b/drivers/net/phy/bcm-phy-lib.c
index 5ad130c3da43..d5e0833d69b9 100644
--- a/drivers/net/phy/bcm-phy-lib.c
+++ b/drivers/net/phy/bcm-phy-lib.c
@@ -56,7 +56,7 @@ int bcm54xx_auxctl_read(struct phy_device *phydev, u16 regnum)
/* The register must be written to both the Shadow Register Select and
* the Shadow Read Register Selector
*/
- phy_write(phydev, MII_BCM54XX_AUX_CTL, regnum |
+ phy_write(phydev, MII_BCM54XX_AUX_CTL, MII_BCM54XX_AUXCTL_SHDWSEL_MASK |
regnum << MII_BCM54XX_AUXCTL_SHDWSEL_READ_SHIFT);
return phy_read(phydev, MII_BCM54XX_AUX_CTL);
}
diff --git a/drivers/net/phy/bcm-phy-lib.h b/drivers/net/phy/bcm-phy-lib.h
index 7c73808cbbde..81cceaa412fe 100644
--- a/drivers/net/phy/bcm-phy-lib.h
+++ b/drivers/net/phy/bcm-phy-lib.h
@@ -14,11 +14,18 @@
#ifndef _LINUX_BCM_PHY_LIB_H
#define _LINUX_BCM_PHY_LIB_H
+#include <linux/brcmphy.h>
#include <linux/phy.h>
int bcm_phy_write_exp(struct phy_device *phydev, u16 reg, u16 val);
int bcm_phy_read_exp(struct phy_device *phydev, u16 reg);
+static inline int bcm_phy_write_exp_sel(struct phy_device *phydev,
+ u16 reg, u16 val)
+{
+ return bcm_phy_write_exp(phydev, reg | MII_BCM54XX_EXP_SEL_ER, val);
+}
+
int bcm54xx_auxctl_write(struct phy_device *phydev, u16 regnum, u16 val);
int bcm54xx_auxctl_read(struct phy_device *phydev, u16 regnum);
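The new inline simply ORs the expansion-register selector into the register number before delegating, so the callers converted in bcm-cygnus.c and bcm7xxx.c read as follows (equivalence taken directly from the inline's body):

	/* write expansion register 0xb0 through the EXP_SEL_ER window */
	bcm_phy_write_exp_sel(phydev, 0x00b0, 0x0010);

	/* ...is by definition the same as... */
	bcm_phy_write_exp(phydev, 0x00b0 | MII_BCM54XX_EXP_SEL_ER, 0x0010);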
diff --git a/drivers/net/phy/bcm7xxx.c b/drivers/net/phy/bcm7xxx.c
index 29b1c88b55cc..01d2ff2f6241 100644
--- a/drivers/net/phy/bcm7xxx.c
+++ b/drivers/net/phy/bcm7xxx.c
@@ -65,10 +65,10 @@ struct bcm7xxx_phy_priv {
static void r_rc_cal_reset(struct phy_device *phydev)
{
/* Reset R_CAL/RC_CAL Engine */
- bcm_phy_write_exp(phydev, 0x00b0, 0x0010);
+ bcm_phy_write_exp_sel(phydev, 0x00b0, 0x0010);
/* Disable Reset R_AL/RC_CAL Engine */
- bcm_phy_write_exp(phydev, 0x00b0, 0x0000);
+ bcm_phy_write_exp_sel(phydev, 0x00b0, 0x0000);
}
static int bcm7xxx_28nm_b0_afe_config_init(struct phy_device *phydev)
diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c
index 3bb6b66dc7bf..f9c25912eb98 100644
--- a/drivers/net/phy/broadcom.c
+++ b/drivers/net/phy/broadcom.c
@@ -720,6 +720,15 @@ static struct phy_driver broadcom_drivers[] = {
.get_strings = bcm_phy_get_strings,
.get_stats = bcm53xx_phy_get_stats,
.probe = bcm53xx_phy_probe,
+}, {
+ .phy_id = PHY_ID_BCM89610,
+ .phy_id_mask = 0xfffffff0,
+ .name = "Broadcom BCM89610",
+ .features = PHY_GBIT_FEATURES,
+ .flags = PHY_HAS_INTERRUPT,
+ .config_init = bcm54xx_config_init,
+ .ack_interrupt = bcm_phy_ack_intr,
+ .config_intr = bcm_phy_config_intr,
} };
module_phy_driver(broadcom_drivers);
@@ -741,6 +750,7 @@ static struct mdio_device_id __maybe_unused broadcom_tbl[] = {
{ PHY_ID_BCMAC131, 0xfffffff0 },
{ PHY_ID_BCM5241, 0xfffffff0 },
{ PHY_ID_BCM5395, 0xfffffff0 },
+ { PHY_ID_BCM89610, 0xfffffff0 },
{ }
};
diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c
index f41b224a9cdb..ab195f0916d6 100644
--- a/drivers/net/phy/micrel.c
+++ b/drivers/net/phy/micrel.c
@@ -573,9 +573,40 @@ static int ksz9031_config_init(struct phy_device *phydev)
ksz9031_of_load_skew_values(phydev, of_node,
MII_KSZ9031RN_TX_DATA_PAD_SKEW, 4,
tx_data_skews, 4);
+
+ /* Silicon Errata Sheet (DS80000691D or DS80000692D):
+ * When the device links in the 1000BASE-T slave mode only,
+ * the optional 125MHz reference output clock (CLK125_NDO)
+ * has wide duty cycle variation.
+ *
+ * The optional CLK125_NDO clock does not meet the RGMII
+ * 45/55 percent (min/max) duty cycle requirement and therefore
+ * cannot be used directly by the MAC side for clocking
+ * applications that have setup/hold time requirements on
+ * rising and falling clock edges.
+ *
+ * Workaround:
+ * Force the phy to be the master to receive a stable clock
+ * which meets the duty cycle requirement.
+ */
+ if (of_property_read_bool(of_node, "micrel,force-master")) {
+ result = phy_read(phydev, MII_CTRL1000);
+ if (result < 0)
+ goto err_force_master;
+
+ /* enable master mode, config & prefer master */
+ result |= CTL1000_ENABLE_MASTER | CTL1000_AS_MASTER;
+ result = phy_write(phydev, MII_CTRL1000, result);
+ if (result < 0)
+ goto err_force_master;
+ }
}
return ksz9031_center_flp_timing(phydev);
+
+err_force_master:
+ phydev_err(phydev, "failed to force the phy to master mode\n");
+ return result;
}
#define KSZ8873MLL_GLOBAL_CONTROL_4 0x06
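The workaround itself is a plain read-modify-write of the standard MII_CTRL1000 register, extracted here as a stand-alone sketch (phy_read()/phy_write() are the usual phylib accessors; the CTL1000_* bits come from linux/mii.h):

/* Force 1000BASE-T master mode: enable manual master/slave
 * configuration and prefer master, as the errata workaround does.
 */
static int phy_force_master(struct phy_device *phydev)
{
	int val = phy_read(phydev, MII_CTRL1000);

	if (val < 0)
		return val;

	val |= CTL1000_ENABLE_MASTER | CTL1000_AS_MASTER;
	return phy_write(phydev, MII_CTRL1000, val);
}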
diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index ac23322a32e1..9e4ba8e80a18 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -535,8 +535,17 @@ static int get_phy_id(struct mii_bus *bus, int addr, u32 *phy_id,
/* Grab the bits from PHYIR1, and put them in the upper half */
phy_reg = mdiobus_read(bus, addr, MII_PHYSID1);
- if (phy_reg < 0)
+ if (phy_reg < 0) {
+ /* if there is no device, return without an error so scanning
+ * the bus works properly
+ */
+ if (phy_reg == -EIO || phy_reg == -ENODEV) {
+ *phy_id = 0xffffffff;
+ return 0;
+ }
+
return -EIO;
+ }
*phy_id = (phy_reg & 0xffff) << 16;
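Reporting the all-ones ID instead of -EIO matters because the caller already treats 0xffffffff as "no PHY at this address" and keeps scanning. The caller-side convention, sketched for illustration (mask value as commonly used by phylib):

	/* all ones means nothing answered; skip this address and keep
	 * walking the bus instead of failing the whole scan
	 */
	if ((phy_id & 0x1fffffff) == 0x1fffffff)
		return ERR_PTR(-ENODEV);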
diff --git a/drivers/net/phy/sfp-bus.c b/drivers/net/phy/sfp-bus.c
index 0381da78d228..fd6c23f69c2f 100644
--- a/drivers/net/phy/sfp-bus.c
+++ b/drivers/net/phy/sfp-bus.c
@@ -125,7 +125,7 @@ void sfp_parse_support(struct sfp_bus *bus, const struct sfp_eeprom_id *id,
if (id->base.br_nominal) {
if (id->base.br_nominal != 255) {
br_nom = id->base.br_nominal * 100;
- br_min = br_nom + id->base.br_nominal * id->ext.br_min;
+ br_min = br_nom - id->base.br_nominal * id->ext.br_min;
br_max = br_nom + id->base.br_nominal * id->ext.br_max;
} else if (id->ext.br_max) {
br_nom = 250 * id->ext.br_max;
diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c
index dc7c7ec43202..02ad03a2fab7 100644
--- a/drivers/net/ppp/ppp_generic.c
+++ b/drivers/net/ppp/ppp_generic.c
@@ -605,30 +605,13 @@ static long ppp_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
if (cmd == PPPIOCDETACH) {
/*
- * We have to be careful here... if the file descriptor
- * has been dup'd, we could have another process in the
- * middle of a poll using the same file *, so we had
- * better not free the interface data structures -
- * instead we fail the ioctl. Even in this case, we
- * shut down the interface if we are the owner of it.
- * Actually, we should get rid of PPPIOCDETACH, userland
- * (i.e. pppd) could achieve the same effect by closing
- * this fd and reopening /dev/ppp.
+ * PPPIOCDETACH is no longer supported as it was heavily broken,
+ * and is only known to have been used by pppd older than
+ * ppp-2.4.2 (released November 2003).
*/
+ pr_warn_once("%s (%d) used obsolete PPPIOCDETACH ioctl\n",
+ current->comm, current->pid);
err = -EINVAL;
- if (pf->kind == INTERFACE) {
- ppp = PF_TO_PPP(pf);
- rtnl_lock();
- if (file == ppp->owner)
- unregister_netdevice(ppp->dev);
- rtnl_unlock();
- }
- if (atomic_long_read(&file->f_count) < 2) {
- ppp_release(NULL, file);
- err = 0;
- } else
- pr_warn("PPPIOCDETACH file->f_count=%ld\n",
- atomic_long_read(&file->f_count));
goto out;
}
diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c
index 7df07337d69c..de51e8f70f44 100644
--- a/drivers/net/ppp/pppoe.c
+++ b/drivers/net/ppp/pppoe.c
@@ -1096,21 +1096,6 @@ static const struct seq_operations pppoe_seq_ops = {
.stop = pppoe_seq_stop,
.show = pppoe_seq_show,
};
-
-static int pppoe_seq_open(struct inode *inode, struct file *file)
-{
- return seq_open_net(inode, file, &pppoe_seq_ops,
- sizeof(struct seq_net_private));
-}
-
-static const struct file_operations pppoe_seq_fops = {
- .owner = THIS_MODULE,
- .open = pppoe_seq_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release_net,
-};
-
#endif /* CONFIG_PROC_FS */
static const struct proto_ops pppoe_ops = {
@@ -1122,7 +1107,7 @@ static const struct proto_ops pppoe_ops = {
.socketpair = sock_no_socketpair,
.accept = sock_no_accept,
.getname = pppoe_getname,
- .poll = datagram_poll,
+ .poll_mask = datagram_poll_mask,
.listen = sock_no_listen,
.shutdown = sock_no_shutdown,
.setsockopt = sock_no_setsockopt,
@@ -1146,7 +1131,8 @@ static __net_init int pppoe_init_net(struct net *net)
rwlock_init(&pn->hash_lock);
- pde = proc_create("pppoe", 0444, net->proc_net, &pppoe_seq_fops);
+ pde = proc_create_net("pppoe", 0444, net->proc_net,
+ &pppoe_seq_ops, sizeof(struct seq_net_private));
#ifdef CONFIG_PROC_FS
if (!pde)
return -ENOMEM;
diff --git a/drivers/net/ppp/pptp.c b/drivers/net/ppp/pptp.c
index c4267ecefd85..157b67c1bf8e 100644
--- a/drivers/net/ppp/pptp.c
+++ b/drivers/net/ppp/pptp.c
@@ -624,7 +624,6 @@ static const struct proto_ops pptp_ops = {
.socketpair = sock_no_socketpair,
.accept = sock_no_accept,
.getname = pptp_getname,
- .poll = sock_no_poll,
.listen = sock_no_listen,
.shutdown = sock_no_shutdown,
.setsockopt = sock_no_setsockopt,
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index ef33950a45d9..23e9eb66197f 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -681,15 +681,6 @@ static void tun_queue_purge(struct tun_file *tfile)
skb_queue_purge(&tfile->sk.sk_error_queue);
}
-static void tun_cleanup_tx_ring(struct tun_file *tfile)
-{
- if (tfile->tx_ring.queue) {
- ptr_ring_cleanup(&tfile->tx_ring, tun_ptr_free);
- xdp_rxq_info_unreg(&tfile->xdp_rxq);
- memset(&tfile->tx_ring, 0, sizeof(tfile->tx_ring));
- }
-}
-
static void __tun_detach(struct tun_file *tfile, bool clean)
{
struct tun_file *ntfile;
@@ -736,7 +727,9 @@ static void __tun_detach(struct tun_file *tfile, bool clean)
tun->dev->reg_state == NETREG_REGISTERED)
unregister_netdevice(tun->dev);
}
- tun_cleanup_tx_ring(tfile);
+ if (tun)
+ xdp_rxq_info_unreg(&tfile->xdp_rxq);
+ ptr_ring_cleanup(&tfile->tx_ring, tun_ptr_free);
sock_put(&tfile->sk);
}
}
@@ -783,14 +776,14 @@ static void tun_detach_all(struct net_device *dev)
tun_napi_del(tun, tfile);
/* Drop read queue */
tun_queue_purge(tfile);
+ xdp_rxq_info_unreg(&tfile->xdp_rxq);
sock_put(&tfile->sk);
- tun_cleanup_tx_ring(tfile);
}
list_for_each_entry_safe(tfile, tmp, &tun->disabled, next) {
tun_enable_queue(tfile);
tun_queue_purge(tfile);
+ xdp_rxq_info_unreg(&tfile->xdp_rxq);
sock_put(&tfile->sk);
- tun_cleanup_tx_ring(tfile);
}
BUG_ON(tun->numdisabled != 0);
@@ -834,7 +827,8 @@ static int tun_attach(struct tun_struct *tun, struct file *file,
}
if (!tfile->detached &&
- ptr_ring_init(&tfile->tx_ring, dev->tx_queue_len, GFP_KERNEL)) {
+ ptr_ring_resize(&tfile->tx_ring, dev->tx_queue_len,
+ GFP_KERNEL, tun_ptr_free)) {
err = -ENOMEM;
goto out;
}
@@ -1429,6 +1423,13 @@ static void tun_net_init(struct net_device *dev)
dev->max_mtu = MAX_MTU - dev->hard_header_len;
}
+static bool tun_sock_writeable(struct tun_struct *tun, struct tun_file *tfile)
+{
+ struct sock *sk = tfile->socket.sk;
+
+ return (tun->dev->flags & IFF_UP) && sock_writeable(sk);
+}
+
/* Character device part */
/* Poll */
@@ -1451,10 +1452,14 @@ static __poll_t tun_chr_poll(struct file *file, poll_table *wait)
if (!ptr_ring_empty(&tfile->tx_ring))
mask |= EPOLLIN | EPOLLRDNORM;
- if (tun->dev->flags & IFF_UP &&
- (sock_writeable(sk) ||
- (!test_and_set_bit(SOCKWQ_ASYNC_NOSPACE, &sk->sk_socket->flags) &&
- sock_writeable(sk))))
+ /* Make sure SOCKWQ_ASYNC_NOSPACE is set if not writable, so that
+ * EPOLLOUT is guaranteed to be raised either here or by
+ * tun_sock_write_space(). This way a process that wrote to a down
+ * device and got -EIO still receives its notification.
+ */
+ if (tun_sock_writeable(tun, tfile) ||
+ (!test_and_set_bit(SOCKWQ_ASYNC_NOSPACE, &sk->sk_socket->flags) &&
+ tun_sock_writeable(tun, tfile)))
mask |= EPOLLOUT | EPOLLWRNORM;
if (tun->dev->reg_state != NETREG_REGISTERED)
@@ -1645,7 +1650,7 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
else
*skb_xdp = 0;
- preempt_disable();
+ local_bh_disable();
rcu_read_lock();
xdp_prog = rcu_dereference(tun->xdp_prog);
if (xdp_prog && !*skb_xdp) {
@@ -1670,7 +1675,7 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
if (err)
goto err_redirect;
rcu_read_unlock();
- preempt_enable();
+ local_bh_enable();
return NULL;
case XDP_TX:
get_page(alloc_frag->page);
@@ -1679,7 +1684,7 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
goto err_redirect;
tun_xdp_flush(tun->dev);
rcu_read_unlock();
- preempt_enable();
+ local_bh_enable();
return NULL;
case XDP_PASS:
delta = orig_data - xdp.data;
@@ -1698,7 +1703,7 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
skb = build_skb(buf, buflen);
if (!skb) {
rcu_read_unlock();
- preempt_enable();
+ local_bh_enable();
return ERR_PTR(-ENOMEM);
}
@@ -1708,7 +1713,7 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
alloc_frag->offset += buflen;
rcu_read_unlock();
- preempt_enable();
+ local_bh_enable();
return skb;
@@ -1716,7 +1721,7 @@ err_redirect:
put_page(alloc_frag->page);
err_xdp:
rcu_read_unlock();
- preempt_enable();
+ local_bh_enable();
this_cpu_inc(tun->pcpu_stats->rx_dropped);
return NULL;
}
@@ -1912,16 +1917,19 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
struct bpf_prog *xdp_prog;
int ret;
+ local_bh_disable();
rcu_read_lock();
xdp_prog = rcu_dereference(tun->xdp_prog);
if (xdp_prog) {
ret = do_xdp_generic(xdp_prog, skb);
if (ret != XDP_PASS) {
rcu_read_unlock();
+ local_bh_enable();
return total_len;
}
}
rcu_read_unlock();
+ local_bh_enable();
}
rcu_read_lock();
@@ -3219,6 +3227,11 @@ static int tun_chr_open(struct inode *inode, struct file * file)
&tun_proto, 0);
if (!tfile)
return -ENOMEM;
+ if (ptr_ring_init(&tfile->tx_ring, 0, GFP_KERNEL)) {
+ sk_free(&tfile->sk);
+ return -ENOMEM;
+ }
+
RCU_INIT_POINTER(tfile->tun, NULL);
tfile->flags = 0;
tfile->ifindex = 0;
@@ -3239,8 +3252,6 @@ static int tun_chr_open(struct inode *inode, struct file * file)
sock_set_flag(&tfile->sk, SOCK_ZEROCOPY);
- memset(&tfile->tx_ring, 0, sizeof(tfile->tx_ring));
-
return 0;
}
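Taken together, the tun hunks move tx_ring to a strict init/resize/cleanup lifecycle, which is what makes the unconditional ptr_ring_cleanup() in the detach paths safe. Condensed sketch (ptr_ring API as in linux/ptr_ring.h):

	/* open: always leave a valid, zero-sized ring behind */
	if (ptr_ring_init(&tfile->tx_ring, 0, GFP_KERNEL))
		return -ENOMEM;

	/* attach: grow to the real queue length; entries that no longer
	 * fit are released through the destructor
	 */
	if (ptr_ring_resize(&tfile->tx_ring, dev->tx_queue_len,
			    GFP_KERNEL, tun_ptr_free))
		return -ENOMEM;

	/* detach: cleanup no longer needs a "was it initialized" check */
	ptr_ring_cleanup(&tfile->tx_ring, tun_ptr_free);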
diff --git a/drivers/net/usb/cdc_mbim.c b/drivers/net/usb/cdc_mbim.c
index 7220cd620717..0362acd5cdca 100644
--- a/drivers/net/usb/cdc_mbim.c
+++ b/drivers/net/usb/cdc_mbim.c
@@ -609,7 +609,7 @@ static const struct driver_info cdc_mbim_info_ndp_to_end = {
*/
static const struct driver_info cdc_mbim_info_avoid_altsetting_toggle = {
.description = "CDC MBIM",
- .flags = FLAG_NO_SETINT | FLAG_MULTI_PACKET | FLAG_WWAN,
+ .flags = FLAG_NO_SETINT | FLAG_MULTI_PACKET | FLAG_WWAN | FLAG_SEND_ZLP,
.bind = cdc_mbim_bind,
.unbind = cdc_mbim_unbind,
.manage_power = cdc_mbim_manage_power,
diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c
index c853e7410f5a..094680871687 100644
--- a/drivers/net/usb/qmi_wwan.c
+++ b/drivers/net/usb/qmi_wwan.c
@@ -1098,10 +1098,12 @@ static const struct usb_device_id products[] = {
{QMI_FIXED_INTF(0x05c6, 0x9080, 8)},
{QMI_FIXED_INTF(0x05c6, 0x9083, 3)},
{QMI_FIXED_INTF(0x05c6, 0x9084, 4)},
+ {QMI_FIXED_INTF(0x05c6, 0x90b2, 3)}, /* ublox R410M */
{QMI_FIXED_INTF(0x05c6, 0x920d, 0)},
{QMI_FIXED_INTF(0x05c6, 0x920d, 5)},
{QMI_QUIRK_SET_DTR(0x05c6, 0x9625, 4)}, /* YUGA CLM920-NC5 */
{QMI_FIXED_INTF(0x0846, 0x68a2, 8)},
+ {QMI_FIXED_INTF(0x0846, 0x68d3, 8)}, /* Netgear Aircard 779S */
{QMI_FIXED_INTF(0x12d1, 0x140c, 1)}, /* Huawei E173 */
{QMI_FIXED_INTF(0x12d1, 0x14ac, 1)}, /* Huawei E1820 */
{QMI_FIXED_INTF(0x1435, 0xd181, 3)}, /* Wistron NeWeb D18Q1 */
@@ -1343,6 +1345,18 @@ static int qmi_wwan_probe(struct usb_interface *intf,
id->driver_info = (unsigned long)&qmi_wwan_info;
}
+ /* There are devices where the same interface number can be
+ * configured as different functions. We should only bind to
+ * vendor-specific functions when matching on the interface number.
+ */
+ if (id->match_flags & USB_DEVICE_ID_MATCH_INT_NUMBER &&
+ desc->bInterfaceClass != USB_CLASS_VENDOR_SPEC) {
+ dev_dbg(&intf->dev,
+ "Rejecting interface number match for class %02x\n",
+ desc->bInterfaceClass);
+ return -ENODEV;
+ }
+
/* Quectel EC20 quirk where we've QMI on interface 4 instead of 0 */
if (quectel_ec20_detected(intf) && desc->bInterfaceNumber == 0) {
dev_dbg(&intf->dev, "Quectel EC20 quirk, skipping interface 0\n");
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 770422e953f7..032e1ac10a30 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -707,6 +707,13 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
void *data;
u32 act;
+ /* Transient failure which in theory could occur if
+ * in-flight packets from before XDP was enabled reach
+ * the receive path after XDP is loaded.
+ */
+ if (unlikely(hdr->hdr.gso_type))
+ goto err_xdp;
+
/* This happens when rx buffer size is underestimated
* or headroom is not enough because of the buffer
* was refilled before XDP is set. This should only
@@ -727,14 +734,6 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
xdp_page = page;
}
- /* Transient failure which in theory could occur if
- * in-flight packets from before XDP was enabled reach
- * the receive path after XDP is loaded. In practice I
- * was not able to create this condition.
- */
- if (unlikely(hdr->hdr.gso_type))
- goto err_xdp;
-
/* Allow consuming headroom but reserve enough space to push
* the descriptor on if we get an XDP_TX return code.
*/
@@ -775,7 +774,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
}
*xdp_xmit = true;
if (unlikely(xdp_page != page))
- goto err_xdp;
+ put_page(page);
rcu_read_unlock();
goto xdp_xmit;
case XDP_REDIRECT:
@@ -787,7 +786,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
}
*xdp_xmit = true;
if (unlikely(xdp_page != page))
- goto err_xdp;
+ put_page(page);
rcu_read_unlock();
goto xdp_xmit;
default:
@@ -875,7 +874,7 @@ err_xdp:
rcu_read_unlock();
err_skb:
put_page(page);
- while (--num_buf) {
+ while (num_buf-- > 1) {
buf = virtqueue_get_buf(rq->vq, &len);
if (unlikely(!buf)) {
pr_debug("%s: rx error: %d buffers missing\n",
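The loop-condition change is worth spelling out: 0 pre-decrements to -1, which is true, so "while (--num_buf)" runs away when no buffers remain, whereas the post-decrement comparison consumes exactly num_buf - 1 buffers and none at all when num_buf is 0. Sketch of the fixed drain loop:

	while (num_buf-- > 1) {
		buf = virtqueue_get_buf(rq->vq, &len);
		if (!buf)
			break;		/* ring out of sync; stop */
		put_page(virt_to_head_page(buf));
	}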
diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c
index 9ebe2a689966..27a9bb8c9611 100644
--- a/drivers/net/vmxnet3/vmxnet3_drv.c
+++ b/drivers/net/vmxnet3/vmxnet3_drv.c
@@ -369,6 +369,11 @@ vmxnet3_tq_tx_complete(struct vmxnet3_tx_queue *tq,
gdesc = tq->comp_ring.base + tq->comp_ring.next2proc;
while (VMXNET3_TCD_GET_GEN(&gdesc->tcd) == tq->comp_ring.gen) {
+ /* Prevent any &gdesc->tcd field from being (speculatively)
+ * read before (&gdesc->tcd)->gen is read.
+ */
+ dma_rmb();
+
completed += vmxnet3_unmap_pkt(VMXNET3_TCD_GET_TXIDX(
&gdesc->tcd), tq, adapter->pdev,
adapter);
@@ -1103,6 +1108,11 @@ vmxnet3_tq_xmit(struct sk_buff *skb, struct vmxnet3_tx_queue *tq,
gdesc->txd.tci = skb_vlan_tag_get(skb);
}
+ /* Ensure that the write to (&gdesc->txd)->gen will be observed after
+ * all other writes to &gdesc->txd.
+ */
+ dma_wmb();
+
/* finally flips the GEN bit of the SOP desc. */
gdesc->dword[2] = cpu_to_le32(le32_to_cpu(gdesc->dword[2]) ^
VMXNET3_TXD_GEN);
@@ -1298,6 +1308,12 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq,
*/
break;
}
+
+ /* Prevent any rcd field from being (speculatively) read before
+ * rcd->gen is read.
+ */
+ dma_rmb();
+
BUG_ON(rcd->rqID != rq->qid && rcd->rqID != rq->qid2 &&
rcd->rqID != rq->dataRingQid);
idx = rcd->rxdIdx;
@@ -1528,6 +1544,12 @@ rcd_done:
ring->next2comp = idx;
num_to_alloc = vmxnet3_cmd_ring_desc_avail(ring);
ring = rq->rx_ring + ring_idx;
+
+ /* Ensure that the writes to rxd->gen bits will be observed
+ * after all other writes to rxd objects.
+ */
+ dma_wmb();
+
while (num_to_alloc) {
vmxnet3_getRxDesc(rxd, &ring->base[ring->next2fill].rxd,
&rxCmdDesc);
@@ -2688,7 +2710,7 @@ vmxnet3_set_mac_addr(struct net_device *netdev, void *p)
/* ==================== initialization and cleanup routines ============ */
static int
-vmxnet3_alloc_pci_resources(struct vmxnet3_adapter *adapter, bool *dma64)
+vmxnet3_alloc_pci_resources(struct vmxnet3_adapter *adapter)
{
int err;
unsigned long mmio_start, mmio_len;
@@ -2700,30 +2722,12 @@ vmxnet3_alloc_pci_resources(struct vmxnet3_adapter *adapter, bool *dma64)
return err;
}
- if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64)) == 0) {
- if (pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)) != 0) {
- dev_err(&pdev->dev,
- "pci_set_consistent_dma_mask failed\n");
- err = -EIO;
- goto err_set_mask;
- }
- *dma64 = true;
- } else {
- if (pci_set_dma_mask(pdev, DMA_BIT_MASK(32)) != 0) {
- dev_err(&pdev->dev,
- "pci_set_dma_mask failed\n");
- err = -EIO;
- goto err_set_mask;
- }
- *dma64 = false;
- }
-
err = pci_request_selected_regions(pdev, (1 << 2) - 1,
vmxnet3_driver_name);
if (err) {
dev_err(&pdev->dev,
"Failed to request region for adapter: error %d\n", err);
- goto err_set_mask;
+ goto err_enable_device;
}
pci_set_master(pdev);
@@ -2751,7 +2755,7 @@ err_bar1:
iounmap(adapter->hw_addr0);
err_ioremap:
pci_release_selected_regions(pdev, (1 << 2) - 1);
-err_set_mask:
+err_enable_device:
pci_disable_device(pdev);
return err;
}
@@ -3254,7 +3258,7 @@ vmxnet3_probe_device(struct pci_dev *pdev,
#endif
};
int err;
- bool dma64 = false; /* stupid gcc */
+ bool dma64;
u32 ver;
struct net_device *netdev;
struct vmxnet3_adapter *adapter;
@@ -3300,6 +3304,24 @@ vmxnet3_probe_device(struct pci_dev *pdev,
adapter->rx_ring_size = VMXNET3_DEF_RX_RING_SIZE;
adapter->rx_ring2_size = VMXNET3_DEF_RX_RING2_SIZE;
+ if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64)) == 0) {
+ if (pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)) != 0) {
+ dev_err(&pdev->dev,
+ "pci_set_consistent_dma_mask failed\n");
+ err = -EIO;
+ goto err_set_mask;
+ }
+ dma64 = true;
+ } else {
+ if (pci_set_dma_mask(pdev, DMA_BIT_MASK(32)) != 0) {
+ dev_err(&pdev->dev,
+ "pci_set_dma_mask failed\n");
+ err = -EIO;
+ goto err_set_mask;
+ }
+ dma64 = false;
+ }
+
spin_lock_init(&adapter->cmd_lock);
adapter->adapter_pa = dma_map_single(&adapter->pdev->dev, adapter,
sizeof(struct vmxnet3_adapter),
@@ -3307,7 +3329,7 @@ vmxnet3_probe_device(struct pci_dev *pdev,
if (dma_mapping_error(&adapter->pdev->dev, adapter->adapter_pa)) {
dev_err(&pdev->dev, "Failed to map dma\n");
err = -EFAULT;
- goto err_dma_map;
+ goto err_set_mask;
}
adapter->shared = dma_alloc_coherent(
&adapter->pdev->dev,
@@ -3358,7 +3380,7 @@ vmxnet3_probe_device(struct pci_dev *pdev,
}
#endif /* VMXNET3_RSS */
- err = vmxnet3_alloc_pci_resources(adapter, &dma64);
+ err = vmxnet3_alloc_pci_resources(adapter);
if (err < 0)
goto err_alloc_pci;
@@ -3504,7 +3526,7 @@ err_alloc_queue_desc:
err_alloc_shared:
dma_unmap_single(&adapter->pdev->dev, adapter->adapter_pa,
sizeof(struct vmxnet3_adapter), PCI_DMA_TODEVICE);
-err_dma_map:
+err_set_mask:
free_netdev(netdev);
return err;
}
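All three vmxnet3 barrier insertions enforce the same descriptor-ring discipline around the gen bit; condensed into one producer/consumer sketch with illustrative field and helper names:

	/* producer: fill every field, then flip gen last; dma_wmb()
	 * keeps the device from seeing gen before the rest
	 */
	desc->addr = cpu_to_le64(dma_addr);
	desc->len  = cpu_to_le32(len);
	dma_wmb();
	desc->gen ^= 1;

	/* consumer: read gen first; dma_rmb() before trusting any other
	 * field the device wrote
	 */
	if (desc->gen != ring->expected_gen)
		return;			/* not completed yet */
	dma_rmb();
	process(desc);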
diff --git a/drivers/net/vmxnet3/vmxnet3_int.h b/drivers/net/vmxnet3/vmxnet3_int.h
index a3326463b71f..a2c554f8a61b 100644
--- a/drivers/net/vmxnet3/vmxnet3_int.h
+++ b/drivers/net/vmxnet3/vmxnet3_int.h
@@ -69,10 +69,12 @@
/*
* Version numbers
*/
-#define VMXNET3_DRIVER_VERSION_STRING "1.4.14.0-k"
+#define VMXNET3_DRIVER_VERSION_STRING "1.4.16.0-k"
-/* a 32-bit int, each byte encode a verion number in VMXNET3_DRIVER_VERSION */
-#define VMXNET3_DRIVER_VERSION_NUM 0x01040e00
+/* Each byte of this 32-bit integer encodes a version number in
+ * VMXNET3_DRIVER_VERSION_STRING.
+ */
+#define VMXNET3_DRIVER_VERSION_NUM 0x01041000
#if defined(CONFIG_PCI_MSI)
/* RSS only makes sense if MSI-X is supported. */
diff --git a/drivers/net/wireless/atmel/atmel.c b/drivers/net/wireless/atmel/atmel.c
index d122386c382b..b01dc34d55af 100644
--- a/drivers/net/wireless/atmel/atmel.c
+++ b/drivers/net/wireless/atmel/atmel.c
@@ -1482,18 +1482,6 @@ static int atmel_proc_show(struct seq_file *m, void *v)
return 0;
}
-static int atmel_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, atmel_proc_show, PDE_DATA(inode));
-}
-
-static const struct file_operations atmel_proc_fops = {
- .open = atmel_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
static const struct net_device_ops atmel_netdev_ops = {
.ndo_open = atmel_open,
.ndo_stop = atmel_close,
@@ -1614,7 +1602,8 @@ struct net_device *init_atmel_card(unsigned short irq, unsigned long port,
netif_carrier_off(dev);
- if (!proc_create_data("driver/atmel", 0, NULL, &atmel_proc_fops, priv))
+ if (!proc_create_single_data("driver/atmel", 0, NULL, atmel_proc_show,
+ priv))
printk(KERN_WARNING "atmel: unable to create /proc entry.\n");
printk(KERN_INFO "%s: Atmel at76c50x. Version %d.%d. MAC %pM\n",
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.c
index 9277f4c2bfeb..94e177d7c9b5 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.c
@@ -459,7 +459,7 @@ static void brcmf_fw_free_request(struct brcmf_fw_request *req)
kfree(req);
}
-static void brcmf_fw_request_nvram_done(const struct firmware *fw, void *ctx)
+static int brcmf_fw_request_nvram_done(const struct firmware *fw, void *ctx)
{
struct brcmf_fw *fwctx = ctx;
struct brcmf_fw_item *cur;
@@ -498,13 +498,10 @@ static void brcmf_fw_request_nvram_done(const struct firmware *fw, void *ctx)
brcmf_dbg(TRACE, "nvram %p len %d\n", nvram, nvram_length);
cur->nv_data.data = nvram;
cur->nv_data.len = nvram_length;
- return;
+ return 0;
fail:
- brcmf_dbg(TRACE, "failed: dev=%s\n", dev_name(fwctx->dev));
- fwctx->done(fwctx->dev, -ENOENT, NULL);
- brcmf_fw_free_request(fwctx->req);
- kfree(fwctx);
+ return -ENOENT;
}
static int brcmf_fw_request_next_item(struct brcmf_fw *fwctx, bool async)
@@ -553,20 +550,27 @@ static void brcmf_fw_request_done(const struct firmware *fw, void *ctx)
brcmf_dbg(TRACE, "enter: firmware %s %sfound\n", cur->path,
fw ? "" : "not ");
- if (fw) {
- if (cur->type == BRCMF_FW_TYPE_BINARY)
- cur->binary = fw;
- else if (cur->type == BRCMF_FW_TYPE_NVRAM)
- brcmf_fw_request_nvram_done(fw, fwctx);
- else
- release_firmware(fw);
- } else if (cur->type == BRCMF_FW_TYPE_NVRAM) {
- brcmf_fw_request_nvram_done(NULL, fwctx);
- } else if (!(cur->flags & BRCMF_FW_REQF_OPTIONAL)) {
+ if (!fw)
ret = -ENOENT;
+
+ switch (cur->type) {
+ case BRCMF_FW_TYPE_NVRAM:
+ ret = brcmf_fw_request_nvram_done(fw, fwctx);
+ break;
+ case BRCMF_FW_TYPE_BINARY:
+ cur->binary = fw;
+ break;
+ default:
+ /* something fishy here so bail out early */
+ brcmf_err("unknown fw type: %d\n", cur->type);
+ release_firmware(fw);
+ ret = -EINVAL;
goto fail;
}
+ if (ret < 0 && !(cur->flags & BRCMF_FW_REQF_OPTIONAL))
+ goto fail;
+
do {
if (++fwctx->curpos == fwctx->req->n_items) {
ret = 0;
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/scan.h b/drivers/net/wireless/intel/iwlwifi/fw/api/scan.h
index 7af3a0f51b77..a17c4a79b8d4 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/api/scan.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/api/scan.h
@@ -8,6 +8,7 @@
* Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
* Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
* Copyright(c) 2016 - 2017 Intel Deutschland GmbH
+ * Copyright(c) 2018 Intel Corporation
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
@@ -30,7 +31,7 @@
* Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
* Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
* Copyright(c) 2016 - 2017 Intel Deutschland GmbH
- * Copyright(c) 2018 Intel Corporation
+ * Copyright(c) 2018 Intel Corporation
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -749,13 +750,9 @@ struct iwl_scan_req_umac {
} __packed;
#define IWL_SCAN_REQ_UMAC_SIZE_V8 sizeof(struct iwl_scan_req_umac)
-#define IWL_SCAN_REQ_UMAC_SIZE_V7 (sizeof(struct iwl_scan_req_umac) - \
- 4 * sizeof(u8))
-#define IWL_SCAN_REQ_UMAC_SIZE_V6 (sizeof(struct iwl_scan_req_umac) - \
- 2 * sizeof(u8) - sizeof(__le16))
-#define IWL_SCAN_REQ_UMAC_SIZE_V1 (sizeof(struct iwl_scan_req_umac) - \
- 2 * sizeof(__le32) - 2 * sizeof(u8) - \
- sizeof(__le16))
+#define IWL_SCAN_REQ_UMAC_SIZE_V7 48
+#define IWL_SCAN_REQ_UMAC_SIZE_V6 44
+#define IWL_SCAN_REQ_UMAC_SIZE_V1 36
/**
* struct iwl_umac_scan_abort
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c
index 8928613e033e..ca0174680af9 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c
@@ -76,6 +76,7 @@
#include "iwl-io.h"
#include "iwl-csr.h"
#include "fw/acpi.h"
+#include "fw/api/nvm-reg.h"
/* NVM offsets (in words) definitions */
enum nvm_offsets {
@@ -146,8 +147,8 @@ static const u8 iwl_ext_nvm_channels[] = {
149, 153, 157, 161, 165, 169, 173, 177, 181
};
-#define IWL_NUM_CHANNELS ARRAY_SIZE(iwl_nvm_channels)
-#define IWL_NUM_CHANNELS_EXT ARRAY_SIZE(iwl_ext_nvm_channels)
+#define IWL_NVM_NUM_CHANNELS ARRAY_SIZE(iwl_nvm_channels)
+#define IWL_NVM_NUM_CHANNELS_EXT ARRAY_SIZE(iwl_ext_nvm_channels)
#define NUM_2GHZ_CHANNELS 14
#define NUM_2GHZ_CHANNELS_EXT 14
#define FIRST_2GHZ_HT_MINUS 5
@@ -301,11 +302,11 @@ static int iwl_init_channel_map(struct device *dev, const struct iwl_cfg *cfg,
const u8 *nvm_chan;
if (cfg->nvm_type != IWL_NVM_EXT) {
- num_of_ch = IWL_NUM_CHANNELS;
+ num_of_ch = IWL_NVM_NUM_CHANNELS;
nvm_chan = &iwl_nvm_channels[0];
num_2ghz_channels = NUM_2GHZ_CHANNELS;
} else {
- num_of_ch = IWL_NUM_CHANNELS_EXT;
+ num_of_ch = IWL_NVM_NUM_CHANNELS_EXT;
nvm_chan = &iwl_ext_nvm_channels[0];
num_2ghz_channels = NUM_2GHZ_CHANNELS_EXT;
}
@@ -720,12 +721,12 @@ iwl_parse_nvm_data(struct iwl_trans *trans, const struct iwl_cfg *cfg,
if (cfg->nvm_type != IWL_NVM_EXT)
data = kzalloc(sizeof(*data) +
sizeof(struct ieee80211_channel) *
- IWL_NUM_CHANNELS,
+ IWL_NVM_NUM_CHANNELS,
GFP_KERNEL);
else
data = kzalloc(sizeof(*data) +
sizeof(struct ieee80211_channel) *
- IWL_NUM_CHANNELS_EXT,
+ IWL_NVM_NUM_CHANNELS_EXT,
GFP_KERNEL);
if (!data)
return NULL;
@@ -842,24 +843,34 @@ static u32 iwl_nvm_get_regdom_bw_flags(const u8 *nvm_chan,
return flags;
}
+struct regdb_ptrs {
+ struct ieee80211_wmm_rule *rule;
+ u32 token;
+};
+
struct ieee80211_regdomain *
iwl_parse_nvm_mcc_info(struct device *dev, const struct iwl_cfg *cfg,
- int num_of_ch, __le32 *channels, u16 fw_mcc)
+ int num_of_ch, __le32 *channels, u16 fw_mcc,
+ u16 geo_info)
{
int ch_idx;
u16 ch_flags;
u32 reg_rule_flags, prev_reg_rule_flags = 0;
const u8 *nvm_chan = cfg->nvm_type == IWL_NVM_EXT ?
iwl_ext_nvm_channels : iwl_nvm_channels;
- struct ieee80211_regdomain *regd;
- int size_of_regd;
+ struct ieee80211_regdomain *regd, *copy_rd;
+ int size_of_regd, regd_to_copy, wmms_to_copy;
+ int size_of_wmms = 0;
struct ieee80211_reg_rule *rule;
+ struct ieee80211_wmm_rule *wmm_rule, *d_wmm, *s_wmm;
+ struct regdb_ptrs *regdb_ptrs;
enum nl80211_band band;
int center_freq, prev_center_freq = 0;
- int valid_rules = 0;
+ int valid_rules = 0, n_wmms = 0;
+ int i;
bool new_rule;
int max_num_ch = cfg->nvm_type == IWL_NVM_EXT ?
- IWL_NUM_CHANNELS_EXT : IWL_NUM_CHANNELS;
+ IWL_NVM_NUM_CHANNELS_EXT : IWL_NVM_NUM_CHANNELS;
if (WARN_ON_ONCE(num_of_ch > NL80211_MAX_SUPP_REG_RULES))
return ERR_PTR(-EINVAL);
@@ -875,10 +886,26 @@ iwl_parse_nvm_mcc_info(struct device *dev, const struct iwl_cfg *cfg,
sizeof(struct ieee80211_regdomain) +
num_of_ch * sizeof(struct ieee80211_reg_rule);
- regd = kzalloc(size_of_regd, GFP_KERNEL);
+ if (geo_info & GEO_WMM_ETSI_5GHZ_INFO)
+ size_of_wmms =
+ num_of_ch * sizeof(struct ieee80211_wmm_rule);
+
+ regd = kzalloc(size_of_regd + size_of_wmms, GFP_KERNEL);
if (!regd)
return ERR_PTR(-ENOMEM);
+ regdb_ptrs = kcalloc(num_of_ch, sizeof(*regdb_ptrs), GFP_KERNEL);
+ if (!regdb_ptrs) {
+ copy_rd = ERR_PTR(-ENOMEM);
+ goto out;
+ }
+
+ /* set alpha2 from FW. */
+ regd->alpha2[0] = fw_mcc >> 8;
+ regd->alpha2[1] = fw_mcc & 0xff;
+
+ wmm_rule = (struct ieee80211_wmm_rule *)((u8 *)regd + size_of_regd);
+
for (ch_idx = 0; ch_idx < num_of_ch; ch_idx++) {
ch_flags = (u16)__le32_to_cpup(channels + ch_idx);
band = (ch_idx < NUM_2GHZ_CHANNELS) ?
@@ -927,14 +954,66 @@ iwl_parse_nvm_mcc_info(struct device *dev, const struct iwl_cfg *cfg,
iwl_nvm_print_channel_flags(dev, IWL_DL_LAR,
nvm_chan[ch_idx], ch_flags);
+
+ if (!(geo_info & GEO_WMM_ETSI_5GHZ_INFO) ||
+ band == NL80211_BAND_2GHZ)
+ continue;
+
+ if (!reg_query_regdb_wmm(regd->alpha2, center_freq,
+ &regdb_ptrs[n_wmms].token, wmm_rule)) {
+ /* Add only new rules */
+ for (i = 0; i < n_wmms; i++) {
+ if (regdb_ptrs[i].token ==
+ regdb_ptrs[n_wmms].token) {
+ rule->wmm_rule = regdb_ptrs[i].rule;
+ break;
+ }
+ }
+ if (i == n_wmms) {
+ rule->wmm_rule = wmm_rule;
+ regdb_ptrs[n_wmms++].rule = wmm_rule;
+ wmm_rule++;
+ }
+ }
}
regd->n_reg_rules = valid_rules;
+ regd->n_wmm_rules = n_wmms;
- /* set alpha2 from FW. */
- regd->alpha2[0] = fw_mcc >> 8;
- regd->alpha2[1] = fw_mcc & 0xff;
+ /*
+ * Narrow the regdom down to the valid rules only, so that there is no
+ * hole between the reg rules and the wmm rules that follow them.
+ */
+ regd_to_copy = sizeof(struct ieee80211_regdomain) +
+ valid_rules * sizeof(struct ieee80211_reg_rule);
+
+ wmms_to_copy = sizeof(struct ieee80211_wmm_rule) * n_wmms;
+
+ copy_rd = kzalloc(regd_to_copy + wmms_to_copy, GFP_KERNEL);
+ if (!copy_rd) {
+ copy_rd = ERR_PTR(-ENOMEM);
+ goto out;
+ }
+
+ memcpy(copy_rd, regd, regd_to_copy);
+ memcpy((u8 *)copy_rd + regd_to_copy, (u8 *)regd + size_of_regd,
+ wmms_to_copy);
+
+ d_wmm = (struct ieee80211_wmm_rule *)((u8 *)copy_rd + regd_to_copy);
+ s_wmm = (struct ieee80211_wmm_rule *)((u8 *)regd + size_of_regd);
+
+ for (i = 0; i < regd->n_reg_rules; i++) {
+ if (!regd->reg_rules[i].wmm_rule)
+ continue;
+
+ /* the typed pointer difference is already an element count */
+ copy_rd->reg_rules[i].wmm_rule = d_wmm +
+ (regd->reg_rules[i].wmm_rule - s_wmm);
+ }
- return regd;
+out:
+ kfree(regdb_ptrs);
+ kfree(regd);
+ return copy_rd;
}
IWL_EXPORT_SYMBOL(iwl_parse_nvm_mcc_info);
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.h b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.h
index 306736c7a042..3071a23b7606 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.h
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.h
@@ -101,12 +101,14 @@ void iwl_init_sbands(struct device *dev, const struct iwl_cfg *cfg,
*
* This function parses the regulatory channel data received as a
* MCC_UPDATE_CMD command. It returns a newly allocation regulatory domain,
- * to be fed into the regulatory core. An ERR_PTR is returned on error.
+ * to be fed into the regulatory core. If geo_info is set, the WMM rules
+ * are looked up and attached accordingly. An ERR_PTR is returned on error.
* If not given to the regulatory core, the user is responsible for freeing
* the regdomain returned here with kfree.
*/
struct ieee80211_regdomain *
iwl_parse_nvm_mcc_info(struct device *dev, const struct iwl_cfg *cfg,
- int num_of_ch, __le32 *channels, u16 fw_mcc);
+ int num_of_ch, __le32 *channels, u16 fw_mcc,
+ u16 geo_info);
#endif /* __iwl_nvm_parse_h__ */
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
index 51b30424575b..90f8c89ea59c 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
@@ -311,7 +311,8 @@ struct ieee80211_regdomain *iwl_mvm_get_regdomain(struct wiphy *wiphy,
regd = iwl_parse_nvm_mcc_info(mvm->trans->dev, mvm->cfg,
__le32_to_cpu(resp->n_channels),
resp->channels,
- __le16_to_cpu(resp->mcc));
+ __le16_to_cpu(resp->mcc),
+ __le16_to_cpu(resp->geo_info));
/* Store the return source id */
src_id = resp->source_id;
kfree(resp);
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c
index f8a0234d332c..5517ea4c2aa0 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c
@@ -1590,14 +1590,13 @@ static void iwl_pcie_set_interrupt_capa(struct pci_dev *pdev,
struct iwl_trans *trans)
{
struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
- int max_irqs, num_irqs, i, ret, nr_online_cpus;
+ int max_irqs, num_irqs, i, ret;
u16 pci_cmd;
if (!trans->cfg->mq_rx_supported)
goto enable_msi;
- nr_online_cpus = num_online_cpus();
- max_irqs = min_t(u32, nr_online_cpus + 2, IWL_MAX_RX_HW_QUEUES);
+ max_irqs = min_t(u32, num_online_cpus() + 2, IWL_MAX_RX_HW_QUEUES);
for (i = 0; i < max_irqs; i++)
trans_pcie->msix_entries[i].entry = i;
@@ -1623,16 +1622,17 @@ static void iwl_pcie_set_interrupt_capa(struct pci_dev *pdev,
* Two interrupts less: non rx causes shared with FBQ and RSS.
* More than two interrupts: we will use fewer RSS queues.
*/
- if (num_irqs <= nr_online_cpus) {
+ if (num_irqs <= max_irqs - 2) {
trans_pcie->trans->num_rx_queues = num_irqs + 1;
trans_pcie->shared_vec_mask = IWL_SHARED_IRQ_NON_RX |
IWL_SHARED_IRQ_FIRST_RSS;
- } else if (num_irqs == nr_online_cpus + 1) {
+ } else if (num_irqs == max_irqs - 1) {
trans_pcie->trans->num_rx_queues = num_irqs;
trans_pcie->shared_vec_mask = IWL_SHARED_IRQ_NON_RX;
} else {
trans_pcie->trans->num_rx_queues = num_irqs - 1;
}
+ WARN_ON(trans_pcie->trans->num_rx_queues > IWL_MAX_RX_HW_QUEUES);
trans_pcie->alloc_vecs = num_irqs;
trans_pcie->msix_enabled = true;
diff --git a/drivers/net/wireless/intersil/hostap/hostap_ap.c b/drivers/net/wireless/intersil/hostap/hostap_ap.c
index b4dfe1893d18..d1884b8913e7 100644
--- a/drivers/net/wireless/intersil/hostap/hostap_ap.c
+++ b/drivers/net/wireless/intersil/hostap/hostap_ap.c
@@ -69,7 +69,7 @@ static void prism2_send_mgmt(struct net_device *dev,
#ifndef PRISM2_NO_PROCFS_DEBUG
static int ap_debug_proc_show(struct seq_file *m, void *v)
{
- struct ap_data *ap = m->private;
+ struct ap_data *ap = PDE_DATA(file_inode(m->file));
seq_printf(m, "BridgedUnicastFrames=%u\n", ap->bridged_unicast);
seq_printf(m, "BridgedMulticastFrames=%u\n", ap->bridged_multicast);
@@ -81,18 +81,6 @@ static int ap_debug_proc_show(struct seq_file *m, void *v)
seq_printf(m, "tx_drop_nonassoc=%u\n", ap->tx_drop_nonassoc);
return 0;
}
-
-static int ap_debug_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, ap_debug_proc_show, PDE_DATA(inode));
-}
-
-static const struct file_operations ap_debug_proc_fops = {
- .open = ap_debug_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
#endif /* PRISM2_NO_PROCFS_DEBUG */
@@ -333,7 +321,7 @@ void hostap_deauth_all_stas(struct net_device *dev, struct ap_data *ap,
static int ap_control_proc_show(struct seq_file *m, void *v)
{
- struct ap_data *ap = m->private;
+ struct ap_data *ap = PDE_DATA(file_inode(m->file));
char *policy_txt;
struct mac_entry *entry;
@@ -365,20 +353,20 @@ static int ap_control_proc_show(struct seq_file *m, void *v)
static void *ap_control_proc_start(struct seq_file *m, loff_t *_pos)
{
- struct ap_data *ap = m->private;
+ struct ap_data *ap = PDE_DATA(file_inode(m->file));
spin_lock_bh(&ap->mac_restrictions.lock);
return seq_list_start_head(&ap->mac_restrictions.mac_list, *_pos);
}
static void *ap_control_proc_next(struct seq_file *m, void *v, loff_t *_pos)
{
- struct ap_data *ap = m->private;
+ struct ap_data *ap = PDE_DATA(file_inode(m->file));
return seq_list_next(v, &ap->mac_restrictions.mac_list, _pos);
}
static void ap_control_proc_stop(struct seq_file *m, void *v)
{
- struct ap_data *ap = m->private;
+ struct ap_data *ap = PDE_DATA(file_inode(m->file));
spin_unlock_bh(&ap->mac_restrictions.lock);
}
@@ -389,24 +377,6 @@ static const struct seq_operations ap_control_proc_seqops = {
.show = ap_control_proc_show,
};
-static int ap_control_proc_open(struct inode *inode, struct file *file)
-{
- int ret = seq_open(file, &ap_control_proc_seqops);
- if (ret == 0) {
- struct seq_file *m = file->private_data;
- m->private = PDE_DATA(inode);
- }
- return ret;
-}
-
-static const struct file_operations ap_control_proc_fops = {
- .open = ap_control_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
-
int ap_control_add_mac(struct mac_restrictions *mac_restrictions, u8 *mac)
{
struct mac_entry *entry;
@@ -585,20 +555,20 @@ static int prism2_ap_proc_show(struct seq_file *m, void *v)
static void *prism2_ap_proc_start(struct seq_file *m, loff_t *_pos)
{
- struct ap_data *ap = m->private;
+ struct ap_data *ap = PDE_DATA(file_inode(m->file));
spin_lock_bh(&ap->sta_table_lock);
return seq_list_start_head(&ap->sta_list, *_pos);
}
static void *prism2_ap_proc_next(struct seq_file *m, void *v, loff_t *_pos)
{
- struct ap_data *ap = m->private;
+ struct ap_data *ap = PDE_DATA(file_inode(m->file));
return seq_list_next(v, &ap->sta_list, _pos);
}
static void prism2_ap_proc_stop(struct seq_file *m, void *v)
{
- struct ap_data *ap = m->private;
+ struct ap_data *ap = PDE_DATA(file_inode(m->file));
spin_unlock_bh(&ap->sta_table_lock);
}
@@ -608,23 +578,6 @@ static const struct seq_operations prism2_ap_proc_seqops = {
.stop = prism2_ap_proc_stop,
.show = prism2_ap_proc_show,
};
-
-static int prism2_ap_proc_open(struct inode *inode, struct file *file)
-{
- int ret = seq_open(file, &prism2_ap_proc_seqops);
- if (ret == 0) {
- struct seq_file *m = file->private_data;
- m->private = PDE_DATA(inode);
- }
- return ret;
-}
-
-static const struct file_operations prism2_ap_proc_fops = {
- .open = prism2_ap_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
#endif /* PRISM2_NO_KERNEL_IEEE80211_MGMT */
@@ -896,12 +849,13 @@ void hostap_init_ap_proc(local_info_t *local)
return;
#ifndef PRISM2_NO_PROCFS_DEBUG
- proc_create_data("ap_debug", 0, ap->proc, &ap_debug_proc_fops, ap);
+ proc_create_single_data("ap_debug", 0, ap->proc, ap_debug_proc_show, ap);
#endif /* PRISM2_NO_PROCFS_DEBUG */
#ifndef PRISM2_NO_KERNEL_IEEE80211_MGMT
- proc_create_data("ap_control", 0, ap->proc, &ap_control_proc_fops, ap);
- proc_create_data("ap", 0, ap->proc, &prism2_ap_proc_fops, ap);
+ proc_create_seq_data("ap_control", 0, ap->proc, &ap_control_proc_seqops,
+ ap);
+ proc_create_seq_data("ap", 0, ap->proc, &prism2_ap_proc_seqops, ap);
#endif /* PRISM2_NO_KERNEL_IEEE80211_MGMT */
}
@@ -1106,18 +1060,6 @@ static int prism2_sta_proc_show(struct seq_file *m, void *v)
return 0;
}
-static int prism2_sta_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, prism2_sta_proc_show, PDE_DATA(inode));
-}
-
-static const struct file_operations prism2_sta_proc_fops = {
- .open = prism2_sta_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
static void handle_add_proc_queue(struct work_struct *work)
{
struct ap_data *ap = container_of(work, struct ap_data,
@@ -1138,9 +1080,9 @@ static void handle_add_proc_queue(struct work_struct *work)
if (sta) {
sprintf(name, "%pM", sta->addr);
- sta->proc = proc_create_data(
+ sta->proc = proc_create_single_data(
name, 0, ap->proc,
- &prism2_sta_proc_fops, sta);
+ prism2_sta_proc_show, sta);
atomic_dec(&sta->users);
}
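
Every hostap conversion in this file follows the same shape: a single_open() wrapper plus its file_operations collapse into one proc_create_single_data() call, and the show callback recovers its cookie through the proc entry rather than m->private. A minimal sketch of the pattern, where foo_stats and the entry name are purely illustrative:

static int foo_proc_show(struct seq_file *m, void *v)
{
	/* the data pointer passed at creation time, fetched via the inode */
	struct foo_stats *stats = PDE_DATA(file_inode(m->file));

	seq_printf(m, "frames=%u\n", stats->frames);
	return 0;
}

/* one call replaces the open handler and file_operations boilerplate */
proc_create_single_data("foo", 0, parent_dir, foo_proc_show, stats);
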
diff --git a/drivers/net/wireless/intersil/hostap/hostap_hw.c b/drivers/net/wireless/intersil/hostap/hostap_hw.c
index 5c4a17a18968..2720aa39f530 100644
--- a/drivers/net/wireless/intersil/hostap/hostap_hw.c
+++ b/drivers/net/wireless/intersil/hostap/hostap_hw.c
@@ -2951,19 +2951,6 @@ static int prism2_registers_proc_show(struct seq_file *m, void *v)
return 0;
}
-
-static int prism2_registers_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, prism2_registers_proc_show, PDE_DATA(inode));
-}
-
-static const struct file_operations prism2_registers_proc_fops = {
- .open = prism2_registers_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
#endif /* PRISM2_NO_PROCFS_DEBUG */
@@ -3279,8 +3266,8 @@ static int hostap_hw_ready(struct net_device *dev)
}
hostap_init_proc(local);
#ifndef PRISM2_NO_PROCFS_DEBUG
- proc_create_data("registers", 0, local->proc,
- &prism2_registers_proc_fops, local);
+ proc_create_single_data("registers", 0, local->proc,
+ prism2_registers_proc_show, local);
#endif /* PRISM2_NO_PROCFS_DEBUG */
hostap_init_ap_proc(local);
return 0;
diff --git a/drivers/net/wireless/intersil/hostap/hostap_proc.c b/drivers/net/wireless/intersil/hostap/hostap_proc.c
index d234231bf532..5b33ccab9188 100644
--- a/drivers/net/wireless/intersil/hostap/hostap_proc.c
+++ b/drivers/net/wireless/intersil/hostap/hostap_proc.c
@@ -43,18 +43,6 @@ static int prism2_debug_proc_show(struct seq_file *m, void *v)
return 0;
}
-
-static int prism2_debug_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, prism2_debug_proc_show, PDE_DATA(inode));
-}
-
-static const struct file_operations prism2_debug_proc_fops = {
- .open = prism2_debug_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
#endif /* PRISM2_NO_PROCFS_DEBUG */
@@ -95,19 +83,6 @@ static int prism2_stats_proc_show(struct seq_file *m, void *v)
return 0;
}
-static int prism2_stats_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, prism2_stats_proc_show, PDE_DATA(inode));
-}
-
-static const struct file_operations prism2_stats_proc_fops = {
- .open = prism2_stats_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
-
static int prism2_wds_proc_show(struct seq_file *m, void *v)
{
struct list_head *ptr = v;
@@ -122,20 +97,20 @@ static int prism2_wds_proc_show(struct seq_file *m, void *v)
static void *prism2_wds_proc_start(struct seq_file *m, loff_t *_pos)
{
- local_info_t *local = m->private;
+ local_info_t *local = PDE_DATA(file_inode(m->file));
read_lock_bh(&local->iface_lock);
return seq_list_start(&local->hostap_interfaces, *_pos);
}
static void *prism2_wds_proc_next(struct seq_file *m, void *v, loff_t *_pos)
{
- local_info_t *local = m->private;
+ local_info_t *local = PDE_DATA(file_inode(m->file));
return seq_list_next(v, &local->hostap_interfaces, _pos);
}
static void prism2_wds_proc_stop(struct seq_file *m, void *v)
{
- local_info_t *local = m->private;
+ local_info_t *local = PDE_DATA(file_inode(m->file));
read_unlock_bh(&local->iface_lock);
}
@@ -146,27 +121,9 @@ static const struct seq_operations prism2_wds_proc_seqops = {
.show = prism2_wds_proc_show,
};
-static int prism2_wds_proc_open(struct inode *inode, struct file *file)
-{
- int ret = seq_open(file, &prism2_wds_proc_seqops);
- if (ret == 0) {
- struct seq_file *m = file->private_data;
- m->private = PDE_DATA(inode);
- }
- return ret;
-}
-
-static const struct file_operations prism2_wds_proc_fops = {
- .open = prism2_wds_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
-
static int prism2_bss_list_proc_show(struct seq_file *m, void *v)
{
- local_info_t *local = m->private;
+ local_info_t *local = PDE_DATA(file_inode(m->file));
struct list_head *ptr = v;
struct hostap_bss_info *bss;
@@ -193,20 +150,20 @@ static int prism2_bss_list_proc_show(struct seq_file *m, void *v)
static void *prism2_bss_list_proc_start(struct seq_file *m, loff_t *_pos)
{
- local_info_t *local = m->private;
+ local_info_t *local = PDE_DATA(file_inode(m->file));
spin_lock_bh(&local->lock);
return seq_list_start_head(&local->bss_list, *_pos);
}
static void *prism2_bss_list_proc_next(struct seq_file *m, void *v, loff_t *_pos)
{
- local_info_t *local = m->private;
+ local_info_t *local = PDE_DATA(file_inode(m->file));
return seq_list_next(v, &local->bss_list, _pos);
}
static void prism2_bss_list_proc_stop(struct seq_file *m, void *v)
{
- local_info_t *local = m->private;
+ local_info_t *local = PDE_DATA(file_inode(m->file));
spin_unlock_bh(&local->lock);
}
@@ -217,24 +174,6 @@ static const struct seq_operations prism2_bss_list_proc_seqops = {
.show = prism2_bss_list_proc_show,
};
-static int prism2_bss_list_proc_open(struct inode *inode, struct file *file)
-{
- int ret = seq_open(file, &prism2_bss_list_proc_seqops);
- if (ret == 0) {
- struct seq_file *m = file->private_data;
- m->private = PDE_DATA(inode);
- }
- return ret;
-}
-
-static const struct file_operations prism2_bss_list_proc_fops = {
- .open = prism2_bss_list_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
-
static int prism2_crypt_proc_show(struct seq_file *m, void *v)
{
local_info_t *local = m->private;
@@ -252,19 +191,6 @@ static int prism2_crypt_proc_show(struct seq_file *m, void *v)
return 0;
}
-static int prism2_crypt_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, prism2_crypt_proc_show, PDE_DATA(inode));
-}
-
-static const struct file_operations prism2_crypt_proc_fops = {
- .open = prism2_crypt_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
-
static ssize_t prism2_pda_proc_read(struct file *file, char __user *buf,
size_t count, loff_t *_pos)
{
@@ -342,7 +268,7 @@ static int prism2_io_debug_proc_read(char *page, char **start, off_t off,
#ifndef PRISM2_NO_STATION_MODES
static int prism2_scan_results_proc_show(struct seq_file *m, void *v)
{
- local_info_t *local = m->private;
+ local_info_t *local = PDE_DATA(file_inode(m->file));
unsigned long entry;
int i, len;
struct hfa384x_hostscan_result *scanres;
@@ -392,7 +318,7 @@ static int prism2_scan_results_proc_show(struct seq_file *m, void *v)
static void *prism2_scan_results_proc_start(struct seq_file *m, loff_t *_pos)
{
- local_info_t *local = m->private;
+ local_info_t *local = PDE_DATA(file_inode(m->file));
spin_lock_bh(&local->lock);
/* We have a header (pos 0) + N results to show (pos 1...N) */
@@ -403,7 +329,7 @@ static void *prism2_scan_results_proc_start(struct seq_file *m, loff_t *_pos)
static void *prism2_scan_results_proc_next(struct seq_file *m, void *v, loff_t *_pos)
{
- local_info_t *local = m->private;
+ local_info_t *local = PDE_DATA(file_inode(m->file));
++*_pos;
if (*_pos > local->last_scan_results_count)
@@ -413,7 +339,7 @@ static void *prism2_scan_results_proc_next(struct seq_file *m, void *v, loff_t *
static void prism2_scan_results_proc_stop(struct seq_file *m, void *v)
{
- local_info_t *local = m->private;
+ local_info_t *local = PDE_DATA(file_inode(m->file));
spin_unlock_bh(&local->lock);
}
@@ -423,25 +349,6 @@ static const struct seq_operations prism2_scan_results_proc_seqops = {
.stop = prism2_scan_results_proc_stop,
.show = prism2_scan_results_proc_show,
};
-
-static int prism2_scan_results_proc_open(struct inode *inode, struct file *file)
-{
- int ret = seq_open(file, &prism2_scan_results_proc_seqops);
- if (ret == 0) {
- struct seq_file *m = file->private_data;
- m->private = PDE_DATA(inode);
- }
- return ret;
-}
-
-static const struct file_operations prism2_scan_results_proc_fops = {
- .open = prism2_scan_results_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
-
#endif /* PRISM2_NO_STATION_MODES */
@@ -463,29 +370,29 @@ void hostap_init_proc(local_info_t *local)
}
#ifndef PRISM2_NO_PROCFS_DEBUG
- proc_create_data("debug", 0, local->proc,
- &prism2_debug_proc_fops, local);
+ proc_create_single_data("debug", 0, local->proc,
+ prism2_debug_proc_show, local);
#endif /* PRISM2_NO_PROCFS_DEBUG */
- proc_create_data("stats", 0, local->proc,
- &prism2_stats_proc_fops, local);
- proc_create_data("wds", 0, local->proc,
- &prism2_wds_proc_fops, local);
+ proc_create_single_data("stats", 0, local->proc, prism2_stats_proc_show,
+ local);
+ proc_create_seq_data("wds", 0, local->proc,
+ &prism2_wds_proc_seqops, local);
proc_create_data("pda", 0, local->proc,
&prism2_pda_proc_fops, local);
proc_create_data("aux_dump", 0, local->proc,
local->func->read_aux_fops ?: &prism2_aux_dump_proc_fops,
local);
- proc_create_data("bss_list", 0, local->proc,
- &prism2_bss_list_proc_fops, local);
- proc_create_data("crypt", 0, local->proc,
- &prism2_crypt_proc_fops, local);
+ proc_create_seq_data("bss_list", 0, local->proc,
+ &prism2_bss_list_proc_seqops, local);
+ proc_create_single_data("crypt", 0, local->proc, prism2_crypt_proc_show,
+ local);
#ifdef PRISM2_IO_DEBUG
- proc_create_data("io_debug", 0, local->proc,
- &prism2_io_debug_proc_fops, local);
+ proc_create_single_data("io_debug", 0, local->proc,
+ prism2_debug_proc_show, local);
#endif /* PRISM2_IO_DEBUG */
#ifndef PRISM2_NO_STATION_MODES
- proc_create_data("scan_results", 0, local->proc,
- &prism2_scan_results_proc_fops, local);
+ proc_create_seq_data("scan_results", 0, local->proc,
+ &prism2_scan_results_proc_seqops, local);
#endif /* PRISM2_NO_STATION_MODES */
}
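
Iterator-style entries get the parallel treatment: proc_create_seq_data() wires a struct seq_operations straight to the proc entry, and start/next/stop each re-derive the private pointer the same way. A hedged sketch around a hypothetical locked list (foo_data and foo_seq_show are illustrative):

static void *foo_seq_start(struct seq_file *m, loff_t *pos)
{
	struct foo_data *fd = PDE_DATA(file_inode(m->file));

	spin_lock_bh(&fd->lock);	/* held for the whole walk */
	return seq_list_start(&fd->items, *pos);
}

static void *foo_seq_next(struct seq_file *m, void *v, loff_t *pos)
{
	struct foo_data *fd = PDE_DATA(file_inode(m->file));

	return seq_list_next(v, &fd->items, pos);
}

static void foo_seq_stop(struct seq_file *m, void *v)
{
	struct foo_data *fd = PDE_DATA(file_inode(m->file));

	spin_unlock_bh(&fd->lock);
}

static const struct seq_operations foo_seqops = {
	.start = foo_seq_start,
	.next  = foo_seq_next,
	.stop  = foo_seq_stop,
	.show  = foo_seq_show,
};

proc_create_seq_data("foo", 0, parent_dir, &foo_seqops, fd);
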
diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c
index 96d26cfae90b..920c23e542a5 100644
--- a/drivers/net/wireless/mac80211_hwsim.c
+++ b/drivers/net/wireless/mac80211_hwsim.c
@@ -3236,6 +3236,7 @@ static int hwsim_new_radio_nl(struct sk_buff *msg, struct genl_info *info)
GENL_SET_ERR_MSG(info, "MAC is not a valid source addr");
NL_SET_BAD_ATTR(info->extack,
info->attrs[HWSIM_ATTR_PERM_ADDR]);
+ kfree(hwname);
return -EINVAL;
}
@@ -3339,7 +3340,7 @@ out_err:
static int hwsim_dump_radio_nl(struct sk_buff *skb,
struct netlink_callback *cb)
{
- int last_idx = cb->args[0];
+ int last_idx = cb->args[0] - 1;
struct mac80211_hwsim_data *data = NULL;
int res = 0;
void *hdr;
@@ -3367,7 +3368,7 @@ static int hwsim_dump_radio_nl(struct sk_buff *skb,
last_idx = data->idx;
}
- cb->args[0] = last_idx;
+ cb->args[0] = last_idx + 1;
/* list changed, but no new element sent, set interrupted flag */
if (skb->len == 0 && cb->prev_seq && cb->seq != cb->prev_seq) {
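
Biasing cb->args[0] by one lets the value zero keep meaning "dump not started", so the radio with idx 0 is no longer skipped when a dump resumes. A small sketch of the convention (helper names are hypothetical):

static int dump_resume_idx(struct netlink_callback *cb)
{
	return cb->args[0] - 1;		/* -1 when nothing dumped yet */
}

static void dump_save_idx(struct netlink_callback *cb, int last_idx)
{
	cb->args[0] = last_idx + 1;	/* never collides with "not started" */
}
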
diff --git a/drivers/net/wireless/ralink/rt2x00/rt2x00queue.c b/drivers/net/wireless/ralink/rt2x00/rt2x00queue.c
index a6884e73d2ab..7ddee980048b 100644
--- a/drivers/net/wireless/ralink/rt2x00/rt2x00queue.c
+++ b/drivers/net/wireless/ralink/rt2x00/rt2x00queue.c
@@ -372,16 +372,15 @@ static void rt2x00queue_create_tx_descriptor_ht(struct rt2x00_dev *rt2x00dev,
/*
* Determine IFS values
- * - Use TXOP_BACKOFF for probe and management frames except beacons
+ * - Use TXOP_BACKOFF for management frames except beacons
* - Use TXOP_SIFS for fragment bursts
* - Use TXOP_HTTXOP for everything else
*
* Note: rt2800 devices won't use CTS protection (if used)
* for frames not transmitted with TXOP_HTTXOP
*/
- if ((ieee80211_is_mgmt(hdr->frame_control) &&
- !ieee80211_is_beacon(hdr->frame_control)) ||
- (tx_info->flags & IEEE80211_TX_CTL_RATE_CTRL_PROBE))
+ if (ieee80211_is_mgmt(hdr->frame_control) &&
+ !ieee80211_is_beacon(hdr->frame_control))
txdesc->u.ht.txop = TXOP_BACKOFF;
else if (!(tx_info->flags & IEEE80211_TX_CTL_FIRST_FRAGMENT))
txdesc->u.ht.txop = TXOP_SIFS;
diff --git a/drivers/net/wireless/ray_cs.c b/drivers/net/wireless/ray_cs.c
index 7f9b16b97ea3..a7e0a17aa7e8 100644
--- a/drivers/net/wireless/ray_cs.c
+++ b/drivers/net/wireless/ray_cs.c
@@ -2663,19 +2663,6 @@ static int ray_cs_proc_show(struct seq_file *m, void *v)
}
return 0;
}
-
-static int ray_cs_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, ray_cs_proc_show, NULL);
-}
-
-static const struct file_operations ray_cs_proc_fops = {
- .owner = THIS_MODULE,
- .open = ray_cs_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
#endif
/*===========================================================================*/
static int build_auth_frame(ray_dev_t *local, UCHAR *dest, int auth_type)
@@ -2814,7 +2801,7 @@ static int __init init_ray_cs(void)
#ifdef CONFIG_PROC_FS
proc_mkdir("driver/ray_cs", NULL);
- proc_create("driver/ray_cs/ray_cs", 0, NULL, &ray_cs_proc_fops);
+ proc_create_single("driver/ray_cs/ray_cs", 0, NULL, ray_cs_proc_show);
proc_create("driver/ray_cs/essid", 0200, NULL, &ray_cs_essid_proc_fops);
proc_create_data("driver/ray_cs/net_type", 0200, NULL, &int_proc_fops,
&net_type);
diff --git a/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtcoutsrc.c b/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtcoutsrc.c
index 8b6b07a936f5..b026e80940a4 100644
--- a/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtcoutsrc.c
+++ b/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtcoutsrc.c
@@ -158,16 +158,6 @@ static u8 halbtc_get_wifi_central_chnl(struct btc_coexist *btcoexist)
static u8 rtl_get_hwpg_single_ant_path(struct rtl_priv *rtlpriv)
{
- struct rtl_mod_params *mod_params = rtlpriv->cfg->mod_params;
-
- /* override ant_num / ant_path */
- if (mod_params->ant_sel) {
- rtlpriv->btcoexist.btc_info.ant_num =
- (mod_params->ant_sel == 1 ? ANT_X2 : ANT_X1);
-
- rtlpriv->btcoexist.btc_info.single_ant_path =
- (mod_params->ant_sel == 1 ? 0 : 1);
- }
return rtlpriv->btcoexist.btc_info.single_ant_path;
}
@@ -178,7 +168,6 @@ static u8 rtl_get_hwpg_bt_type(struct rtl_priv *rtlpriv)
static u8 rtl_get_hwpg_ant_num(struct rtl_priv *rtlpriv)
{
- struct rtl_mod_params *mod_params = rtlpriv->cfg->mod_params;
u8 num;
if (rtlpriv->btcoexist.btc_info.ant_num == ANT_X2)
@@ -186,10 +175,6 @@ static u8 rtl_get_hwpg_ant_num(struct rtl_priv *rtlpriv)
else
num = 1;
- /* override ant_num / ant_path */
- if (mod_params->ant_sel)
- num = (mod_params->ant_sel == 1 ? ANT_X2 : ANT_X1) + 1;
-
return num;
}
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/hw.c
index e7bbbc95cdb1..b4f3f91b590e 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/hw.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/hw.c
@@ -848,6 +848,9 @@ static bool _rtl8723be_init_mac(struct ieee80211_hw *hw)
return false;
}
+ if (rtlpriv->cfg->ops->get_btc_status())
+ rtlpriv->btcoexist.btc_ops->btc_power_on_setting(rtlpriv);
+
bytetmp = rtl_read_byte(rtlpriv, REG_MULTI_FUNC_CTRL);
rtl_write_byte(rtlpriv, REG_MULTI_FUNC_CTRL, bytetmp | BIT(3));
@@ -2696,21 +2699,21 @@ void rtl8723be_read_bt_coexist_info_from_hwpg(struct ieee80211_hw *hw,
rtlpriv->btcoexist.btc_info.bt_type = BT_RTL8723B;
rtlpriv->btcoexist.btc_info.ant_num = (value & 0x1);
rtlpriv->btcoexist.btc_info.single_ant_path =
- (value & 0x40); /*0xc3[6]*/
+ (value & 0x40 ? ANT_AUX : ANT_MAIN); /*0xc3[6]*/
} else {
rtlpriv->btcoexist.btc_info.btcoexist = 0;
rtlpriv->btcoexist.btc_info.bt_type = BT_RTL8723B;
rtlpriv->btcoexist.btc_info.ant_num = ANT_X2;
- rtlpriv->btcoexist.btc_info.single_ant_path = 0;
+ rtlpriv->btcoexist.btc_info.single_ant_path = ANT_MAIN;
}
/* override ant_num / ant_path */
if (mod_params->ant_sel) {
rtlpriv->btcoexist.btc_info.ant_num =
- (mod_params->ant_sel == 1 ? ANT_X2 : ANT_X1);
+ (mod_params->ant_sel == 1 ? ANT_X1 : ANT_X2);
rtlpriv->btcoexist.btc_info.single_ant_path =
- (mod_params->ant_sel == 1 ? 0 : 1);
+ (mod_params->ant_sel == 1 ? ANT_AUX : ANT_MAIN);
}
}
diff --git a/drivers/net/wireless/realtek/rtlwifi/wifi.h b/drivers/net/wireless/realtek/rtlwifi/wifi.h
index d27e33960e77..ce1754054a07 100644
--- a/drivers/net/wireless/realtek/rtlwifi/wifi.h
+++ b/drivers/net/wireless/realtek/rtlwifi/wifi.h
@@ -2823,6 +2823,11 @@ enum bt_ant_num {
ANT_X1 = 1,
};
+enum bt_ant_path {
+ ANT_MAIN = 0,
+ ANT_AUX = 1,
+};
+
enum bt_co_type {
BT_2WIRE = 0,
BT_ISSC_3WIRE = 1,
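
With bt_ant_path defined, the ant_sel override in rtl8723be_read_bt_coexist_info_from_hwpg() maps module parameter 1 to ANT_X1/ANT_AUX and everything else to ANT_X2/ANT_MAIN, correcting the inverted assignment it replaces. A sketch of the resulting mapping (apply_ant_sel() is illustrative, not a driver function):

static void apply_ant_sel(struct rtl_priv *rtlpriv, u8 ant_sel)
{
	rtlpriv->btcoexist.btc_info.ant_num =
		(ant_sel == 1 ? ANT_X1 : ANT_X2);
	rtlpriv->btcoexist.btc_info.single_ant_path =
		(ant_sel == 1 ? ANT_AUX : ANT_MAIN);
}
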
diff --git a/drivers/nubus/proc.c b/drivers/nubus/proc.c
index c2e5a7e6bd3e..88e1f9a0faaf 100644
--- a/drivers/nubus/proc.c
+++ b/drivers/nubus/proc.c
@@ -45,18 +45,6 @@ nubus_devices_proc_show(struct seq_file *m, void *v)
return 0;
}
-static int nubus_devices_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, nubus_devices_proc_show, NULL);
-}
-
-static const struct file_operations nubus_devices_proc_fops = {
- .open = nubus_devices_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
static struct proc_dir_entry *proc_bus_nubus_dir;
/*
@@ -149,18 +137,6 @@ static int nubus_proc_rsrc_show(struct seq_file *m, void *v)
return 0;
}
-static int nubus_proc_rsrc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, nubus_proc_rsrc_show, inode);
-}
-
-static const struct file_operations nubus_proc_rsrc_fops = {
- .open = nubus_proc_rsrc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
void nubus_proc_add_rsrc_mem(struct proc_dir_entry *procdir,
const struct nubus_dirent *ent,
unsigned int size)
@@ -176,8 +152,8 @@ void nubus_proc_add_rsrc_mem(struct proc_dir_entry *procdir,
pde_data = nubus_proc_alloc_pde_data(nubus_dirptr(ent), size);
else
pde_data = NULL;
- proc_create_data(name, S_IFREG | 0444, procdir,
- &nubus_proc_rsrc_fops, pde_data);
+ proc_create_single_data(name, S_IFREG | 0444, procdir,
+ nubus_proc_rsrc_show, pde_data);
}
void nubus_proc_add_rsrc(struct proc_dir_entry *procdir,
@@ -190,32 +166,21 @@ void nubus_proc_add_rsrc(struct proc_dir_entry *procdir,
return;
snprintf(name, sizeof(name), "%x", ent->type);
- proc_create_data(name, S_IFREG | 0444, procdir,
- &nubus_proc_rsrc_fops,
- nubus_proc_alloc_pde_data(data, 0));
+ proc_create_single_data(name, S_IFREG | 0444, procdir,
+ nubus_proc_rsrc_show,
+ nubus_proc_alloc_pde_data(data, 0));
}
/*
* /proc/nubus stuff
*/
-static int nubus_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, nubus_proc_show, NULL);
-}
-
-static const struct file_operations nubus_proc_fops = {
- .open = nubus_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
void __init nubus_proc_init(void)
{
- proc_create("nubus", 0, NULL, &nubus_proc_fops);
+ proc_create_single("nubus", 0, NULL, nubus_proc_show);
proc_bus_nubus_dir = proc_mkdir("bus/nubus", NULL);
if (!proc_bus_nubus_dir)
return;
- proc_create("devices", 0, proc_bus_nubus_dir, &nubus_devices_proc_fops);
+ proc_create_single("devices", 0, proc_bus_nubus_dir,
+ nubus_devices_proc_show);
}
diff --git a/drivers/nvdimm/claim.c b/drivers/nvdimm/claim.c
index 30852270484f..2e96b34bc936 100644
--- a/drivers/nvdimm/claim.c
+++ b/drivers/nvdimm/claim.c
@@ -276,7 +276,9 @@ static int nsio_rw_bytes(struct nd_namespace_common *ndns,
if (rw == READ) {
if (unlikely(is_bad_pmem(&nsio->bb, sector, sz_align)))
return -EIO;
- return memcpy_mcsafe(buf, nsio->addr + offset, size);
+ if (memcpy_mcsafe(buf, nsio->addr + offset, size) != 0)
+ return -EIO;
+ return 0;
}
if (unlikely(is_bad_pmem(&nsio->bb, sector, sz_align))) {
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index 9d714926ecf5..e023d6aa22b5 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -101,15 +101,15 @@ static blk_status_t read_pmem(struct page *page, unsigned int off,
void *pmem_addr, unsigned int len)
{
unsigned int chunk;
- int rc;
+ unsigned long rem;
void *mem;
while (len) {
mem = kmap_atomic(page);
chunk = min_t(unsigned int, len, PAGE_SIZE);
- rc = memcpy_mcsafe(mem + off, pmem_addr, chunk);
+ rem = memcpy_mcsafe(mem + off, pmem_addr, chunk);
kunmap_atomic(mem);
- if (rc)
+ if (rem)
return BLK_STS_IOERR;
len -= chunk;
off = 0;
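
Both nvdimm call sites adapt to memcpy_mcsafe() now returning the number of bytes left uncopied (an unsigned long) rather than an errno: success is exactly zero, and any remainder is mapped to the caller's own error code. A minimal sketch of the convention, with dst/src/len standing in for real buffers:

static int demo_safe_read(void *dst, const void *src, size_t len)
{
	unsigned long rem = memcpy_mcsafe(dst, src, len); /* bytes NOT copied */

	return rem ? -EIO : 0;	/* a machine check poisoned the source */
}
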
diff --git a/drivers/nvme/host/Kconfig b/drivers/nvme/host/Kconfig
index b979cf3bce65..88a8b5916624 100644
--- a/drivers/nvme/host/Kconfig
+++ b/drivers/nvme/host/Kconfig
@@ -27,7 +27,7 @@ config NVME_FABRICS
config NVME_RDMA
tristate "NVM Express over Fabrics RDMA host driver"
- depends on INFINIBAND && BLOCK
+ depends on INFINIBAND && INFINIBAND_ADDR_TRANS && BLOCK
select NVME_CORE
select NVME_FABRICS
select SG_POOL
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 9df4f71e58ca..c8b30067b6ae 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -99,6 +99,16 @@ static struct class *nvme_subsys_class;
static void nvme_ns_remove(struct nvme_ns *ns);
static int nvme_revalidate_disk(struct gendisk *disk);
+static void nvme_put_subsystem(struct nvme_subsystem *subsys);
+
+static void nvme_queue_scan(struct nvme_ctrl *ctrl)
+{
+ /*
+ * Only queue new scan work when admin and IO queues are both alive
+ */
+ if (ctrl->state == NVME_CTRL_LIVE)
+ queue_work(nvme_wq, &ctrl->scan_work);
+}
int nvme_reset_ctrl(struct nvme_ctrl *ctrl)
{
@@ -117,7 +127,8 @@ int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl)
ret = nvme_reset_ctrl(ctrl);
if (!ret) {
flush_work(&ctrl->reset_work);
- if (ctrl->state != NVME_CTRL_LIVE)
+ if (ctrl->state != NVME_CTRL_LIVE &&
+ ctrl->state != NVME_CTRL_ADMIN_ONLY)
ret = -ENETRESET;
}
@@ -242,9 +253,6 @@ EXPORT_SYMBOL_GPL(nvme_complete_rq);
void nvme_cancel_request(struct request *req, void *data, bool reserved)
{
- if (!blk_mq_request_started(req))
- return;
-
dev_dbg_ratelimited(((struct nvme_ctrl *) data)->device,
"Cancelling I/O %d", req->tag);
@@ -349,7 +357,8 @@ static void nvme_free_ns_head(struct kref *ref)
nvme_mpath_remove_disk(head);
ida_simple_remove(&head->subsys->ns_ida, head->instance);
list_del_init(&head->entry);
- cleanup_srcu_struct(&head->srcu);
+ cleanup_srcu_struct_quiesced(&head->srcu);
+ nvme_put_subsystem(head->subsys);
kfree(head);
}
@@ -764,6 +773,7 @@ static int nvme_submit_user_cmd(struct request_queue *q,
ret = PTR_ERR(meta);
goto out_unmap;
}
+ req->cmd_flags |= REQ_INTEGRITY;
}
}
@@ -1029,6 +1039,21 @@ int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count)
}
EXPORT_SYMBOL_GPL(nvme_set_queue_count);
+#define NVME_AEN_SUPPORTED \
+ (NVME_AEN_CFG_NS_ATTR | NVME_AEN_CFG_FW_ACT)
+
+static void nvme_enable_aen(struct nvme_ctrl *ctrl)
+{
+ u32 result;
+ int status;
+
+ status = nvme_set_features(ctrl, NVME_FEAT_ASYNC_EVENT,
+ ctrl->oaes & NVME_AEN_SUPPORTED, NULL, 0, &result);
+ if (status)
+ dev_warn(ctrl->device, "Failed to configure AEN (cfg %x)\n",
+ ctrl->oaes & NVME_AEN_SUPPORTED);
+}
+
static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
{
struct nvme_user_io io;
@@ -1347,13 +1372,19 @@ static void nvme_set_chunk_size(struct nvme_ns *ns)
blk_queue_chunk_sectors(ns->queue, rounddown_pow_of_two(chunk_size));
}
-static void nvme_config_discard(struct nvme_ctrl *ctrl,
- unsigned stream_alignment, struct request_queue *queue)
+static void nvme_config_discard(struct nvme_ns *ns)
{
+ struct nvme_ctrl *ctrl = ns->ctrl;
+ struct request_queue *queue = ns->queue;
u32 size = queue_logical_block_size(queue);
- if (stream_alignment)
- size *= stream_alignment;
+ if (!(ctrl->oncs & NVME_CTRL_ONCS_DSM)) {
+ blk_queue_flag_clear(QUEUE_FLAG_DISCARD, queue);
+ return;
+ }
+
+ if (ctrl->nr_streams && ns->sws && ns->sgs)
+ size *= ns->sws * ns->sgs;
BUILD_BUG_ON(PAGE_SIZE / sizeof(struct nvme_dsm_range) <
NVME_DSM_MAX_RANGES);
@@ -1361,9 +1392,12 @@ static void nvme_config_discard(struct nvme_ctrl *ctrl,
queue->limits.discard_alignment = 0;
queue->limits.discard_granularity = size;
+ /* If discard is already enabled, don't reset queue limits */
+ if (blk_queue_flag_test_and_set(QUEUE_FLAG_DISCARD, queue))
+ return;
+
blk_queue_max_discard_sectors(queue, UINT_MAX);
blk_queue_max_discard_segments(queue, NVME_DSM_MAX_RANGES);
- blk_queue_flag_set(QUEUE_FLAG_DISCARD, queue);
if (ctrl->quirks & NVME_QUIRK_DEALLOCATE_ZEROES)
blk_queue_max_write_zeroes_sectors(queue, UINT_MAX);
@@ -1407,10 +1441,6 @@ static void nvme_update_disk_info(struct gendisk *disk,
{
sector_t capacity = le64_to_cpup(&id->nsze) << (ns->lba_shift - 9);
unsigned short bs = 1 << ns->lba_shift;
- unsigned stream_alignment = 0;
-
- if (ns->ctrl->nr_streams && ns->sws && ns->sgs)
- stream_alignment = ns->sws * ns->sgs;
blk_mq_freeze_queue(disk->queue);
blk_integrity_unregister(disk);
@@ -1424,10 +1454,9 @@ static void nvme_update_disk_info(struct gendisk *disk,
nvme_init_integrity(disk, ns->ms, ns->pi_type);
if (ns->ms && !nvme_ns_has_pi(ns) && !blk_get_integrity(disk))
capacity = 0;
- set_capacity(disk, capacity);
- if (ns->ctrl->oncs & NVME_CTRL_ONCS_DSM)
- nvme_config_discard(ns->ctrl, stream_alignment, disk->queue);
+ set_capacity(disk, capacity);
+ nvme_config_discard(ns);
blk_mq_unfreeze_queue(disk->queue);
}
@@ -1443,8 +1472,8 @@ static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
if (ns->lba_shift == 0)
ns->lba_shift = 9;
ns->noiob = le16_to_cpu(id->noiob);
- ns->ext = ns->ms && (id->flbas & NVME_NS_FLBAS_META_EXT);
ns->ms = le16_to_cpu(id->lbaf[id->flbas & NVME_NS_FLBAS_LBA_MASK].ms);
+ ns->ext = ns->ms && (id->flbas & NVME_NS_FLBAS_META_EXT);
/* the PI implementation requires metadata equal to the t10 pi tuple size */
if (ns->ms == sizeof(struct t10_pi_tuple))
ns->pi_type = id->dps & NVME_NS_DPS_PI_MASK;
@@ -1573,7 +1602,7 @@ static int nvme_pr_reserve(struct block_device *bdev, u64 key,
static int nvme_pr_preempt(struct block_device *bdev, u64 old, u64 new,
enum pr_type type, bool abort)
{
- u32 cdw10 = nvme_pr_type(type) << 8 | abort ? 2 : 1;
+ u32 cdw10 = nvme_pr_type(type) << 8 | (abort ? 2 : 1);
return nvme_pr_command(bdev, cdw10, old, new, nvme_cmd_resv_acquire);
}
@@ -1585,7 +1614,7 @@ static int nvme_pr_clear(struct block_device *bdev, u64 key)
static int nvme_pr_release(struct block_device *bdev, u64 key, enum pr_type type)
{
- u32 cdw10 = nvme_pr_type(type) << 8 | key ? 1 << 3 : 0;
+ u32 cdw10 = nvme_pr_type(type) << 8 | (key ? 1 << 3 : 0);
return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_release);
}
@@ -2179,7 +2208,8 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
* Verify that the subsystem actually supports multiple
* controllers, else bail out.
*/
- if (nvme_active_ctrls(found) && !(id->cmic & (1 << 1))) {
+ if (!ctrl->opts->discovery_nqn &&
+ nvme_active_ctrls(found) && !(id->cmic & (1 << 1))) {
dev_err(ctrl->device,
"ignoring ctrl due to duplicate subnqn (%s).\n",
found->subnqn);
@@ -2310,7 +2340,7 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
if (id->lpa & NVME_CTRL_LPA_CMD_EFFECTS_LOG) {
ret = nvme_get_effects_log(ctrl);
if (ret < 0)
- return ret;
+ goto out_free;
}
if (!ctrl->identified) {
@@ -2341,6 +2371,7 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
ctrl->oacs = le16_to_cpu(id->oacs);
ctrl->oncs = le16_to_cpup(&id->oncs);
+ ctrl->oaes = le32_to_cpu(id->oaes);
atomic_set(&ctrl->abort_limit, id->acl + 1);
ctrl->vwc = id->vwc;
ctrl->cntlid = le16_to_cpup(&id->cntlid);
@@ -2860,6 +2891,9 @@ static struct nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl,
goto out_cleanup_srcu;
list_add_tail(&head->entry, &ctrl->subsys->nsheads);
+
+ kref_get(&ctrl->subsys->ref);
+
return head;
out_cleanup_srcu:
cleanup_srcu_struct(&head->srcu);
@@ -2997,31 +3031,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
if (nvme_init_ns_head(ns, nsid, id))
goto out_free_id;
nvme_setup_streams_ns(ctrl, ns);
-
-#ifdef CONFIG_NVME_MULTIPATH
- /*
- * If multipathing is enabled we need to always use the subsystem
- * instance number for numbering our devices to avoid conflicts
- * between subsystems that have multiple controllers and thus use
- * the multipath-aware subsystem node and those that have a single
- * controller and use the controller node directly.
- */
- if (ns->head->disk) {
- sprintf(disk_name, "nvme%dc%dn%d", ctrl->subsys->instance,
- ctrl->cntlid, ns->head->instance);
- flags = GENHD_FL_HIDDEN;
- } else {
- sprintf(disk_name, "nvme%dn%d", ctrl->subsys->instance,
- ns->head->instance);
- }
-#else
- /*
- * But without the multipath code enabled, multiple controller per
- * subsystems are visible as devices and thus we cannot use the
- * subsystem instance.
- */
- sprintf(disk_name, "nvme%dn%d", ctrl->instance, ns->head->instance);
-#endif
+ nvme_set_disk_name(disk_name, ns, ctrl, &flags);
if ((ctrl->quirks & NVME_QUIRK_LIGHTNVM) && id->vs[0] == 0x1) {
if (nvme_nvm_register(ns, disk_name, node)) {
@@ -3187,6 +3197,42 @@ static void nvme_scan_ns_sequential(struct nvme_ctrl *ctrl, unsigned nn)
nvme_remove_invalid_namespaces(ctrl, nn);
}
+static bool nvme_scan_changed_ns_log(struct nvme_ctrl *ctrl)
+{
+ size_t log_size = NVME_MAX_CHANGED_NAMESPACES * sizeof(__le32);
+ __le32 *log;
+ int error, i;
+ bool ret = false;
+
+ log = kzalloc(log_size, GFP_KERNEL);
+ if (!log)
+ return false;
+
+ error = nvme_get_log(ctrl, NVME_LOG_CHANGED_NS, log, log_size);
+ if (error) {
+ dev_warn(ctrl->device,
+ "reading changed ns log failed: %d\n", error);
+ goto out_free_log;
+ }
+
+ if (log[0] == cpu_to_le32(0xffffffff))
+ goto out_free_log;
+
+ for (i = 0; i < NVME_MAX_CHANGED_NAMESPACES; i++) {
+ u32 nsid = le32_to_cpu(log[i]);
+
+ if (nsid == 0)
+ break;
+ dev_info(ctrl->device, "rescanning namespace %d.\n", nsid);
+ nvme_validate_ns(ctrl, nsid);
+ }
+ ret = true;
+
+out_free_log:
+ kfree(log);
+ return ret;
+}
+
static void nvme_scan_work(struct work_struct *work)
{
struct nvme_ctrl *ctrl =
@@ -3199,6 +3245,12 @@ static void nvme_scan_work(struct work_struct *work)
WARN_ON_ONCE(!ctrl->tagset);
+ if (test_and_clear_bit(EVENT_NS_CHANGED, &ctrl->events)) {
+ if (nvme_scan_changed_ns_log(ctrl))
+ goto out_sort_namespaces;
+ dev_info(ctrl->device, "rescanning namespaces.\n");
+ }
+
if (nvme_identify_ctrl(ctrl, &id))
return;
@@ -3206,25 +3258,16 @@ static void nvme_scan_work(struct work_struct *work)
if (ctrl->vs >= NVME_VS(1, 1, 0) &&
!(ctrl->quirks & NVME_QUIRK_IDENTIFY_CNS)) {
if (!nvme_scan_ns_list(ctrl, nn))
- goto done;
+ goto out_free_id;
}
nvme_scan_ns_sequential(ctrl, nn);
- done:
+out_free_id:
+ kfree(id);
+out_sort_namespaces:
down_write(&ctrl->namespaces_rwsem);
list_sort(NULL, &ctrl->namespaces, ns_cmp);
up_write(&ctrl->namespaces_rwsem);
- kfree(id);
-}
-
-void nvme_queue_scan(struct nvme_ctrl *ctrl)
-{
- /*
- * Only new queue scan work when admin and IO queues are both alive
- */
- if (ctrl->state == NVME_CTRL_LIVE)
- queue_work(nvme_wq, &ctrl->scan_work);
}
-EXPORT_SYMBOL_GPL(nvme_queue_scan);
/*
* This function iterates the namespace list unlocked to allow recovery from
@@ -3339,8 +3382,23 @@ static void nvme_fw_act_work(struct work_struct *work)
nvme_get_fw_slot_info(ctrl);
}
+static void nvme_handle_aen_notice(struct nvme_ctrl *ctrl, u32 result)
+{
+ switch ((result & 0xff00) >> 8) {
+ case NVME_AER_NOTICE_NS_CHANGED:
+ set_bit(EVENT_NS_CHANGED, &ctrl->events);
+ nvme_queue_scan(ctrl);
+ break;
+ case NVME_AER_NOTICE_FW_ACT_STARTING:
+ queue_work(nvme_wq, &ctrl->fw_act_work);
+ break;
+ default:
+ dev_warn(ctrl->device, "async event result %08x\n", result);
+ }
+}
+
void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status,
- union nvme_result *res)
+ volatile union nvme_result *res)
{
u32 result = le32_to_cpu(res->u32);
@@ -3348,6 +3406,9 @@ void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status,
return;
switch (result & 0x7) {
+ case NVME_AER_NOTICE:
+ nvme_handle_aen_notice(ctrl, result);
+ break;
case NVME_AER_ERROR:
case NVME_AER_SMART:
case NVME_AER_CSS:
@@ -3357,18 +3418,6 @@ void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status,
default:
break;
}
-
- switch (result & 0xff07) {
- case NVME_AER_NOTICE_NS_CHANGED:
- dev_info(ctrl->device, "rescanning\n");
- nvme_queue_scan(ctrl);
- break;
- case NVME_AER_NOTICE_FW_ACT_STARTING:
- queue_work(nvme_wq, &ctrl->fw_act_work);
- break;
- default:
- dev_warn(ctrl->device, "async event result %08x\n", result);
- }
queue_work(nvme_wq, &ctrl->async_event_work);
}
EXPORT_SYMBOL_GPL(nvme_complete_async_event);
@@ -3391,6 +3440,7 @@ void nvme_start_ctrl(struct nvme_ctrl *ctrl)
if (ctrl->queue_count > 1) {
nvme_queue_scan(ctrl);
+ nvme_enable_aen(ctrl);
queue_work(nvme_wq, &ctrl->async_event_work);
nvme_start_queues(ctrl);
}
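
Taken together, the core.c hunks split AEN handling in two: the completion path decodes the event type from result bits 2:0 and, for notices, the subtype from bits 15:8, recording EVENT_NS_CHANGED so the scan worker can consult the Changed Namespace List log instead of rescanning blindly. A sketch of the decode under those bit layouts (demo function, not the driver's):

static void demo_decode_aen(struct nvme_ctrl *ctrl, u32 result)
{
	u8 type = result & 0x7;			/* NVME_AER_NOTICE, ... */
	u8 notice = (result & 0xff00) >> 8;	/* NVME_AER_NOTICE_* subtype */

	if (type == NVME_AER_NOTICE && notice == NVME_AER_NOTICE_NS_CHANGED) {
		set_bit(EVENT_NS_CHANGED, &ctrl->events);
		nvme_queue_scan(ctrl);	/* the worker reads the NS log */
	}
}
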
diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
index 124c458806df..5f5f7067c41d 100644
--- a/drivers/nvme/host/fabrics.c
+++ b/drivers/nvme/host/fabrics.c
@@ -57,7 +57,7 @@ static struct nvmf_host *nvmf_host_add(const char *hostnqn)
goto out_unlock;
kref_init(&host->ref);
- memcpy(host->nqn, hostnqn, NVMF_NQN_SIZE);
+ strlcpy(host->nqn, hostnqn, NVMF_NQN_SIZE);
list_add_tail(&host->list, &nvmf_hosts);
out_unlock:
@@ -545,71 +545,54 @@ blk_status_t nvmf_check_if_ready(struct nvme_ctrl *ctrl, struct request *rq,
return BLK_STS_OK;
switch (ctrl->state) {
- case NVME_CTRL_DELETING:
- goto reject_io;
-
case NVME_CTRL_NEW:
case NVME_CTRL_CONNECTING:
+ case NVME_CTRL_DELETING:
+ /*
+ * This is the case of starting a new association, or deleting an
+ * existing one, when connectivity was lost before it was fully created
+ * or torn down. We need to error the commands used to initialize the
+ * controller so the reconnect can go into a retry attempt. The
+ * commands should all be marked REQ_FAILFAST_DRIVER, which will
+ * hit the reject path below. Anything else will be queued while
+ * the state settles.
+ */
if (!is_connected)
- /*
- * This is the case of starting a new
- * association but connectivity was lost
- * before it was fully created. We need to
- * error the commands used to initialize the
- * controller so the reconnect can go into a
- * retry attempt. The commands should all be
- * marked REQ_FAILFAST_DRIVER, which will hit
- * the reject path below. Anything else will
- * be queued while the state settles.
- */
- goto reject_or_queue_io;
-
- if ((queue_live &&
- !(nvme_req(rq)->flags & NVME_REQ_USERCMD)) ||
- (!queue_live && blk_rq_is_passthrough(rq) &&
- cmd->common.opcode == nvme_fabrics_command &&
- cmd->fabrics.fctype == nvme_fabrics_type_connect))
- /*
- * If queue is live, allow only commands that
- * are internally generated pass through. These
- * are commands on the admin queue to initialize
- * the controller. This will reject any ioctl
- * admin cmds received while initializing.
- *
- * If the queue is not live, allow only a
- * connect command. This will reject any ioctl
- * admin cmd as well as initialization commands
- * if the controller reverted the queue to non-live.
- */
+ break;
+
+ /*
+ * If queue is live, allow only commands that are internally
+ * generated pass through. These are commands on the admin
+ * queue to initialize the controller. This will reject any
+ * ioctl admin cmds received while initializing.
+ */
+ if (queue_live && !(nvme_req(rq)->flags & NVME_REQ_USERCMD))
return BLK_STS_OK;
/*
- * fall-thru to the reject_or_queue_io clause
+ * If the queue is not live, allow only a connect command. This
+ * will reject any ioctl admin cmd as well as initialization
+ * commands if the controller reverted the queue to non-live.
*/
+ if (!queue_live && blk_rq_is_passthrough(rq) &&
+ cmd->common.opcode == nvme_fabrics_command &&
+ cmd->fabrics.fctype == nvme_fabrics_type_connect)
+ return BLK_STS_OK;
break;
-
- /* these cases fall-thru
- * case NVME_CTRL_LIVE:
- * case NVME_CTRL_RESETTING:
- */
default:
break;
}
-reject_or_queue_io:
/*
- * Any other new io is something we're not in a state to send
- * to the device. Default action is to busy it and retry it
- * after the controller state is recovered. However, anything
- * marked for failfast or nvme multipath is immediately failed.
- * Note: commands used to initialize the controller will be
- * marked for failfast.
+ * Any other new io is something we're not in a state to send to the
+ * device. Default action is to busy it and retry it after the
+ * controller state is recovered. However, anything marked for failfast
+ * or nvme multipath is immediately failed. Note: commands used to
+ * initialize the controller will be marked for failfast.
* Note: nvme cli/ioctl commands are marked for failfast.
*/
if (!blk_noretry_request(rq) && !(rq->cmd_flags & REQ_NVME_MPATH))
return BLK_STS_RESOURCE;
-
-reject_io:
nvme_req(rq)->status = NVME_SC_ABORT_REQ;
return BLK_STS_IOERR;
}
@@ -668,6 +651,7 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
ret = -ENOMEM;
goto out;
}
+ kfree(opts->transport);
opts->transport = p;
break;
case NVMF_OPT_NQN:
@@ -676,6 +660,7 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
ret = -ENOMEM;
goto out;
}
+ kfree(opts->subsysnqn);
opts->subsysnqn = p;
nqnlen = strlen(opts->subsysnqn);
if (nqnlen >= NVMF_NQN_SIZE) {
@@ -687,10 +672,6 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
opts->discovery_nqn =
!(strcmp(opts->subsysnqn,
NVME_DISC_SUBSYS_NAME));
- if (opts->discovery_nqn) {
- opts->kato = 0;
- opts->nr_io_queues = 0;
- }
break;
case NVMF_OPT_TRADDR:
p = match_strdup(args);
@@ -698,6 +679,7 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
ret = -ENOMEM;
goto out;
}
+ kfree(opts->traddr);
opts->traddr = p;
break;
case NVMF_OPT_TRSVCID:
@@ -706,6 +688,7 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
ret = -ENOMEM;
goto out;
}
+ kfree(opts->trsvcid);
opts->trsvcid = p;
break;
case NVMF_OPT_QUEUE_SIZE:
@@ -792,6 +775,7 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
ret = -EINVAL;
goto out;
}
+ nvmf_host_put(opts->host);
opts->host = nvmf_host_add(p);
kfree(p);
if (!opts->host) {
@@ -817,6 +801,7 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
ret = -ENOMEM;
goto out;
}
+ kfree(opts->host_traddr);
opts->host_traddr = p;
break;
case NVMF_OPT_HOST_ID:
@@ -845,6 +830,11 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
}
}
+ if (opts->discovery_nqn) {
+ opts->kato = 0;
+ opts->nr_io_queues = 0;
+ opts->duplicate_connect = true;
+ }
if (ctrl_loss_tmo < 0)
opts->max_reconnects = -1;
else
@@ -977,16 +967,6 @@ nvmf_create_ctrl(struct device *dev, const char *buf, size_t count)
goto out_module_put;
}
- if (strcmp(ctrl->subsys->subnqn, opts->subsysnqn)) {
- dev_warn(ctrl->device,
- "controller returned incorrect NQN: \"%s\".\n",
- ctrl->subsys->subnqn);
- module_put(ops->module);
- up_read(&nvmf_transports_rwsem);
- nvme_delete_ctrl_sync(ctrl);
- return ERR_PTR(-EINVAL);
- }
-
module_put(ops->module);
up_read(&nvmf_transports_rwsem);
return ctrl;
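
The recurring kfree()-before-assign hunks make nvmf_parse_options() safe against an option appearing twice in the connect string: the value stored by the earlier occurrence is released before the fresh strdup'd one replaces it. The per-option pattern, as applied above:

	p = match_strdup(args);
	if (!p) {
		ret = -ENOMEM;
		goto out;
	}
	kfree(opts->traddr);	/* drop the value of a duplicated option */
	opts->traddr = p;
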
diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h
index ef46c915b7b5..0cf0460a5c92 100644
--- a/drivers/nvme/host/fabrics.h
+++ b/drivers/nvme/host/fabrics.h
@@ -139,7 +139,9 @@ static inline bool
nvmf_ctlr_matches_baseopts(struct nvme_ctrl *ctrl,
struct nvmf_ctrl_options *opts)
{
- if (strcmp(opts->subsysnqn, ctrl->opts->subsysnqn) ||
+ if (ctrl->state == NVME_CTRL_DELETING ||
+ ctrl->state == NVME_CTRL_DEAD ||
+ strcmp(opts->subsysnqn, ctrl->opts->subsysnqn) ||
strcmp(opts->host->nqn, ctrl->opts->host->nqn) ||
memcmp(&opts->host->id, &ctrl->opts->host->id, sizeof(uuid_t)))
return false;
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 6cb26bcf6ec0..0bad65803271 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -1686,16 +1686,6 @@ done:
goto check_error;
}
- /*
- * Force failures of commands if we're killing the controller
- * or have an error on a command used to create an new association
- */
- if (status &&
- (blk_queue_dying(rq->q) ||
- ctrl->ctrl.state == NVME_CTRL_NEW ||
- ctrl->ctrl.state == NVME_CTRL_CONNECTING))
- status |= cpu_to_le16(NVME_SC_DNR << 1);
-
__nvme_fc_fcpop_chk_teardowns(ctrl, op, opstate);
nvme_end_request(rq, status, result);
@@ -2403,9 +2393,6 @@ nvme_fc_terminate_exchange(struct request *req, void *data, bool reserved)
struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl);
struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(req);
- if (!blk_mq_request_started(req))
- return;
-
__nvme_fc_abort_op(ctrl, op);
}
@@ -3284,6 +3271,8 @@ nvme_fc_create_ctrl(struct device *dev, struct nvmf_ctrl_options *opts)
}
spin_unlock_irqrestore(&nvme_fc_lock, flags);
+ pr_warn("%s: %s - %s combination not found\n",
+ __func__, opts->traddr, opts->host_traddr);
return ERR_PTR(-ENOENT);
}
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index 956e0b8e9c4d..d7b664ae5923 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -15,10 +15,32 @@
#include "nvme.h"
static bool multipath = true;
-module_param(multipath, bool, 0644);
+module_param(multipath, bool, 0444);
MODULE_PARM_DESC(multipath,
"turn on native support for multiple controllers per subsystem");
+/*
+ * If multipathing is enabled we need to always use the subsystem instance
+ * number for numbering our devices to avoid conflicts between subsystems that
+ * have multiple controllers and thus use the multipath-aware subsystem node
+ * and those that have a single controller and use the controller node
+ * directly.
+ */
+void nvme_set_disk_name(char *disk_name, struct nvme_ns *ns,
+ struct nvme_ctrl *ctrl, int *flags)
+{
+ if (!multipath) {
+ sprintf(disk_name, "nvme%dn%d", ctrl->instance, ns->head->instance);
+ } else if (ns->head->disk) {
+ sprintf(disk_name, "nvme%dc%dn%d", ctrl->subsys->instance,
+ ctrl->cntlid, ns->head->instance);
+ *flags = GENHD_FL_HIDDEN;
+ } else {
+ sprintf(disk_name, "nvme%dn%d", ctrl->subsys->instance,
+ ns->head->instance);
+ }
+}
+
void nvme_failover_req(struct request *req)
{
struct nvme_ns *ns = req->q->queuedata;
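
nvme_set_disk_name() centralizes the naming policy that nvme_alloc_ns() used to open-code under #ifdef. For, say, subsystem instance 0, cntlid 1 and namespace head instance 2, the outcomes would be (illustrative values only):

	char disk_name[DISK_NAME_LEN];
	int flags = 0;

	nvme_set_disk_name(disk_name, ns, ctrl, &flags);
	/*
	 *   multipath off:            "nvme0n2"   (controller instance)
	 *   multipath on, per-path:   "nvme0c1n2" (flags gets GENHD_FL_HIDDEN)
	 *   multipath on, mpath node: "nvme0n2"   (subsystem instance)
	 */
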
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 061fecfd44f5..de24fe77c80b 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -22,6 +22,7 @@
#include <linux/lightnvm.h>
#include <linux/sed-opal.h>
#include <linux/fault-inject.h>
+#include <linux/rcupdate.h>
extern unsigned int nvme_io_timeout;
#define NVME_IO_TIMEOUT (nvme_io_timeout * HZ)
@@ -84,6 +85,11 @@ enum nvme_quirks {
* Supports the LightNVM command set if indicated in vs[1].
*/
NVME_QUIRK_LIGHTNVM = (1 << 6),
+
+ /*
+ * Set MEDIUM priority on SQ creation
+ */
+ NVME_QUIRK_MEDIUM_PRIO_SQ = (1 << 7),
};
/*
@@ -175,6 +181,7 @@ struct nvme_ctrl {
u16 kas;
u8 npss;
u8 apsta;
+ u32 oaes;
u32 aen_result;
unsigned int shutdown_timeout;
unsigned int kato;
@@ -187,6 +194,8 @@ struct nvme_ctrl {
struct delayed_work ka_work;
struct nvme_command ka_cmd;
struct work_struct fw_act_work;
+#define EVENT_NS_CHANGED (1 << 0)
+ unsigned long events;
/* Power saving configuration */
u64 ps_max_latency_us;
@@ -393,14 +402,13 @@ void nvme_stop_ctrl(struct nvme_ctrl *ctrl);
void nvme_put_ctrl(struct nvme_ctrl *ctrl);
int nvme_init_identify(struct nvme_ctrl *ctrl);
-void nvme_queue_scan(struct nvme_ctrl *ctrl);
void nvme_remove_namespaces(struct nvme_ctrl *ctrl);
int nvme_sec_submit(void *data, u16 spsp, u8 secp, void *buffer, size_t len,
bool send);
void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status,
- union nvme_result *res);
+ volatile union nvme_result *res);
void nvme_stop_queues(struct nvme_ctrl *ctrl);
void nvme_start_queues(struct nvme_ctrl *ctrl);
@@ -436,6 +444,8 @@ extern const struct attribute_group nvme_ns_id_attr_group;
extern const struct block_device_operations nvme_ns_head_ops;
#ifdef CONFIG_NVME_MULTIPATH
+void nvme_set_disk_name(char *disk_name, struct nvme_ns *ns,
+ struct nvme_ctrl *ctrl, int *flags);
void nvme_failover_req(struct request *req);
bool nvme_req_needs_failover(struct request *req, blk_status_t error);
void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl);
@@ -447,7 +457,7 @@ static inline void nvme_mpath_clear_current_path(struct nvme_ns *ns)
{
struct nvme_ns_head *head = ns->head;
- if (head && ns == srcu_dereference(head->current_path, &head->srcu))
+ if (head && ns == rcu_access_pointer(head->current_path))
rcu_assign_pointer(head->current_path, NULL);
}
struct nvme_ns *nvme_find_path(struct nvme_ns_head *head);
@@ -461,6 +471,16 @@ static inline void nvme_mpath_check_last_path(struct nvme_ns *ns)
}
#else
+/*
+ * Without the multipath code enabled, multiple controllers per subsystem are
+ * visible as devices and thus we cannot use the subsystem instance.
+ */
+static inline void nvme_set_disk_name(char *disk_name, struct nvme_ns *ns,
+ struct nvme_ctrl *ctrl, int *flags)
+{
+ sprintf(disk_name, "nvme%dn%d", ctrl->instance, ns->head->instance);
+}
+
static inline void nvme_failover_req(struct request *req)
{
}
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index fbc71fac6f1e..e526437bacbf 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -13,6 +13,7 @@
*/
#include <linux/aer.h>
+#include <linux/async.h>
#include <linux/blkdev.h>
#include <linux/blk-mq.h>
#include <linux/blk-mq-pci.h>
@@ -68,7 +69,6 @@ MODULE_PARM_DESC(io_queue_depth, "set io queue depth, should >= 2");
struct nvme_dev;
struct nvme_queue;
-static void nvme_process_cq(struct nvme_queue *nvmeq);
static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown);
/*
@@ -147,9 +147,10 @@ static inline struct nvme_dev *to_nvme_dev(struct nvme_ctrl *ctrl)
struct nvme_queue {
struct device *q_dmadev;
struct nvme_dev *dev;
- spinlock_t q_lock;
+ spinlock_t sq_lock;
struct nvme_command *sq_cmds;
struct nvme_command __iomem *sq_cmds_io;
+ spinlock_t cq_lock ____cacheline_aligned_in_smp;
volatile struct nvme_completion *cqes;
struct blk_mq_tags **tags;
dma_addr_t sq_dma_addr;
@@ -159,9 +160,9 @@ struct nvme_queue {
s16 cq_vector;
u16 sq_tail;
u16 cq_head;
+ u16 last_cq_head;
u16 qid;
u8 cq_phase;
- u8 cqe_seen;
u32 *dbbuf_sq_db;
u32 *dbbuf_cq_db;
u32 *dbbuf_sq_ei;
@@ -420,28 +421,25 @@ static int nvme_pci_map_queues(struct blk_mq_tag_set *set)
}
/**
- * __nvme_submit_cmd() - Copy a command into a queue and ring the doorbell
+ * nvme_submit_cmd() - Copy a command into a queue and ring the doorbell
* @nvmeq: The queue to use
* @cmd: The command to send
- *
- * Safe to use from interrupt context
*/
-static void __nvme_submit_cmd(struct nvme_queue *nvmeq,
- struct nvme_command *cmd)
+static void nvme_submit_cmd(struct nvme_queue *nvmeq, struct nvme_command *cmd)
{
- u16 tail = nvmeq->sq_tail;
-
+ spin_lock(&nvmeq->sq_lock);
if (nvmeq->sq_cmds_io)
- memcpy_toio(&nvmeq->sq_cmds_io[tail], cmd, sizeof(*cmd));
+ memcpy_toio(&nvmeq->sq_cmds_io[nvmeq->sq_tail], cmd,
+ sizeof(*cmd));
else
- memcpy(&nvmeq->sq_cmds[tail], cmd, sizeof(*cmd));
+ memcpy(&nvmeq->sq_cmds[nvmeq->sq_tail], cmd, sizeof(*cmd));
- if (++tail == nvmeq->q_depth)
- tail = 0;
- if (nvme_dbbuf_update_and_check_event(tail, nvmeq->dbbuf_sq_db,
- nvmeq->dbbuf_sq_ei))
- writel(tail, nvmeq->q_db);
- nvmeq->sq_tail = tail;
+ if (++nvmeq->sq_tail == nvmeq->q_depth)
+ nvmeq->sq_tail = 0;
+ if (nvme_dbbuf_update_and_check_event(nvmeq->sq_tail,
+ nvmeq->dbbuf_sq_db, nvmeq->dbbuf_sq_ei))
+ writel(nvmeq->sq_tail, nvmeq->q_db);
+ spin_unlock(&nvmeq->sq_lock);
}
static void **nvme_pci_iod_list(struct request *req)
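
The q_lock split gives submission and completion disjoint critical sections: sq_lock guards only sq_tail and the SQ doorbell, cq_lock only cq_head/cq_phase and the CQ doorbell, so an interrupt draining completions no longer serializes against new submissions. A schematic of which side takes what (demo function, not driver code):

static void demo_lock_roles(struct nvme_queue *nvmeq)
{
	spin_lock(&nvmeq->sq_lock);
	/* producer: copy the command, advance sq_tail, ring the SQ doorbell */
	spin_unlock(&nvmeq->sq_lock);

	spin_lock(&nvmeq->cq_lock);
	/* consumer: walk CQEs by phase, advance cq_head, ring the CQ doorbell */
	spin_unlock(&nvmeq->cq_lock);
}
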
@@ -872,6 +870,13 @@ static blk_status_t nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
struct nvme_command cmnd;
blk_status_t ret;
+ /*
+ * We should not need to do this, but we're still using this to
+ * ensure we can drain requests on a dying queue.
+ */
+ if (unlikely(nvmeq->cq_vector < 0))
+ return BLK_STS_IOERR;
+
ret = nvme_setup_cmd(ns, req, &cmnd);
if (ret)
return ret;
@@ -887,16 +892,7 @@ static blk_status_t nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
}
blk_mq_start_request(req);
-
- spin_lock_irq(&nvmeq->q_lock);
- if (unlikely(nvmeq->cq_vector < 0)) {
- ret = BLK_STS_IOERR;
- spin_unlock_irq(&nvmeq->q_lock);
- goto out_cleanup_iod;
- }
- __nvme_submit_cmd(nvmeq, &cmnd);
- nvme_process_cq(nvmeq);
- spin_unlock_irq(&nvmeq->q_lock);
+ nvme_submit_cmd(nvmeq, &cmnd);
return BLK_STS_OK;
out_cleanup_iod:
nvme_free_iod(dev, req);
@@ -914,10 +910,10 @@ static void nvme_pci_complete_rq(struct request *req)
}
/* We read the CQE phase first to check if the rest of the entry is valid */
-static inline bool nvme_cqe_valid(struct nvme_queue *nvmeq, u16 head,
- u16 phase)
+static inline bool nvme_cqe_pending(struct nvme_queue *nvmeq)
{
- return (le16_to_cpu(nvmeq->cqes[head].status) & 1) == phase;
+ return (le16_to_cpu(nvmeq->cqes[nvmeq->cq_head].status) & 1) ==
+ nvmeq->cq_phase;
}
static inline void nvme_ring_cq_doorbell(struct nvme_queue *nvmeq)
@@ -931,9 +927,9 @@ static inline void nvme_ring_cq_doorbell(struct nvme_queue *nvmeq)
}
}
-static inline void nvme_handle_cqe(struct nvme_queue *nvmeq,
- struct nvme_completion *cqe)
+static inline void nvme_handle_cqe(struct nvme_queue *nvmeq, u16 idx)
{
+ volatile struct nvme_completion *cqe = &nvmeq->cqes[idx];
struct request *req;
if (unlikely(cqe->command_id >= nvmeq->q_depth)) {
@@ -956,83 +952,87 @@ static inline void nvme_handle_cqe(struct nvme_queue *nvmeq,
return;
}
- nvmeq->cqe_seen = 1;
req = blk_mq_tag_to_rq(*nvmeq->tags, cqe->command_id);
nvme_end_request(req, cqe->status, cqe->result);
}
-static inline bool nvme_read_cqe(struct nvme_queue *nvmeq,
- struct nvme_completion *cqe)
+static void nvme_complete_cqes(struct nvme_queue *nvmeq, u16 start, u16 end)
{
- if (nvme_cqe_valid(nvmeq, nvmeq->cq_head, nvmeq->cq_phase)) {
- *cqe = nvmeq->cqes[nvmeq->cq_head];
+ while (start != end) {
+ nvme_handle_cqe(nvmeq, start);
+ if (++start == nvmeq->q_depth)
+ start = 0;
+ }
+}
- if (++nvmeq->cq_head == nvmeq->q_depth) {
- nvmeq->cq_head = 0;
- nvmeq->cq_phase = !nvmeq->cq_phase;
- }
- return true;
+static inline void nvme_update_cq_head(struct nvme_queue *nvmeq)
+{
+ if (++nvmeq->cq_head == nvmeq->q_depth) {
+ nvmeq->cq_head = 0;
+ nvmeq->cq_phase = !nvmeq->cq_phase;
}
- return false;
}
-static void nvme_process_cq(struct nvme_queue *nvmeq)
+static inline bool nvme_process_cq(struct nvme_queue *nvmeq, u16 *start,
+ u16 *end, int tag)
{
- struct nvme_completion cqe;
- int consumed = 0;
+ bool found = false;
- while (nvme_read_cqe(nvmeq, &cqe)) {
- nvme_handle_cqe(nvmeq, &cqe);
- consumed++;
+ *start = nvmeq->cq_head;
+ while (!found && nvme_cqe_pending(nvmeq)) {
+ if (nvmeq->cqes[nvmeq->cq_head].command_id == tag)
+ found = true;
+ nvme_update_cq_head(nvmeq);
}
+ *end = nvmeq->cq_head;
- if (consumed)
+ if (*start != *end)
nvme_ring_cq_doorbell(nvmeq);
+ return found;
}
static irqreturn_t nvme_irq(int irq, void *data)
{
- irqreturn_t result;
struct nvme_queue *nvmeq = data;
- spin_lock(&nvmeq->q_lock);
- nvme_process_cq(nvmeq);
- result = nvmeq->cqe_seen ? IRQ_HANDLED : IRQ_NONE;
- nvmeq->cqe_seen = 0;
- spin_unlock(&nvmeq->q_lock);
- return result;
+ irqreturn_t ret = IRQ_NONE;
+ u16 start, end;
+
+ spin_lock(&nvmeq->cq_lock);
+ if (nvmeq->cq_head != nvmeq->last_cq_head)
+ ret = IRQ_HANDLED;
+ nvme_process_cq(nvmeq, &start, &end, -1);
+ nvmeq->last_cq_head = nvmeq->cq_head;
+ spin_unlock(&nvmeq->cq_lock);
+
+ if (start != end) {
+ nvme_complete_cqes(nvmeq, start, end);
+ return IRQ_HANDLED;
+ }
+
+ return ret;
}
static irqreturn_t nvme_irq_check(int irq, void *data)
{
struct nvme_queue *nvmeq = data;
- if (nvme_cqe_valid(nvmeq, nvmeq->cq_head, nvmeq->cq_phase))
+ if (nvme_cqe_pending(nvmeq))
return IRQ_WAKE_THREAD;
return IRQ_NONE;
}
static int __nvme_poll(struct nvme_queue *nvmeq, unsigned int tag)
{
- struct nvme_completion cqe;
- int found = 0, consumed = 0;
+ u16 start, end;
+ bool found;
- if (!nvme_cqe_valid(nvmeq, nvmeq->cq_head, nvmeq->cq_phase))
+ if (!nvme_cqe_pending(nvmeq))
return 0;
- spin_lock_irq(&nvmeq->q_lock);
- while (nvme_read_cqe(nvmeq, &cqe)) {
- nvme_handle_cqe(nvmeq, &cqe);
- consumed++;
-
- if (tag == cqe.command_id) {
- found = 1;
- break;
- }
- }
-
- if (consumed)
- nvme_ring_cq_doorbell(nvmeq);
- spin_unlock_irq(&nvmeq->q_lock);
+ spin_lock_irq(&nvmeq->cq_lock);
+ found = nvme_process_cq(nvmeq, &start, &end, tag);
+ spin_unlock_irq(&nvmeq->cq_lock);
+ nvme_complete_cqes(nvmeq, start, end);
return found;
}
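
nvme_process_cq() now only records the [start, end) window of consumed entries while cq_lock is held; the block-layer completion work runs afterwards in nvme_complete_cqes() with the lock dropped, as both the IRQ and poll paths above do. The two-phase shape, assuming the signatures from this hunk:

static void demo_reap(struct nvme_queue *nvmeq)
{
	u16 start, end;

	spin_lock_irq(&nvmeq->cq_lock);
	nvme_process_cq(nvmeq, &start, &end, -1);	/* moves cq_head only */
	spin_unlock_irq(&nvmeq->cq_lock);

	nvme_complete_cqes(nvmeq, start, end);	/* ends requests, unlocked */
}
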
@@ -1052,10 +1052,7 @@ static void nvme_pci_submit_async_event(struct nvme_ctrl *ctrl)
memset(&c, 0, sizeof(c));
c.common.opcode = nvme_admin_async_event;
c.common.command_id = NVME_AQ_BLK_MQ_DEPTH;
-
- spin_lock_irq(&nvmeq->q_lock);
- __nvme_submit_cmd(nvmeq, &c);
- spin_unlock_irq(&nvmeq->q_lock);
+ nvme_submit_cmd(nvmeq, &c);
}
static int adapter_delete_queue(struct nvme_dev *dev, u8 opcode, u16 id)
@@ -1070,7 +1067,7 @@ static int adapter_delete_queue(struct nvme_dev *dev, u8 opcode, u16 id)
}
static int adapter_alloc_cq(struct nvme_dev *dev, u16 qid,
- struct nvme_queue *nvmeq)
+ struct nvme_queue *nvmeq, s16 vector)
{
struct nvme_command c;
int flags = NVME_QUEUE_PHYS_CONTIG | NVME_CQ_IRQ_ENABLED;
@@ -1085,7 +1082,7 @@ static int adapter_alloc_cq(struct nvme_dev *dev, u16 qid,
c.create_cq.cqid = cpu_to_le16(qid);
c.create_cq.qsize = cpu_to_le16(nvmeq->q_depth - 1);
c.create_cq.cq_flags = cpu_to_le16(flags);
- c.create_cq.irq_vector = cpu_to_le16(nvmeq->cq_vector);
+ c.create_cq.irq_vector = cpu_to_le16(vector);
return nvme_submit_sync_cmd(dev->ctrl.admin_q, &c, NULL, 0);
}
@@ -1093,10 +1090,19 @@ static int adapter_alloc_cq(struct nvme_dev *dev, u16 qid,
static int adapter_alloc_sq(struct nvme_dev *dev, u16 qid,
struct nvme_queue *nvmeq)
{
+ struct nvme_ctrl *ctrl = &dev->ctrl;
struct nvme_command c;
int flags = NVME_QUEUE_PHYS_CONTIG;
/*
+ * Some drives have a bug that auto-enables WRRU if MEDIUM isn't
+ * set. Since the URGENT priority class is zero, this makes all queues
+ * URGENT.
+ */
+ if (ctrl->quirks & NVME_QUIRK_MEDIUM_PRIO_SQ)
+ flags |= NVME_SQ_PRIO_MEDIUM;
+
+ /*
* Note: we (ab)use the fact that the prp fields survive if no data
* is attached to the request.
*/
@@ -1199,7 +1205,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
nvme_warn_reset(dev, csts);
nvme_dev_disable(dev, false);
nvme_reset_ctrl(&dev->ctrl);
- return BLK_EH_HANDLED;
+ return BLK_EH_DONE;
}
/*
@@ -1209,24 +1215,24 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
dev_warn(dev->ctrl.device,
"I/O %d QID %d timeout, completion polled\n",
req->tag, nvmeq->qid);
- return BLK_EH_HANDLED;
+ return BLK_EH_DONE;
}
/*
* Shutdown immediately if controller times out while starting. The
* reset work will see the pci device disabled when it gets the forced
* cancellation error. All outstanding requests are completed on
- * shutdown, so we return BLK_EH_HANDLED.
+ * shutdown, so we return BLK_EH_DONE.
*/
switch (dev->ctrl.state) {
case NVME_CTRL_CONNECTING:
case NVME_CTRL_RESETTING:
- dev_warn(dev->ctrl.device,
+ dev_warn_ratelimited(dev->ctrl.device,
"I/O %d QID %d timeout, disable controller\n",
req->tag, nvmeq->qid);
nvme_dev_disable(dev, false);
nvme_req(req)->flags |= NVME_REQ_CANCELLED;
- return BLK_EH_HANDLED;
+ return BLK_EH_DONE;
default:
break;
}
@@ -1243,12 +1249,8 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
nvme_dev_disable(dev, false);
nvme_reset_ctrl(&dev->ctrl);
- /*
- * Mark the request as handled, since the inline shutdown
- * forces all outstanding requests to complete.
- */
nvme_req(req)->flags |= NVME_REQ_CANCELLED;
- return BLK_EH_HANDLED;
+ return BLK_EH_DONE;
}
if (atomic_dec_return(&dev->ctrl.abort_limit) < 0) {
@@ -1312,15 +1314,21 @@ static int nvme_suspend_queue(struct nvme_queue *nvmeq)
{
int vector;
- spin_lock_irq(&nvmeq->q_lock);
+ spin_lock_irq(&nvmeq->cq_lock);
if (nvmeq->cq_vector == -1) {
- spin_unlock_irq(&nvmeq->q_lock);
+ spin_unlock_irq(&nvmeq->cq_lock);
return 1;
}
vector = nvmeq->cq_vector;
nvmeq->dev->online_queues--;
nvmeq->cq_vector = -1;
- spin_unlock_irq(&nvmeq->q_lock);
+ spin_unlock_irq(&nvmeq->cq_lock);
+
+ /*
+ * Ensure that nvme_queue_rq() sees nvmeq->cq_vector == -1 without
+ * having to grab the lock.
+ */
+ mb();
if (!nvmeq->qid && nvmeq->dev->ctrl.admin_q)
blk_mq_quiesce_queue(nvmeq->dev->ctrl.admin_q);
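The mb() above publishes the cq_vector store so that nvme_queue_rq() can
test it without taking the lock; a lockless reader needs matching ordering
on its side. A minimal C11 sketch of the same publish/observe pairing,
expressed with release/acquire semantics rather than the kernel's full
barrier (names are illustrative):

#include <stdatomic.h>
#include <stdbool.h>

static _Atomic int cq_vector;	/* stand-in for nvmeq->cq_vector */

void suspend_queue(void)
{
	/* Publish the "queue is dead" marker for lockless readers. */
	atomic_store_explicit(&cq_vector, -1, memory_order_release);
}

bool queue_is_live(void)
{
	/* Pairs with the release store above. */
	return atomic_load_explicit(&cq_vector, memory_order_acquire) != -1;
}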
@@ -1333,15 +1341,18 @@ static int nvme_suspend_queue(struct nvme_queue *nvmeq)
static void nvme_disable_admin_queue(struct nvme_dev *dev, bool shutdown)
{
struct nvme_queue *nvmeq = &dev->queues[0];
+ u16 start, end;
if (shutdown)
nvme_shutdown_ctrl(&dev->ctrl);
else
nvme_disable_ctrl(&dev->ctrl, dev->ctrl.cap);
- spin_lock_irq(&nvmeq->q_lock);
- nvme_process_cq(nvmeq);
- spin_unlock_irq(&nvmeq->q_lock);
+ spin_lock_irq(&nvmeq->cq_lock);
+ nvme_process_cq(nvmeq, &start, &end, -1);
+ spin_unlock_irq(&nvmeq->cq_lock);
+
+ nvme_complete_cqes(nvmeq, start, end);
}
static int nvme_cmb_qdepth(struct nvme_dev *dev, int nr_io_queues,
@@ -1399,7 +1410,8 @@ static int nvme_alloc_queue(struct nvme_dev *dev, int qid, int depth)
nvmeq->q_dmadev = dev->dev;
nvmeq->dev = dev;
- spin_lock_init(&nvmeq->q_lock);
+ spin_lock_init(&nvmeq->sq_lock);
+ spin_lock_init(&nvmeq->cq_lock);
nvmeq->cq_head = 0;
nvmeq->cq_phase = 1;
nvmeq->q_db = &dev->dbs[qid * 2 * dev->db_stride];
@@ -1435,7 +1447,7 @@ static void nvme_init_queue(struct nvme_queue *nvmeq, u16 qid)
{
struct nvme_dev *dev = nvmeq->dev;
- spin_lock_irq(&nvmeq->q_lock);
+ spin_lock_irq(&nvmeq->cq_lock);
nvmeq->sq_tail = 0;
nvmeq->cq_head = 0;
nvmeq->cq_phase = 1;
@@ -1443,13 +1455,14 @@ static void nvme_init_queue(struct nvme_queue *nvmeq, u16 qid)
memset((void *)nvmeq->cqes, 0, CQ_SIZE(nvmeq->q_depth));
nvme_dbbuf_init(dev, nvmeq, qid);
dev->online_queues++;
- spin_unlock_irq(&nvmeq->q_lock);
+ spin_unlock_irq(&nvmeq->cq_lock);
}
static int nvme_create_queue(struct nvme_queue *nvmeq, int qid)
{
struct nvme_dev *dev = nvmeq->dev;
int result;
+ s16 vector;
if (dev->cmb && use_cmb_sqes && (dev->cmbsz & NVME_CMBSZ_SQS)) {
unsigned offset = (qid - 1) * roundup(SQ_SIZE(nvmeq->q_depth),
@@ -1462,15 +1475,21 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid)
* A queue's vector matches the queue identifier unless the controller
* has only one vector available.
*/
- nvmeq->cq_vector = dev->num_vecs == 1 ? 0 : qid;
- result = adapter_alloc_cq(dev, qid, nvmeq);
+ vector = dev->num_vecs == 1 ? 0 : qid;
+ result = adapter_alloc_cq(dev, qid, nvmeq, vector);
if (result < 0)
- goto release_vector;
+ goto out;
result = adapter_alloc_sq(dev, qid, nvmeq);
if (result < 0)
goto release_cq;
+ /*
+ * Set cq_vector only after the CQ/SQ are allocated; otherwise
+ * nvme_suspend_queue() will invoke free_irq() for it and cause a
+ * 'Trying to free already-free IRQ xxx' warning if the create CQ/SQ
+ * command times out.
+ */
+ nvmeq->cq_vector = vector;
nvme_init_queue(nvmeq, qid);
result = queue_request_irq(nvmeq);
if (result < 0)
@@ -1478,13 +1497,13 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid)
return result;
- release_sq:
+release_sq:
+ nvmeq->cq_vector = -1;
dev->online_queues--;
adapter_delete_sq(dev, qid);
- release_cq:
+release_cq:
adapter_delete_cq(dev, qid);
- release_vector:
- nvmeq->cq_vector = -1;
+out:
return result;
}
@@ -1988,19 +2007,22 @@ static void nvme_del_queue_end(struct request *req, blk_status_t error)
static void nvme_del_cq_end(struct request *req, blk_status_t error)
{
struct nvme_queue *nvmeq = req->end_io_data;
+ u16 start, end;
if (!error) {
unsigned long flags;
/*
- * We might be called with the AQ q_lock held
- * and the I/O queue q_lock should always
+ * We might be called with the AQ cq_lock held
+ * and the I/O queue cq_lock should always
* nest inside the AQ one.
*/
- spin_lock_irqsave_nested(&nvmeq->q_lock, flags,
+ spin_lock_irqsave_nested(&nvmeq->cq_lock, flags,
SINGLE_DEPTH_NESTING);
- nvme_process_cq(nvmeq);
- spin_unlock_irqrestore(&nvmeq->q_lock, flags);
+ nvme_process_cq(nvmeq, &start, &end, -1);
+ spin_unlock_irqrestore(&nvmeq->cq_lock, flags);
+
+ nvme_complete_cqes(nvmeq, start, end);
}
nvme_del_queue_end(req, error);
@@ -2488,6 +2510,15 @@ static unsigned long check_vendor_combination_bug(struct pci_dev *pdev)
return 0;
}
+static void nvme_async_probe(void *data, async_cookie_t cookie)
+{
+ struct nvme_dev *dev = data;
+
+ nvme_reset_ctrl_sync(&dev->ctrl);
+ flush_work(&dev->ctrl.scan_work);
+ nvme_put_ctrl(&dev->ctrl);
+}
+
static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
int node, result = -ENOMEM;
@@ -2532,7 +2563,8 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
dev_info(dev->ctrl.device, "pci function %s\n", dev_name(&pdev->dev));
- nvme_reset_ctrl(&dev->ctrl);
+ nvme_get_ctrl(&dev->ctrl);
+ async_schedule(nvme_async_probe, dev);
return 0;
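nvme_async_probe() works because probe takes an extra controller reference
before scheduling the async work, and the work drops it once the reset and
scan are done, so the controller cannot be freed underneath them. A
user-space sketch of this take-a-reference-before-going-async pattern,
with an atomic counter and a detached thread standing in for kref and
async_schedule() (all names here are illustrative):

#include <pthread.h>
#include <stdatomic.h>
#include <stdlib.h>

struct ctrl {
	atomic_int refs;
};

static void ctrl_put(struct ctrl *c)
{
	if (atomic_fetch_sub(&c->refs, 1) == 1)
		free(c);			/* last reference gone */
}

static void *async_probe(void *data)
{
	struct ctrl *c = data;

	/* ... long-running reset/scan work would run here ... */
	ctrl_put(c);				/* balances the get in probe() */
	return NULL;
}

int probe(struct ctrl *c)
{
	pthread_t t;

	atomic_fetch_add(&c->refs, 1);		/* keep ctrl alive for worker */
	if (pthread_create(&t, NULL, async_probe, c)) {
		ctrl_put(c);
		return -1;
	}
	return pthread_detach(t);
}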
@@ -2676,6 +2708,9 @@ static pci_ers_result_t nvme_slot_reset(struct pci_dev *pdev)
static void nvme_error_resume(struct pci_dev *pdev)
{
+ struct nvme_dev *dev = pci_get_drvdata(pdev);
+
+ flush_work(&dev->ctrl.reset_work);
pci_cleanup_aer_uncorrect_error_status(pdev);
}
@@ -2701,9 +2736,12 @@ static const struct pci_device_id nvme_id_table[] = {
.driver_data = NVME_QUIRK_STRIPE_SIZE |
NVME_QUIRK_DEALLOCATE_ZEROES, },
{ PCI_VDEVICE(INTEL, 0xf1a5), /* Intel 600P/P3100 */
- .driver_data = NVME_QUIRK_NO_DEEPEST_PS },
+ .driver_data = NVME_QUIRK_NO_DEEPEST_PS |
+ NVME_QUIRK_MEDIUM_PRIO_SQ },
{ PCI_VDEVICE(INTEL, 0x5845), /* Qemu emulated controller */
.driver_data = NVME_QUIRK_IDENTIFY_CNS, },
+ { PCI_DEVICE(0x1bb1, 0x0100), /* Seagate Nytro Flash Storage */
+ .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, },
{ PCI_DEVICE(0x1c58, 0x0003), /* HGST adapter */
.driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, },
{ PCI_DEVICE(0x1c58, 0x0023), /* WDC SN200 adapter */
@@ -2718,6 +2756,8 @@ static const struct pci_device_id nvme_id_table[] = {
.driver_data = NVME_QUIRK_LIGHTNVM, },
{ PCI_DEVICE(0x1d1d, 0x2807), /* CNEX WL */
.driver_data = NVME_QUIRK_LIGHTNVM, },
+ { PCI_DEVICE(0x1d1d, 0x2601), /* CNEX Granby */
+ .driver_data = NVME_QUIRK_LIGHTNVM, },
{ PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, 0xffffff) },
{ PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2001) },
{ PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2003) },
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 1eb4438a8763..7b3f08410430 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -778,7 +778,7 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl,
if (error) {
dev_err(ctrl->ctrl.device,
"prop_get NVME_REG_CAP failed\n");
- goto out_cleanup_queue;
+ goto out_stop_queue;
}
ctrl->ctrl.sqsize =
@@ -786,23 +786,25 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl,
error = nvme_enable_ctrl(&ctrl->ctrl, ctrl->ctrl.cap);
if (error)
- goto out_cleanup_queue;
+ goto out_stop_queue;
ctrl->ctrl.max_hw_sectors =
(ctrl->max_fr_pages - 1) << (ilog2(SZ_4K) - 9);
error = nvme_init_identify(&ctrl->ctrl);
if (error)
- goto out_cleanup_queue;
+ goto out_stop_queue;
error = nvme_rdma_alloc_qe(ctrl->queues[0].device->dev,
&ctrl->async_event_sqe, sizeof(struct nvme_command),
DMA_TO_DEVICE);
if (error)
- goto out_cleanup_queue;
+ goto out_stop_queue;
return 0;
+out_stop_queue:
+ nvme_rdma_stop_queue(&ctrl->queues[0]);
out_cleanup_queue:
if (new)
blk_cleanup_queue(ctrl->ctrl.admin_q);
@@ -1598,7 +1600,7 @@ nvme_rdma_timeout(struct request *rq, bool reserved)
/* fail with DNR on cmd timeout */
nvme_req(rq)->status = NVME_SC_ABORT_REQ | NVME_SC_DNR;
- return BLK_EH_HANDLED;
+ return BLK_EH_DONE;
}
static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
diff --git a/drivers/nvme/host/trace.h b/drivers/nvme/host/trace.h
index ea91fccd1bc0..01390f0e1671 100644
--- a/drivers/nvme/host/trace.h
+++ b/drivers/nvme/host/trace.h
@@ -148,8 +148,8 @@ TRACE_EVENT(nvme_complete_rq,
__entry->flags = nvme_req(req)->flags;
__entry->status = nvme_req(req)->status;
),
- TP_printk("cmdid=%u, qid=%d, res=%llu, retries=%u, flags=0x%x, status=%u",
- __entry->cid, __entry->qid, __entry->result,
+ TP_printk("qid=%d, cmdid=%u, res=%llu, retries=%u, flags=0x%x, status=%u",
+ __entry->qid, __entry->cid, __entry->result,
__entry->retries, __entry->flags, __entry->status)
);
diff --git a/drivers/nvme/target/Kconfig b/drivers/nvme/target/Kconfig
index 5f4f8b16685f..3c7b61ddb0d1 100644
--- a/drivers/nvme/target/Kconfig
+++ b/drivers/nvme/target/Kconfig
@@ -27,7 +27,7 @@ config NVME_TARGET_LOOP
config NVME_TARGET_RDMA
tristate "NVMe over Fabrics RDMA target support"
- depends on INFINIBAND
+ depends on INFINIBAND && INFINIBAND_ADDR_TRANS
depends on NVME_TARGET
select SGL_ALLOC
help
diff --git a/drivers/nvme/target/Makefile b/drivers/nvme/target/Makefile
index 488250189c99..8118c93391c6 100644
--- a/drivers/nvme/target/Makefile
+++ b/drivers/nvme/target/Makefile
@@ -6,8 +6,8 @@ obj-$(CONFIG_NVME_TARGET_RDMA) += nvmet-rdma.o
obj-$(CONFIG_NVME_TARGET_FC) += nvmet-fc.o
obj-$(CONFIG_NVME_TARGET_FCLOOP) += nvme-fcloop.o
-nvmet-y += core.o configfs.o admin-cmd.o io-cmd.o fabrics-cmd.o \
- discovery.o
+nvmet-y += core.o configfs.o admin-cmd.o fabrics-cmd.o \
+ discovery.o io-cmd-file.o io-cmd-bdev.o
nvme-loop-y += loop.o
nvmet-rdma-y += rdma.o
nvmet-fc-y += fc.o
diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c
index 5e0e9fcc0d4d..ead8fbe6922e 100644
--- a/drivers/nvme/target/admin-cmd.c
+++ b/drivers/nvme/target/admin-cmd.c
@@ -32,6 +32,11 @@ u32 nvmet_get_log_page_len(struct nvme_command *cmd)
return len;
}
+static void nvmet_execute_get_log_page_noop(struct nvmet_req *req)
+{
+ nvmet_req_complete(req, nvmet_zero_sgl(req, 0, req->data_len));
+}
+
static u16 nvmet_get_smart_log_nsid(struct nvmet_req *req,
struct nvme_smart_log *slog)
{
@@ -45,6 +50,10 @@ static u16 nvmet_get_smart_log_nsid(struct nvmet_req *req,
return NVME_SC_INVALID_NS;
}
+ /* we don't have the right data for file-backed ns */
+ if (!ns->bdev)
+ goto out;
+
host_reads = part_stat_read(ns->bdev->bd_part, ios[READ]);
data_units_read = part_stat_read(ns->bdev->bd_part, sectors[READ]);
host_writes = part_stat_read(ns->bdev->bd_part, ios[WRITE]);
@@ -54,6 +63,7 @@ static u16 nvmet_get_smart_log_nsid(struct nvmet_req *req,
put_unaligned_le64(data_units_read, &slog->data_units_read[0]);
put_unaligned_le64(host_writes, &slog->host_writes[0]);
put_unaligned_le64(data_units_written, &slog->data_units_written[0]);
+out:
nvmet_put_namespace(ns);
return NVME_SC_SUCCESS;
@@ -71,6 +81,9 @@ static u16 nvmet_get_smart_log_all(struct nvmet_req *req,
rcu_read_lock();
list_for_each_entry_rcu(ns, &ctrl->subsys->namespaces, dev_link) {
+ /* we don't have the right data for file-backed ns */
+ if (!ns->bdev)
+ continue;
host_reads += part_stat_read(ns->bdev->bd_part, ios[READ]);
data_units_read +=
part_stat_read(ns->bdev->bd_part, sectors[READ]);
@@ -89,74 +102,50 @@ static u16 nvmet_get_smart_log_all(struct nvmet_req *req,
return NVME_SC_SUCCESS;
}
-static u16 nvmet_get_smart_log(struct nvmet_req *req,
- struct nvme_smart_log *slog)
+static void nvmet_execute_get_log_page_smart(struct nvmet_req *req)
{
- u16 status;
+ struct nvme_smart_log *log;
+ u16 status = NVME_SC_INTERNAL;
+
+ if (req->data_len != sizeof(*log))
+ goto out;
+
+ log = kzalloc(sizeof(*log), GFP_KERNEL);
+ if (!log)
+ goto out;
- WARN_ON(req == NULL || slog == NULL);
if (req->cmd->get_log_page.nsid == cpu_to_le32(NVME_NSID_ALL))
- status = nvmet_get_smart_log_all(req, slog);
+ status = nvmet_get_smart_log_all(req, log);
else
- status = nvmet_get_smart_log_nsid(req, slog);
- return status;
+ status = nvmet_get_smart_log_nsid(req, log);
+ if (status)
+ goto out;
+
+ status = nvmet_copy_to_sgl(req, 0, log, sizeof(*log));
+out:
+ nvmet_req_complete(req, status);
}
-static void nvmet_execute_get_log_page(struct nvmet_req *req)
+static void nvmet_execute_get_log_changed_ns(struct nvmet_req *req)
{
- struct nvme_smart_log *smart_log;
- size_t data_len = nvmet_get_log_page_len(req->cmd);
- void *buf;
- u16 status = 0;
+ struct nvmet_ctrl *ctrl = req->sq->ctrl;
+ u16 status = NVME_SC_INTERNAL;
+ size_t len;
- buf = kzalloc(data_len, GFP_KERNEL);
- if (!buf) {
- status = NVME_SC_INTERNAL;
+ if (req->data_len != NVME_MAX_CHANGED_NAMESPACES * sizeof(__le32))
goto out;
- }
- switch (req->cmd->get_log_page.lid) {
- case NVME_LOG_ERROR:
- /*
- * We currently never set the More bit in the status field,
- * so all error log entries are invalid and can be zeroed out.
- * This is called a minum viable implementation (TM) of this
- * mandatory log page.
- */
- break;
- case NVME_LOG_SMART:
- /*
- * XXX: fill out actual smart log
- *
- * We might have a hard time coming up with useful values for
- * many of the fields, and even when we have useful data
- * available (e.g. units or commands read/written) those aren't
- * persistent over power loss.
- */
- if (data_len != sizeof(*smart_log)) {
- status = NVME_SC_INTERNAL;
- goto err;
- }
- smart_log = buf;
- status = nvmet_get_smart_log(req, smart_log);
- if (status)
- goto err;
- break;
- case NVME_LOG_FW_SLOT:
- /*
- * We only support a single firmware slot which always is
- * active, so we can zero out the whole firmware slot log and
- * still claim to fully implement this mandatory log page.
- */
- break;
- default:
- BUG();
- }
-
- status = nvmet_copy_to_sgl(req, 0, buf, data_len);
-
-err:
- kfree(buf);
+ mutex_lock(&ctrl->lock);
+ if (ctrl->nr_changed_ns == U32_MAX)
+ len = sizeof(__le32);
+ else
+ len = ctrl->nr_changed_ns * sizeof(__le32);
+ status = nvmet_copy_to_sgl(req, 0, ctrl->changed_ns_list, len);
+ if (!status)
+ status = nvmet_zero_sgl(req, len, req->data_len - len);
+ ctrl->nr_changed_ns = 0;
+ clear_bit(NVME_AEN_CFG_NS_ATTR, &ctrl->aen_masked);
+ mutex_unlock(&ctrl->lock);
out:
nvmet_req_complete(req, status);
}
@@ -201,7 +190,7 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
id->ver = cpu_to_le32(ctrl->subsys->ver);
/* XXX: figure out what to do about RTD3R/RTD3 */
- id->oaes = cpu_to_le32(1 << 8);
+ id->oaes = cpu_to_le32(NVMET_AEN_CFG_OPTIONAL);
id->ctratt = cpu_to_le32(1 << 0);
id->oacs = 0;
@@ -447,6 +436,16 @@ static void nvmet_execute_set_features(struct nvmet_req *req)
req->sq->ctrl->kato = DIV_ROUND_UP(val32, 1000);
nvmet_set_result(req, req->sq->ctrl->kato);
break;
+ case NVME_FEAT_ASYNC_EVENT:
+ val32 = le32_to_cpu(req->cmd->common.cdw10[1]);
+ if (val32 & ~NVMET_AEN_CFG_ALL) {
+ status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+ break;
+ }
+
+ WRITE_ONCE(req->sq->ctrl->aen_enabled, val32);
+ nvmet_set_result(req, val32);
+ break;
case NVME_FEAT_HOST_ID:
status = NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR;
break;
@@ -485,9 +484,10 @@ static void nvmet_execute_get_features(struct nvmet_req *req)
break;
case NVME_FEAT_WRITE_ATOMIC:
break;
+#endif
case NVME_FEAT_ASYNC_EVENT:
+ nvmet_set_result(req, READ_ONCE(req->sq->ctrl->aen_enabled));
break;
-#endif
case NVME_FEAT_VOLATILE_WC:
nvmet_set_result(req, 1);
break;
@@ -548,8 +548,6 @@ u16 nvmet_parse_admin_cmd(struct nvmet_req *req)
struct nvme_command *cmd = req->cmd;
u16 ret;
- req->ns = NULL;
-
ret = nvmet_check_ctrl_status(req, cmd);
if (unlikely(ret))
return ret;
@@ -560,9 +558,28 @@ u16 nvmet_parse_admin_cmd(struct nvmet_req *req)
switch (cmd->get_log_page.lid) {
case NVME_LOG_ERROR:
+ /*
+ * We currently never set the More bit in the status
+ * field, so all error log entries are invalid and can
+ * be zeroed out. This is called a minimum viable
+ * implementation (TM) of this mandatory log page.
+ */
+ req->execute = nvmet_execute_get_log_page_noop;
+ return 0;
case NVME_LOG_SMART:
+ req->execute = nvmet_execute_get_log_page_smart;
+ return 0;
case NVME_LOG_FW_SLOT:
- req->execute = nvmet_execute_get_log_page;
+ /*
+ * We only support a single firmware slot which always
+ * is active, so we can zero out the whole firmware slot
+ * log and still claim to fully implement this mandatory
+ * log page.
+ */
+ req->execute = nvmet_execute_get_log_page_noop;
+ return 0;
+ case NVME_LOG_CHANGED_NS:
+ req->execute = nvmet_execute_get_log_changed_ns;
return 0;
}
break;
diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index e95424f172fd..a03da764ecae 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -57,6 +57,13 @@ u16 nvmet_copy_from_sgl(struct nvmet_req *req, off_t off, void *buf, size_t len)
return 0;
}
+u16 nvmet_zero_sgl(struct nvmet_req *req, off_t off, size_t len)
+{
+ if (sg_zero_buffer(req->sg, req->sg_cnt, len, off) != len)
+ return NVME_SC_SGL_INVALID_DATA | NVME_SC_DNR;
+ return 0;
+}
+
static unsigned int nvmet_max_nsid(struct nvmet_subsys *subsys)
{
struct nvmet_ns *ns;
@@ -137,6 +144,51 @@ static void nvmet_add_async_event(struct nvmet_ctrl *ctrl, u8 event_type,
schedule_work(&ctrl->async_event_work);
}
+static bool nvmet_aen_disabled(struct nvmet_ctrl *ctrl, u32 aen)
+{
+ if (!(READ_ONCE(ctrl->aen_enabled) & aen))
+ return true;
+ return test_and_set_bit(aen, &ctrl->aen_masked);
+}
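nvmet_aen_disabled() edge-triggers each AEN class: after one event is
sent, the class stays masked until the host reads the matching log page,
which re-arms it (as nvmet_execute_get_log_changed_ns does above with
clear_bit()). A C11 stand-in for the same scheme, with illustrative names:

#include <stdatomic.h>
#include <stdbool.h>

static atomic_uint aen_enabled;	/* what the host asked for (Set Features) */
static atomic_uint aen_masked;	/* classes with an event still outstanding */

static bool aen_disabled(unsigned int aen_bit)
{
	if (!(atomic_load(&aen_enabled) & aen_bit))
		return true;	/* host opted out of this class */
	/* atomic test-and-set: only the first event since the last
	 * log-page read is actually sent */
	return atomic_fetch_or(&aen_masked, aen_bit) & aen_bit;
}

static void host_read_log_page(unsigned int aen_bit)
{
	atomic_fetch_and(&aen_masked, ~aen_bit);	/* re-arm the class */
}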
+
+static void nvmet_add_to_changed_ns_log(struct nvmet_ctrl *ctrl, __le32 nsid)
+{
+ u32 i;
+
+ mutex_lock(&ctrl->lock);
+ if (ctrl->nr_changed_ns > NVME_MAX_CHANGED_NAMESPACES)
+ goto out_unlock;
+
+ for (i = 0; i < ctrl->nr_changed_ns; i++) {
+ if (ctrl->changed_ns_list[i] == nsid)
+ goto out_unlock;
+ }
+
+ if (ctrl->nr_changed_ns == NVME_MAX_CHANGED_NAMESPACES) {
+ ctrl->changed_ns_list[0] = cpu_to_le32(0xffffffff);
+ ctrl->nr_changed_ns = U32_MAX;
+ goto out_unlock;
+ }
+
+ ctrl->changed_ns_list[ctrl->nr_changed_ns++] = nsid;
+out_unlock:
+ mutex_unlock(&ctrl->lock);
+}
+
+static void nvmet_ns_changed(struct nvmet_subsys *subsys, u32 nsid)
+{
+ struct nvmet_ctrl *ctrl;
+
+ list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) {
+ nvmet_add_to_changed_ns_log(ctrl, cpu_to_le32(nsid));
+ if (nvmet_aen_disabled(ctrl, NVME_AEN_CFG_NS_ATTR))
+ continue;
+ nvmet_add_async_event(ctrl, NVME_AER_TYPE_NOTICE,
+ NVME_AER_NOTICE_NS_CHANGED,
+ NVME_LOG_CHANGED_NS);
+ }
+}
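When the changed-namespace list overflows, the code above collapses it to
the single sentinel entry 0xffffffff; per the NVMe spec this tells the
host that more namespaces changed than fit in the log (up to
NVME_MAX_CHANGED_NAMESPACES entries) and that it should rescan everything
rather than walk the list. A host-side sketch of checking that sentinel
(hypothetical helper):

#include <stdbool.h>
#include <stdint.h>

static bool changed_ns_log_overflowed(const uint32_t *log)
{
	/* first le32 entry of the Changed Namespace List log page */
	return log[0] == 0xffffffffu;
}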
+
int nvmet_register_transport(const struct nvmet_fabrics_ops *ops)
{
int ret = 0;
@@ -271,33 +323,31 @@ void nvmet_put_namespace(struct nvmet_ns *ns)
percpu_ref_put(&ns->ref);
}
+static void nvmet_ns_dev_disable(struct nvmet_ns *ns)
+{
+ nvmet_bdev_ns_disable(ns);
+ nvmet_file_ns_disable(ns);
+}
+
int nvmet_ns_enable(struct nvmet_ns *ns)
{
struct nvmet_subsys *subsys = ns->subsys;
- struct nvmet_ctrl *ctrl;
int ret = 0;
mutex_lock(&subsys->lock);
if (ns->enabled)
goto out_unlock;
- ns->bdev = blkdev_get_by_path(ns->device_path, FMODE_READ | FMODE_WRITE,
- NULL);
- if (IS_ERR(ns->bdev)) {
- pr_err("failed to open block device %s: (%ld)\n",
- ns->device_path, PTR_ERR(ns->bdev));
- ret = PTR_ERR(ns->bdev);
- ns->bdev = NULL;
+ ret = nvmet_bdev_ns_enable(ns);
+ if (ret)
+ ret = nvmet_file_ns_enable(ns);
+ if (ret)
goto out_unlock;
- }
-
- ns->size = i_size_read(ns->bdev->bd_inode);
- ns->blksize_shift = blksize_bits(bdev_logical_block_size(ns->bdev));
ret = percpu_ref_init(&ns->ref, nvmet_destroy_namespace,
0, GFP_KERNEL);
if (ret)
- goto out_blkdev_put;
+ goto out_dev_put;
if (ns->nsid > subsys->max_nsid)
subsys->max_nsid = ns->nsid;
@@ -320,24 +370,20 @@ int nvmet_ns_enable(struct nvmet_ns *ns)
list_add_tail_rcu(&ns->dev_link, &old->dev_link);
}
- list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
- nvmet_add_async_event(ctrl, NVME_AER_TYPE_NOTICE, 0, 0);
-
+ nvmet_ns_changed(subsys, ns->nsid);
ns->enabled = true;
ret = 0;
out_unlock:
mutex_unlock(&subsys->lock);
return ret;
-out_blkdev_put:
- blkdev_put(ns->bdev, FMODE_WRITE|FMODE_READ);
- ns->bdev = NULL;
+out_dev_put:
+ nvmet_ns_dev_disable(ns);
goto out_unlock;
}
void nvmet_ns_disable(struct nvmet_ns *ns)
{
struct nvmet_subsys *subsys = ns->subsys;
- struct nvmet_ctrl *ctrl;
mutex_lock(&subsys->lock);
if (!ns->enabled)
@@ -363,11 +409,8 @@ void nvmet_ns_disable(struct nvmet_ns *ns)
percpu_ref_exit(&ns->ref);
mutex_lock(&subsys->lock);
- list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
- nvmet_add_async_event(ctrl, NVME_AER_TYPE_NOTICE, 0, 0);
-
- if (ns->bdev)
- blkdev_put(ns->bdev, FMODE_WRITE|FMODE_READ);
+ nvmet_ns_changed(subsys, ns->nsid);
+ nvmet_ns_dev_disable(ns);
out_unlock:
mutex_unlock(&subsys->lock);
}
@@ -499,6 +542,25 @@ int nvmet_sq_init(struct nvmet_sq *sq)
}
EXPORT_SYMBOL_GPL(nvmet_sq_init);
+static u16 nvmet_parse_io_cmd(struct nvmet_req *req)
+{
+ struct nvme_command *cmd = req->cmd;
+ u16 ret;
+
+ ret = nvmet_check_ctrl_status(req, cmd);
+ if (unlikely(ret))
+ return ret;
+
+ req->ns = nvmet_find_namespace(req->sq->ctrl, cmd->rw.nsid);
+ if (unlikely(!req->ns))
+ return NVME_SC_INVALID_NS | NVME_SC_DNR;
+
+ if (req->ns->file)
+ return nvmet_file_parse_io_cmd(req);
+ else
+ return nvmet_bdev_parse_io_cmd(req);
+}
+
bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq,
struct nvmet_sq *sq, const struct nvmet_fabrics_ops *ops)
{
@@ -710,15 +772,14 @@ out:
u16 nvmet_check_ctrl_status(struct nvmet_req *req, struct nvme_command *cmd)
{
if (unlikely(!(req->sq->ctrl->cc & NVME_CC_ENABLE))) {
- pr_err("got io cmd %d while CC.EN == 0 on qid = %d\n",
+ pr_err("got cmd %d while CC.EN == 0 on qid = %d\n",
cmd->common.opcode, req->sq->qid);
return NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR;
}
if (unlikely(!(req->sq->ctrl->csts & NVME_CSTS_RDY))) {
- pr_err("got io cmd %d while CSTS.RDY == 0 on qid = %d\n",
+ pr_err("got cmd %d while CSTS.RDY == 0 on qid = %d\n",
cmd->common.opcode, req->sq->qid);
- req->ns = NULL;
return NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR;
}
return 0;
@@ -809,12 +870,18 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
kref_init(&ctrl->ref);
ctrl->subsys = subsys;
+ WRITE_ONCE(ctrl->aen_enabled, NVMET_AEN_CFG_OPTIONAL);
+
+ ctrl->changed_ns_list = kmalloc_array(NVME_MAX_CHANGED_NAMESPACES,
+ sizeof(__le32), GFP_KERNEL);
+ if (!ctrl->changed_ns_list)
+ goto out_free_ctrl;
ctrl->cqs = kcalloc(subsys->max_qid + 1,
sizeof(struct nvmet_cq *),
GFP_KERNEL);
if (!ctrl->cqs)
- goto out_free_ctrl;
+ goto out_free_changed_ns_list;
ctrl->sqs = kcalloc(subsys->max_qid + 1,
sizeof(struct nvmet_sq *),
@@ -872,6 +939,8 @@ out_free_sqs:
kfree(ctrl->sqs);
out_free_cqs:
kfree(ctrl->cqs);
+out_free_changed_ns_list:
+ kfree(ctrl->changed_ns_list);
out_free_ctrl:
kfree(ctrl);
out_put_subsystem:
@@ -898,6 +967,7 @@ static void nvmet_ctrl_free(struct kref *ref)
kfree(ctrl->sqs);
kfree(ctrl->cqs);
+ kfree(ctrl->changed_ns_list);
kfree(ctrl);
nvmet_subsys_put(subsys);
diff --git a/drivers/nvme/target/discovery.c b/drivers/nvme/target/discovery.c
index 231e04e0a496..08656b849bd6 100644
--- a/drivers/nvme/target/discovery.c
+++ b/drivers/nvme/target/discovery.c
@@ -187,8 +187,6 @@ u16 nvmet_parse_discovery_cmd(struct nvmet_req *req)
{
struct nvme_command *cmd = req->cmd;
- req->ns = NULL;
-
if (unlikely(!(req->sq->ctrl->csts & NVME_CSTS_RDY))) {
pr_err("got cmd %d while not ready\n",
cmd->common.opcode);
diff --git a/drivers/nvme/target/fabrics-cmd.c b/drivers/nvme/target/fabrics-cmd.c
index 19e9e42ae943..d84ae004cb85 100644
--- a/drivers/nvme/target/fabrics-cmd.c
+++ b/drivers/nvme/target/fabrics-cmd.c
@@ -77,8 +77,6 @@ u16 nvmet_parse_fabrics_cmd(struct nvmet_req *req)
{
struct nvme_command *cmd = req->cmd;
- req->ns = NULL;
-
switch (cmd->fabrics.fctype) {
case nvme_fabrics_type_property_set:
req->data_len = 0;
@@ -242,8 +240,6 @@ u16 nvmet_parse_connect_cmd(struct nvmet_req *req)
{
struct nvme_command *cmd = req->cmd;
- req->ns = NULL;
-
if (cmd->common.opcode != nvme_fabrics_command) {
pr_err("invalid command 0x%x on unconnected queue.\n",
cmd->fabrics.opcode);
diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c
index 33ee8d3145f8..408279cb6f2c 100644
--- a/drivers/nvme/target/fc.c
+++ b/drivers/nvme/target/fc.c
@@ -31,7 +31,7 @@
/* *************************** Data Structures/Defines ****************** */
-#define NVMET_LS_CTX_COUNT 4
+#define NVMET_LS_CTX_COUNT 256
/* for this implementation, assume small single frame rqst/rsp */
#define NVME_FC_MAX_LS_BUFFER_SIZE 2048
diff --git a/drivers/nvme/target/io-cmd.c b/drivers/nvme/target/io-cmd-bdev.c
index cd2344179673..e0b0f7df70c2 100644
--- a/drivers/nvme/target/io-cmd.c
+++ b/drivers/nvme/target/io-cmd-bdev.c
@@ -16,6 +16,34 @@
#include <linux/module.h>
#include "nvmet.h"
+int nvmet_bdev_ns_enable(struct nvmet_ns *ns)
+{
+ int ret;
+
+ ns->bdev = blkdev_get_by_path(ns->device_path,
+ FMODE_READ | FMODE_WRITE, NULL);
+ if (IS_ERR(ns->bdev)) {
+ ret = PTR_ERR(ns->bdev);
+ if (ret != -ENOTBLK) {
+ pr_err("failed to open block device %s: (%ld)\n",
+ ns->device_path, PTR_ERR(ns->bdev));
+ }
+ ns->bdev = NULL;
+ return ret;
+ }
+ ns->size = i_size_read(ns->bdev->bd_inode);
+ ns->blksize_shift = blksize_bits(bdev_logical_block_size(ns->bdev));
+ return 0;
+}
+
+void nvmet_bdev_ns_disable(struct nvmet_ns *ns)
+{
+ if (ns->bdev) {
+ blkdev_put(ns->bdev, FMODE_WRITE | FMODE_READ);
+ ns->bdev = NULL;
+ }
+}
+
static void nvmet_bio_done(struct bio *bio)
{
struct nvmet_req *req = bio->bi_private;
@@ -23,20 +51,14 @@ static void nvmet_bio_done(struct bio *bio)
nvmet_req_complete(req,
bio->bi_status ? NVME_SC_INTERNAL | NVME_SC_DNR : 0);
- if (bio != &req->inline_bio)
+ if (bio != &req->b.inline_bio)
bio_put(bio);
}
-static inline u32 nvmet_rw_len(struct nvmet_req *req)
-{
- return ((u32)le16_to_cpu(req->cmd->rw.length) + 1) <<
- req->ns->blksize_shift;
-}
-
-static void nvmet_execute_rw(struct nvmet_req *req)
+static void nvmet_bdev_execute_rw(struct nvmet_req *req)
{
int sg_cnt = req->sg_cnt;
- struct bio *bio = &req->inline_bio;
+ struct bio *bio = &req->b.inline_bio;
struct scatterlist *sg;
sector_t sector;
blk_qc_t cookie;
@@ -89,9 +111,9 @@ static void nvmet_execute_rw(struct nvmet_req *req)
blk_poll(bdev_get_queue(req->ns->bdev), cookie);
}
-static void nvmet_execute_flush(struct nvmet_req *req)
+static void nvmet_bdev_execute_flush(struct nvmet_req *req)
{
- struct bio *bio = &req->inline_bio;
+ struct bio *bio = &req->b.inline_bio;
bio_init(bio, req->inline_bvec, ARRAY_SIZE(req->inline_bvec));
bio_set_dev(bio, req->ns->bdev);
@@ -102,7 +124,7 @@ static void nvmet_execute_flush(struct nvmet_req *req)
submit_bio(bio);
}
-static u16 nvmet_discard_range(struct nvmet_ns *ns,
+static u16 nvmet_bdev_discard_range(struct nvmet_ns *ns,
struct nvme_dsm_range *range, struct bio **bio)
{
int ret;
@@ -116,7 +138,7 @@ static u16 nvmet_discard_range(struct nvmet_ns *ns,
return 0;
}
-static void nvmet_execute_discard(struct nvmet_req *req)
+static void nvmet_bdev_execute_discard(struct nvmet_req *req)
{
struct nvme_dsm_range range;
struct bio *bio = NULL;
@@ -129,7 +151,7 @@ static void nvmet_execute_discard(struct nvmet_req *req)
if (status)
break;
- status = nvmet_discard_range(req->ns, &range, &bio);
+ status = nvmet_bdev_discard_range(req->ns, &range, &bio);
if (status)
break;
}
@@ -148,11 +170,11 @@ static void nvmet_execute_discard(struct nvmet_req *req)
}
}
-static void nvmet_execute_dsm(struct nvmet_req *req)
+static void nvmet_bdev_execute_dsm(struct nvmet_req *req)
{
switch (le32_to_cpu(req->cmd->dsm.attributes)) {
case NVME_DSMGMT_AD:
- nvmet_execute_discard(req);
+ nvmet_bdev_execute_discard(req);
return;
case NVME_DSMGMT_IDR:
case NVME_DSMGMT_IDW:
@@ -163,7 +185,7 @@ static void nvmet_execute_dsm(struct nvmet_req *req)
}
}
-static void nvmet_execute_write_zeroes(struct nvmet_req *req)
+static void nvmet_bdev_execute_write_zeroes(struct nvmet_req *req)
{
struct nvme_write_zeroes_cmd *write_zeroes = &req->cmd->write_zeroes;
struct bio *bio = NULL;
@@ -189,38 +211,27 @@ static void nvmet_execute_write_zeroes(struct nvmet_req *req)
}
}
-u16 nvmet_parse_io_cmd(struct nvmet_req *req)
+u16 nvmet_bdev_parse_io_cmd(struct nvmet_req *req)
{
struct nvme_command *cmd = req->cmd;
- u16 ret;
-
- ret = nvmet_check_ctrl_status(req, cmd);
- if (unlikely(ret)) {
- req->ns = NULL;
- return ret;
- }
-
- req->ns = nvmet_find_namespace(req->sq->ctrl, cmd->rw.nsid);
- if (unlikely(!req->ns))
- return NVME_SC_INVALID_NS | NVME_SC_DNR;
switch (cmd->common.opcode) {
case nvme_cmd_read:
case nvme_cmd_write:
- req->execute = nvmet_execute_rw;
+ req->execute = nvmet_bdev_execute_rw;
req->data_len = nvmet_rw_len(req);
return 0;
case nvme_cmd_flush:
- req->execute = nvmet_execute_flush;
+ req->execute = nvmet_bdev_execute_flush;
req->data_len = 0;
return 0;
case nvme_cmd_dsm:
- req->execute = nvmet_execute_dsm;
+ req->execute = nvmet_bdev_execute_dsm;
req->data_len = (le32_to_cpu(cmd->dsm.nr) + 1) *
sizeof(struct nvme_dsm_range);
return 0;
case nvme_cmd_write_zeroes:
- req->execute = nvmet_execute_write_zeroes;
+ req->execute = nvmet_bdev_execute_write_zeroes;
return 0;
default:
pr_err("unhandled cmd %d on qid %d\n", cmd->common.opcode,
diff --git a/drivers/nvme/target/io-cmd-file.c b/drivers/nvme/target/io-cmd-file.c
new file mode 100644
index 000000000000..8c42b3a8c420
--- /dev/null
+++ b/drivers/nvme/target/io-cmd-file.c
@@ -0,0 +1,304 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * NVMe Over Fabrics Target File I/O commands implementation.
+ * Copyright (c) 2017-2018 Western Digital Corporation or its
+ * affiliates.
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/uio.h>
+#include <linux/falloc.h>
+#include <linux/file.h>
+#include "nvmet.h"
+
+#define NVMET_MAX_MPOOL_BVEC 16
+#define NVMET_MIN_MPOOL_OBJ 16
+
+void nvmet_file_ns_disable(struct nvmet_ns *ns)
+{
+ if (ns->file) {
+ mempool_destroy(ns->bvec_pool);
+ ns->bvec_pool = NULL;
+ kmem_cache_destroy(ns->bvec_cache);
+ ns->bvec_cache = NULL;
+ fput(ns->file);
+ ns->file = NULL;
+ }
+}
+
+int nvmet_file_ns_enable(struct nvmet_ns *ns)
+{
+ int ret;
+ struct kstat stat;
+
+ ns->file = filp_open(ns->device_path,
+ O_RDWR | O_LARGEFILE | O_DIRECT, 0);
+ if (IS_ERR(ns->file)) {
+ pr_err("failed to open file %s: (%ld)\n",
+ ns->device_path, PTR_ERR(ns->file));
+ return PTR_ERR(ns->file);
+ }
+
+ ret = vfs_getattr(&ns->file->f_path,
+ &stat, STATX_SIZE, AT_STATX_FORCE_SYNC);
+ if (ret)
+ goto err;
+
+ ns->size = stat.size;
+ ns->blksize_shift = file_inode(ns->file)->i_blkbits;
+
+ ns->bvec_cache = kmem_cache_create("nvmet-bvec",
+ NVMET_MAX_MPOOL_BVEC * sizeof(struct bio_vec),
+ 0, SLAB_HWCACHE_ALIGN, NULL);
+ if (!ns->bvec_cache) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ ns->bvec_pool = mempool_create(NVMET_MIN_MPOOL_OBJ, mempool_alloc_slab,
+ mempool_free_slab, ns->bvec_cache);
+
+ if (!ns->bvec_pool) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ return ret;
+err:
+ ns->size = 0;
+ ns->blksize_shift = 0;
+ nvmet_file_ns_disable(ns);
+ return ret;
+}
+
+static void nvmet_file_init_bvec(struct bio_vec *bv, struct sg_page_iter *iter)
+{
+ bv->bv_page = sg_page_iter_page(iter);
+ bv->bv_offset = iter->sg->offset;
+ bv->bv_len = PAGE_SIZE - iter->sg->offset;
+}
+
+static ssize_t nvmet_file_submit_bvec(struct nvmet_req *req, loff_t pos,
+ unsigned long nr_segs, size_t count)
+{
+ struct kiocb *iocb = &req->f.iocb;
+ ssize_t (*call_iter)(struct kiocb *iocb, struct iov_iter *iter);
+ struct iov_iter iter;
+ int ki_flags = 0, rw;
+ ssize_t ret;
+
+ if (req->cmd->rw.opcode == nvme_cmd_write) {
+ if (req->cmd->rw.control & cpu_to_le16(NVME_RW_FUA))
+ ki_flags = IOCB_DSYNC;
+ call_iter = req->ns->file->f_op->write_iter;
+ rw = WRITE;
+ } else {
+ call_iter = req->ns->file->f_op->read_iter;
+ rw = READ;
+ }
+
+ iov_iter_bvec(&iter, ITER_BVEC | rw, req->f.bvec, nr_segs, count);
+
+ iocb->ki_pos = pos;
+ iocb->ki_filp = req->ns->file;
+ iocb->ki_flags = IOCB_DIRECT | ki_flags;
+
+ ret = call_iter(iocb, &iter);
+
+ if (ret != -EIOCBQUEUED && iocb->ki_complete)
+ iocb->ki_complete(iocb, ret, 0);
+
+ return ret;
+}
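nvmet_file_submit_bvec() maps NVMe FUA onto IOCB_DSYNC on a file already
opened O_DIRECT, i.e. per-write sync semantics instead of a separate
flush. A user-space analogue of the same mapping, assuming Linux's
pwritev2() with RWF_DSYNC (the helper name is made up):

#define _GNU_SOURCE
#include <fcntl.h>
#include <sys/uio.h>

ssize_t write_maybe_fua(int fd, const struct iovec *iov, int cnt,
			off_t pos, int fua)
{
	/* RWF_DSYNC gives per-write sync semantics, the user-space
	 * analogue of IOCB_DSYNC; fd is assumed opened with O_DIRECT. */
	return pwritev2(fd, iov, cnt, pos, fua ? RWF_DSYNC : 0);
}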
+
+static void nvmet_file_io_done(struct kiocb *iocb, long ret, long ret2)
+{
+ struct nvmet_req *req = container_of(iocb, struct nvmet_req, f.iocb);
+
+ if (req->f.bvec != req->inline_bvec) {
+ if (likely(req->f.mpool_alloc == false))
+ kfree(req->f.bvec);
+ else
+ mempool_free(req->f.bvec, req->ns->bvec_pool);
+ }
+
+ nvmet_req_complete(req, ret != req->data_len ?
+ NVME_SC_INTERNAL | NVME_SC_DNR : 0);
+}
+
+static void nvmet_file_execute_rw(struct nvmet_req *req)
+{
+ ssize_t nr_bvec = DIV_ROUND_UP(req->data_len, PAGE_SIZE);
+ struct sg_page_iter sg_pg_iter;
+ unsigned long bv_cnt = 0;
+ bool is_sync = false;
+ size_t len = 0, total_len = 0;
+ ssize_t ret = 0;
+ loff_t pos;
+
+ if (!req->sg_cnt || !nr_bvec) {
+ nvmet_req_complete(req, 0);
+ return;
+ }
+
+ if (nr_bvec > NVMET_MAX_INLINE_BIOVEC)
+ req->f.bvec = kmalloc_array(nr_bvec, sizeof(struct bio_vec),
+ GFP_KERNEL);
+ else
+ req->f.bvec = req->inline_bvec;
+
+ req->f.mpool_alloc = false;
+ if (unlikely(!req->f.bvec)) {
+ /* fallback under memory pressure */
+ req->f.bvec = mempool_alloc(req->ns->bvec_pool, GFP_KERNEL);
+ req->f.mpool_alloc = true;
+ if (nr_bvec > NVMET_MAX_MPOOL_BVEC)
+ is_sync = true;
+ }
+
+ pos = le64_to_cpu(req->cmd->rw.slba) << req->ns->blksize_shift;
+
+ memset(&req->f.iocb, 0, sizeof(struct kiocb));
+ for_each_sg_page(req->sg, &sg_pg_iter, req->sg_cnt, 0) {
+ nvmet_file_init_bvec(&req->f.bvec[bv_cnt], &sg_pg_iter);
+ len += req->f.bvec[bv_cnt].bv_len;
+ total_len += req->f.bvec[bv_cnt].bv_len;
+ bv_cnt++;
+
+ WARN_ON_ONCE((nr_bvec - 1) < 0);
+
+ if (unlikely(is_sync) &&
+ (nr_bvec - 1 == 0 || bv_cnt == NVMET_MAX_MPOOL_BVEC)) {
+ ret = nvmet_file_submit_bvec(req, pos, bv_cnt, len);
+ if (ret < 0)
+ goto out;
+ pos += len;
+ bv_cnt = 0;
+ len = 0;
+ }
+ nr_bvec--;
+ }
+
+ if (WARN_ON_ONCE(total_len != req->data_len))
+ ret = -EIO;
+out:
+ if (unlikely(is_sync || ret)) {
+ nvmet_file_io_done(&req->f.iocb, ret < 0 ? ret : total_len, 0);
+ return;
+ }
+ req->f.iocb.ki_complete = nvmet_file_io_done;
+ nvmet_file_submit_bvec(req, pos, bv_cnt, total_len);
+}
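The read/write path above tries a plain kmalloc_array() first and only
falls back to the preallocated mempool under memory pressure; because a
pool object holds at most NVMET_MAX_MPOOL_BVEC segments, the fallback also
forces the I/O to be issued synchronously in pool-sized chunks. A
deliberately unsynchronized user-space sketch of that fallback-to-reserve
idea (sizes mirror the constants above, all other names are illustrative):

#include <stdbool.h>
#include <stdlib.h>

#define POOL_OBJS	16	/* mirrors NVMET_MIN_MPOOL_OBJ */
#define POOL_VEC	16	/* mirrors NVMET_MAX_MPOOL_BVEC */

struct vec { void *base; size_t len; };

static struct vec reserve[POOL_OBJS][POOL_VEC];
static bool reserve_busy[POOL_OBJS];	/* no locking: sketch only */

static struct vec *vec_alloc(size_t nr, bool *from_pool)
{
	struct vec *v = calloc(nr, sizeof(*v));
	int i;

	*from_pool = false;
	if (v)
		return v;		/* fast path: normal allocation */

	for (i = 0; i < POOL_OBJS; i++) {
		if (!reserve_busy[i]) {
			reserve_busy[i] = true;
			*from_pool = true;
			/* caller must now split the I/O into chunks of
			 * at most POOL_VEC segments */
			return reserve[i];
		}
	}
	return NULL;
}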
+
+static void nvmet_file_flush_work(struct work_struct *w)
+{
+ struct nvmet_req *req = container_of(w, struct nvmet_req, f.work);
+ int ret;
+
+ ret = vfs_fsync(req->ns->file, 1);
+
+ nvmet_req_complete(req, ret < 0 ? NVME_SC_INTERNAL | NVME_SC_DNR : 0);
+}
+
+static void nvmet_file_execute_flush(struct nvmet_req *req)
+{
+ INIT_WORK(&req->f.work, nvmet_file_flush_work);
+ schedule_work(&req->f.work);
+}
+
+static void nvmet_file_execute_discard(struct nvmet_req *req)
+{
+ int mode = FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE;
+ struct nvme_dsm_range range;
+ loff_t offset;
+ loff_t len;
+ int i, ret;
+
+ for (i = 0; i <= le32_to_cpu(req->cmd->dsm.nr); i++) {
+ if (nvmet_copy_from_sgl(req, i * sizeof(range), &range,
+ sizeof(range)))
+ break;
+ offset = le64_to_cpu(range.slba) << req->ns->blksize_shift;
+ len = le32_to_cpu(range.nlb) << req->ns->blksize_shift;
+ ret = vfs_fallocate(req->ns->file, mode, offset, len);
+ if (ret)
+ break;
+ }
+
+ nvmet_req_complete(req, ret < 0 ? NVME_SC_INTERNAL | NVME_SC_DNR : 0);
+}
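The file-backed discard above is a hole punch: FALLOC_FL_PUNCH_HOLE
deallocates the byte range and must be paired with FALLOC_FL_KEEP_SIZE so
the file size is untouched; the write-zeroes worker below uses
FALLOC_FL_ZERO_RANGE the same way. The equivalent user-space calls:

#define _GNU_SOURCE
#include <fcntl.h>

int discard_range(int fd, off_t off, off_t len)
{
	/* deallocate the range without changing the file size */
	return fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
			 off, len);
}

int zero_range(int fd, off_t off, off_t len)
{
	/* same pattern as the write-zeroes worker below */
	return fallocate(fd, FALLOC_FL_ZERO_RANGE | FALLOC_FL_KEEP_SIZE,
			 off, len);
}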
+
+static void nvmet_file_dsm_work(struct work_struct *w)
+{
+ struct nvmet_req *req = container_of(w, struct nvmet_req, f.work);
+
+ switch (le32_to_cpu(req->cmd->dsm.attributes)) {
+ case NVME_DSMGMT_AD:
+ nvmet_file_execute_discard(req);
+ return;
+ case NVME_DSMGMT_IDR:
+ case NVME_DSMGMT_IDW:
+ default:
+ /* Not supported yet */
+ nvmet_req_complete(req, 0);
+ return;
+ }
+}
+
+static void nvmet_file_execute_dsm(struct nvmet_req *req)
+{
+ INIT_WORK(&req->f.work, nvmet_file_dsm_work);
+ schedule_work(&req->f.work);
+}
+
+static void nvmet_file_write_zeroes_work(struct work_struct *w)
+{
+ struct nvmet_req *req = container_of(w, struct nvmet_req, f.work);
+ struct nvme_write_zeroes_cmd *write_zeroes = &req->cmd->write_zeroes;
+ int mode = FALLOC_FL_ZERO_RANGE | FALLOC_FL_KEEP_SIZE;
+ loff_t offset;
+ loff_t len;
+ int ret;
+
+ offset = le64_to_cpu(write_zeroes->slba) << req->ns->blksize_shift;
+ len = (((sector_t)le16_to_cpu(write_zeroes->length) + 1) <<
+ req->ns->blksize_shift);
+
+ ret = vfs_fallocate(req->ns->file, mode, offset, len);
+ nvmet_req_complete(req, ret < 0 ? NVME_SC_INTERNAL | NVME_SC_DNR : 0);
+}
+
+static void nvmet_file_execute_write_zeroes(struct nvmet_req *req)
+{
+ INIT_WORK(&req->f.work, nvmet_file_write_zeroes_work);
+ schedule_work(&req->f.work);
+}
+
+u16 nvmet_file_parse_io_cmd(struct nvmet_req *req)
+{
+ struct nvme_command *cmd = req->cmd;
+
+ switch (cmd->common.opcode) {
+ case nvme_cmd_read:
+ case nvme_cmd_write:
+ req->execute = nvmet_file_execute_rw;
+ req->data_len = nvmet_rw_len(req);
+ return 0;
+ case nvme_cmd_flush:
+ req->execute = nvmet_file_execute_flush;
+ req->data_len = 0;
+ return 0;
+ case nvme_cmd_dsm:
+ req->execute = nvmet_file_execute_dsm;
+ req->data_len = (le32_to_cpu(cmd->dsm.nr) + 1) *
+ sizeof(struct nvme_dsm_range);
+ return 0;
+ case nvme_cmd_write_zeroes:
+ req->execute = nvmet_file_execute_write_zeroes;
+ req->data_len = 0;
+ return 0;
+ default:
+ pr_err("unhandled cmd for file ns %d on qid %d\n",
+ cmd->common.opcode, req->sq->qid);
+ return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
+ }
+}
diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c
index 31fdfba556a8..1304ec3a7ede 100644
--- a/drivers/nvme/target/loop.c
+++ b/drivers/nvme/target/loop.c
@@ -45,6 +45,7 @@ struct nvme_loop_ctrl {
struct nvme_ctrl ctrl;
struct nvmet_ctrl *target_ctrl;
+ struct nvmet_port *port;
};
static inline struct nvme_loop_ctrl *to_loop_ctrl(struct nvme_ctrl *ctrl)
@@ -63,7 +64,8 @@ struct nvme_loop_queue {
unsigned long flags;
};
-static struct nvmet_port *nvmet_loop_port;
+static LIST_HEAD(nvme_loop_ports);
+static DEFINE_MUTEX(nvme_loop_ports_mutex);
static LIST_HEAD(nvme_loop_ctrl_list);
static DEFINE_MUTEX(nvme_loop_ctrl_mutex);
@@ -146,7 +148,7 @@ nvme_loop_timeout(struct request *rq, bool reserved)
/* fail with DNR on admin cmd timeout */
nvme_req(rq)->status = NVME_SC_ABORT_REQ | NVME_SC_DNR;
- return BLK_EH_HANDLED;
+ return BLK_EH_DONE;
}
static blk_status_t nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx,
@@ -169,12 +171,12 @@ static blk_status_t nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx,
blk_mq_start_request(req);
iod->cmd.common.flags |= NVME_CMD_SGL_METABUF;
- iod->req.port = nvmet_loop_port;
+ iod->req.port = queue->ctrl->port;
if (!nvmet_req_init(&iod->req, &queue->nvme_cq,
&queue->nvme_sq, &nvme_loop_ops))
return BLK_STS_OK;
- if (blk_rq_payload_bytes(req)) {
+ if (blk_rq_nr_phys_segments(req)) {
iod->sg_table.sgl = iod->first_sgl;
if (sg_alloc_table_chained(&iod->sg_table,
blk_rq_nr_phys_segments(req),
@@ -469,6 +471,12 @@ static void nvme_loop_reset_ctrl_work(struct work_struct *work)
nvme_stop_ctrl(&ctrl->ctrl);
nvme_loop_shutdown_ctrl(ctrl);
+ if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) {
+ /* state change failure should never happen */
+ WARN_ON_ONCE(1);
+ return;
+ }
+
ret = nvme_loop_configure_admin_queue(ctrl);
if (ret)
goto out_disable;
@@ -511,6 +519,7 @@ static const struct nvme_ctrl_ops nvme_loop_ctrl_ops = {
.free_ctrl = nvme_loop_free_ctrl,
.submit_async_event = nvme_loop_submit_async_event,
.delete_ctrl = nvme_loop_delete_ctrl_host,
+ .get_address = nvmf_get_address,
};
static int nvme_loop_create_io_queues(struct nvme_loop_ctrl *ctrl)
@@ -559,6 +568,23 @@ out_destroy_queues:
return ret;
}
+static struct nvmet_port *nvme_loop_find_port(struct nvme_ctrl *ctrl)
+{
+ struct nvmet_port *p, *found = NULL;
+
+ mutex_lock(&nvme_loop_ports_mutex);
+ list_for_each_entry(p, &nvme_loop_ports, entry) {
+ /* if no transport address is specified use the first port */
+ if ((ctrl->opts->mask & NVMF_OPT_TRADDR) &&
+ strcmp(ctrl->opts->traddr, p->disc_addr.traddr))
+ continue;
+ found = p;
+ break;
+ }
+ mutex_unlock(&nvme_loop_ports_mutex);
+ return found;
+}
+
static struct nvme_ctrl *nvme_loop_create_ctrl(struct device *dev,
struct nvmf_ctrl_options *opts)
{
@@ -583,6 +609,7 @@ static struct nvme_ctrl *nvme_loop_create_ctrl(struct device *dev,
ctrl->ctrl.sqsize = opts->queue_size - 1;
ctrl->ctrl.kato = opts->kato;
+ ctrl->port = nvme_loop_find_port(&ctrl->ctrl);
ctrl->queues = kcalloc(opts->nr_io_queues + 1, sizeof(*ctrl->queues),
GFP_KERNEL);
@@ -640,27 +667,17 @@ out_put_ctrl:
static int nvme_loop_add_port(struct nvmet_port *port)
{
- /*
- * XXX: disalow adding more than one port so
- * there is no connection rejections when a
- * a subsystem is assigned to a port for which
- * loop doesn't have a pointer.
- * This scenario would be possible if we allowed
- * more than one port to be added and a subsystem
- * was assigned to a port other than nvmet_loop_port.
- */
-
- if (nvmet_loop_port)
- return -EPERM;
-
- nvmet_loop_port = port;
+ mutex_lock(&nvme_loop_ports_mutex);
+ list_add_tail(&port->entry, &nvme_loop_ports);
+ mutex_unlock(&nvme_loop_ports_mutex);
return 0;
}
static void nvme_loop_remove_port(struct nvmet_port *port)
{
- if (port == nvmet_loop_port)
- nvmet_loop_port = NULL;
+ mutex_lock(&nvme_loop_ports_mutex);
+ list_del_init(&port->entry);
+ mutex_unlock(&nvme_loop_ports_mutex);
}
static const struct nvmet_fabrics_ops nvme_loop_ops = {
@@ -676,6 +693,7 @@ static struct nvmf_transport_ops nvme_loop_transport = {
.name = "loop",
.module = THIS_MODULE,
.create_ctrl = nvme_loop_create_ctrl,
+ .allowed_opts = NVMF_OPT_TRADDR,
};
static int __init nvme_loop_init_module(void)
diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
index 15fd84ab21f8..480dfe10fad9 100644
--- a/drivers/nvme/target/nvmet.h
+++ b/drivers/nvme/target/nvmet.h
@@ -30,6 +30,21 @@
#define NVMET_ASYNC_EVENTS 4
#define NVMET_ERROR_LOG_SLOTS 128
+
+/*
+ * Supported optional AENs:
+ */
+#define NVMET_AEN_CFG_OPTIONAL \
+ NVME_AEN_CFG_NS_ATTR
+
+/*
+ * Plus mandatory SMART AENs (we'll never send them, but allow enabling them):
+ */
+#define NVMET_AEN_CFG_ALL \
+ (NVME_SMART_CRIT_SPARE | NVME_SMART_CRIT_TEMPERATURE | \
+ NVME_SMART_CRIT_RELIABILITY | NVME_SMART_CRIT_MEDIA | \
+ NVME_SMART_CRIT_VOLATILE_MEMORY | NVMET_AEN_CFG_OPTIONAL)
+
/* Helper Macros when NVMe error is NVME_SC_CONNECT_INVALID_PARAM
* The 16 bit shift is to set IATTR bit to 1, which means offending
* offset starts in the data section of connect()
@@ -43,6 +58,7 @@ struct nvmet_ns {
struct list_head dev_link;
struct percpu_ref ref;
struct block_device *bdev;
+ struct file *file;
u32 nsid;
u32 blksize_shift;
loff_t size;
@@ -57,6 +73,8 @@ struct nvmet_ns {
struct config_group group;
struct completion disable_done;
+ mempool_t *bvec_pool;
+ struct kmem_cache *bvec_cache;
};
static inline struct nvmet_ns *to_nvmet_ns(struct config_item *item)
@@ -82,7 +100,7 @@ struct nvmet_sq {
/**
* struct nvmet_port - Common structure to keep port
* information for the target.
- * @entry: List head for holding a list of these elements.
+ * @entry: Entry into referrals or transport list.
* @disc_addr: Address information is stored in a format defined
* for a discovery log page entry.
* @group: ConfigFS group for this element's folder.
@@ -120,6 +138,8 @@ struct nvmet_ctrl {
u16 cntlid;
u32 kato;
+ u32 aen_enabled;
+ unsigned long aen_masked;
struct nvmet_req *async_event_cmds[NVMET_ASYNC_EVENTS];
unsigned int nr_async_event_cmds;
struct list_head async_events;
@@ -132,6 +152,9 @@ struct nvmet_ctrl {
const struct nvmet_fabrics_ops *ops;
+ __le32 *changed_ns_list;
+ u32 nr_changed_ns;
+
char subsysnqn[NVMF_NQN_FIELD_LEN];
char hostnqn[NVMF_NQN_FIELD_LEN];
};
@@ -222,8 +245,18 @@ struct nvmet_req {
struct nvmet_cq *cq;
struct nvmet_ns *ns;
struct scatterlist *sg;
- struct bio inline_bio;
struct bio_vec inline_bvec[NVMET_MAX_INLINE_BIOVEC];
+ union {
+ struct {
+ struct bio inline_bio;
+ } b;
+ struct {
+ bool mpool_alloc;
+ struct kiocb iocb;
+ struct bio_vec *bvec;
+ struct work_struct work;
+ } f;
+ };
int sg_cnt;
/* data length as parsed from the command: */
size_t data_len;
@@ -263,7 +296,8 @@ struct nvmet_async_event {
};
u16 nvmet_parse_connect_cmd(struct nvmet_req *req);
-u16 nvmet_parse_io_cmd(struct nvmet_req *req);
+u16 nvmet_bdev_parse_io_cmd(struct nvmet_req *req);
+u16 nvmet_file_parse_io_cmd(struct nvmet_req *req);
u16 nvmet_parse_admin_cmd(struct nvmet_req *req);
u16 nvmet_parse_discovery_cmd(struct nvmet_req *req);
u16 nvmet_parse_fabrics_cmd(struct nvmet_req *req);
@@ -316,6 +350,7 @@ u16 nvmet_copy_to_sgl(struct nvmet_req *req, off_t off, const void *buf,
size_t len);
u16 nvmet_copy_from_sgl(struct nvmet_req *req, off_t off, void *buf,
size_t len);
+u16 nvmet_zero_sgl(struct nvmet_req *req, off_t off, size_t len);
u32 nvmet_get_log_page_len(struct nvme_command *cmd);
@@ -338,4 +373,14 @@ extern struct rw_semaphore nvmet_config_sem;
bool nvmet_host_allowed(struct nvmet_req *req, struct nvmet_subsys *subsys,
const char *hostnqn);
+int nvmet_bdev_ns_enable(struct nvmet_ns *ns);
+int nvmet_file_ns_enable(struct nvmet_ns *ns);
+void nvmet_bdev_ns_disable(struct nvmet_ns *ns);
+void nvmet_file_ns_disable(struct nvmet_ns *ns);
+
+static inline u32 nvmet_rw_len(struct nvmet_req *req)
+{
+ return ((u32)le16_to_cpu(req->cmd->rw.length) + 1) <<
+ req->ns->blksize_shift;
+}
#endif /* _NVMET_H */
diff --git a/drivers/of/device.c b/drivers/of/device.c
index 064c818105bd..33d85511d790 100644
--- a/drivers/of/device.c
+++ b/drivers/of/device.c
@@ -76,6 +76,8 @@ int of_device_add(struct platform_device *ofdev)
* of_dma_configure - Setup DMA configuration
* @dev: Device to apply DMA configuration
* @np: Pointer to OF node having DMA configuration
+ * @force_dma: Whether the device is to be set up by of_dma_configure() even if
+ * DMA capability is not explicitly described by firmware.
*
* Try to get the device's DMA configuration from DT and update it
* accordingly.
@@ -84,7 +86,7 @@ int of_device_add(struct platform_device *ofdev)
* can use a platform bus notifier and handle BUS_NOTIFY_ADD_DEVICE events
* to fix up DMA configuration.
*/
-int of_dma_configure(struct device *dev, struct device_node *np)
+int of_dma_configure(struct device *dev, struct device_node *np, bool force_dma)
{
u64 dma_addr, paddr, size = 0;
int ret;
@@ -100,7 +102,7 @@ int of_dma_configure(struct device *dev, struct device_node *np)
* DMA configuration regardless of whether "dma-ranges" is
* correctly specified or not.
*/
- if (!dev->bus->force_dma)
+ if (!force_dma)
return ret == -ENODEV ? 0 : ret;
dma_addr = offset = 0;
diff --git a/drivers/of/of_reserved_mem.c b/drivers/of/of_reserved_mem.c
index 9a4f4246231d..895c83e0c7b6 100644
--- a/drivers/of/of_reserved_mem.c
+++ b/drivers/of/of_reserved_mem.c
@@ -353,7 +353,7 @@ int of_reserved_mem_device_init_by_idx(struct device *dev,
/* ensure that dma_ops is set for virtual devices
* using reserved memory
*/
- of_dma_configure(dev, np);
+ of_dma_configure(dev, np, true);
dev_info(dev, "assigned reserved memory node %s\n", rmem->name);
} else {
diff --git a/drivers/of/overlay.c b/drivers/of/overlay.c
index b35fe88f1851..7baa53e5b1d7 100644
--- a/drivers/of/overlay.c
+++ b/drivers/of/overlay.c
@@ -102,12 +102,28 @@ static DEFINE_IDR(ovcs_idr);
static BLOCKING_NOTIFIER_HEAD(overlay_notify_chain);
+/**
+ * of_overlay_notifier_register() - Register notifier for overlay operations
+ * @nb: Notifier block to register
+ *
+ * Register for notification on overlay operations on device tree nodes. The
+ * reported actions are defined by @of_reconfig_change. The notifier callback
+ * furthermore receives a pointer to the affected device tree node.
+ *
+ * Note that a notifier callback is not supposed to store pointers to a device
+ * tree node or its content beyond the @OF_OVERLAY_POST_REMOVE notification
+ * for the respective node it received.
+ */
int of_overlay_notifier_register(struct notifier_block *nb)
{
return blocking_notifier_chain_register(&overlay_notify_chain, nb);
}
EXPORT_SYMBOL_GPL(of_overlay_notifier_register);
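A notifier that honors the lifetime rule above would look roughly like
this kernel-style sketch (callback and block names are made up); it may
inspect the overlay during the callback but must not cache node pointers
past OF_OVERLAY_POST_REMOVE:

static int my_overlay_cb(struct notifier_block *nb, unsigned long action,
			 void *arg)
{
	/* Inspect or copy what is needed here, but do not cache the
	 * node pointer past OF_OVERLAY_POST_REMOVE. */
	return NOTIFY_OK;
}

static struct notifier_block my_overlay_nb = {
	.notifier_call = my_overlay_cb,
};

/* of_overlay_notifier_register(&my_overlay_nb); */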
+/**
+ * of_overlay_notifier_unregister() - Unregister notifier for overlay operations
+ * @nb: Notifier block to unregister
+ */
int of_overlay_notifier_unregister(struct notifier_block *nb)
{
return blocking_notifier_chain_unregister(&overlay_notify_chain, nb);
@@ -671,17 +687,13 @@ static void free_overlay_changeset(struct overlay_changeset *ovcs)
of_node_put(ovcs->fragments[i].overlay);
}
kfree(ovcs->fragments);
-
/*
- * TODO
- *
- * would like to: kfree(ovcs->overlay_tree);
- * but can not since drivers may have pointers into this data
- *
- * would like to: kfree(ovcs->fdt);
- * but can not since drivers may have pointers into this data
+ * There should be no live pointers into ovcs->overlay_tree and
+ * ovcs->fdt due to the policy that overlay notifiers are not allowed
+ * to retain pointers into the overlay devicetree.
*/
-
+ kfree(ovcs->overlay_tree);
+ kfree(ovcs->fdt);
kfree(ovcs);
}
diff --git a/drivers/parisc/Kconfig b/drivers/parisc/Kconfig
index 3a102a84d637..5a48b5606110 100644
--- a/drivers/parisc/Kconfig
+++ b/drivers/parisc/Kconfig
@@ -103,11 +103,6 @@ config IOMMU_SBA
depends on PCI_LBA
default PCI_LBA
-config IOMMU_HELPER
- bool
- depends on IOMMU_SBA || IOMMU_CCIO
- default y
-
source "drivers/pcmcia/Kconfig"
endmenu
diff --git a/drivers/parisc/ccio-dma.c b/drivers/parisc/ccio-dma.c
index acba1f56af3e..614823617b8b 100644
--- a/drivers/parisc/ccio-dma.c
+++ b/drivers/parisc/ccio-dma.c
@@ -1108,19 +1108,6 @@ static int ccio_proc_info(struct seq_file *m, void *p)
return 0;
}
-static int ccio_proc_info_open(struct inode *inode, struct file *file)
-{
- return single_open(file, &ccio_proc_info, NULL);
-}
-
-static const struct file_operations ccio_proc_info_fops = {
- .owner = THIS_MODULE,
- .open = ccio_proc_info_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
static int ccio_proc_bitmap_info(struct seq_file *m, void *p)
{
struct ioc *ioc = ioc_list;
@@ -1135,19 +1122,6 @@ static int ccio_proc_bitmap_info(struct seq_file *m, void *p)
return 0;
}
-
-static int ccio_proc_bitmap_open(struct inode *inode, struct file *file)
-{
- return single_open(file, &ccio_proc_bitmap_info, NULL);
-}
-
-static const struct file_operations ccio_proc_bitmap_fops = {
- .owner = THIS_MODULE,
- .open = ccio_proc_bitmap_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
#endif /* CONFIG_PROC_FS */
/**
@@ -1195,7 +1169,7 @@ void * ccio_get_iommu(const struct parisc_device *dev)
* to/from certain pages. To avoid this happening, we mark these pages
* as `used', and ensure that nothing will try to allocate from them.
*/
-void ccio_cujo20_fixup(struct parisc_device *cujo, u32 iovp)
+void __init ccio_cujo20_fixup(struct parisc_device *cujo, u32 iovp)
{
unsigned int idx;
struct parisc_device *dev = parisc_parent(cujo);
@@ -1263,7 +1237,7 @@ static struct parisc_driver ccio_driver __refdata = {
* I/O Page Directory, the resource map, and initializing the
* U2/Uturn chip into virtual mode.
*/
-static void
+static void __init
ccio_ioc_init(struct ioc *ioc)
{
int i;
@@ -1589,15 +1563,13 @@ static int __init ccio_probe(struct parisc_device *dev)
#ifdef CONFIG_PROC_FS
if (ioc_count == 0) {
- proc_create(MODULE_NAME, 0, proc_runway_root,
- &ccio_proc_info_fops);
- proc_create(MODULE_NAME"-bitmap", 0, proc_runway_root,
- &ccio_proc_bitmap_fops);
+ proc_create_single(MODULE_NAME, 0, proc_runway_root,
+ ccio_proc_info);
+ proc_create_single(MODULE_NAME"-bitmap", 0, proc_runway_root,
+ ccio_proc_bitmap_info);
}
#endif
ioc_count++;
-
- parisc_has_iommu();
return 0;
}
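proc_create_single(), used above, collapses the removed
open/read/llseek/release file_operations boilerplate into a single show()
callback. A minimal kernel-style sketch (entry name and callback are made
up):

static int my_info_show(struct seq_file *m, void *v)
{
	seq_puts(m, "hello\n");
	return 0;
}

/* proc_create_single("my_info", 0, NULL, my_info_show); */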
diff --git a/drivers/parisc/sba_iommu.c b/drivers/parisc/sba_iommu.c
index 0a9c762a70fa..11de0eccf968 100644
--- a/drivers/parisc/sba_iommu.c
+++ b/drivers/parisc/sba_iommu.c
@@ -1864,20 +1864,6 @@ static int sba_proc_info(struct seq_file *m, void *p)
}
static int
-sba_proc_open(struct inode *i, struct file *f)
-{
- return single_open(f, &sba_proc_info, NULL);
-}
-
-static const struct file_operations sba_proc_fops = {
- .owner = THIS_MODULE,
- .open = sba_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
-static int
sba_proc_bitmap_info(struct seq_file *m, void *p)
{
struct sba_device *sba_dev = sba_list;
@@ -1889,20 +1875,6 @@ sba_proc_bitmap_info(struct seq_file *m, void *p)
return 0;
}
-
-static int
-sba_proc_bitmap_open(struct inode *i, struct file *f)
-{
- return single_open(f, &sba_proc_bitmap_info, NULL);
-}
-
-static const struct file_operations sba_proc_bitmap_fops = {
- .owner = THIS_MODULE,
- .open = sba_proc_bitmap_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
#endif /* CONFIG_PROC_FS */
static const struct parisc_device_id sba_tbl[] __initconst = {
@@ -2014,11 +1986,9 @@ static int __init sba_driver_callback(struct parisc_device *dev)
break;
}
- proc_create("sba_iommu", 0, root, &sba_proc_fops);
- proc_create("sba_iommu-bitmap", 0, root, &sba_proc_bitmap_fops);
+ proc_create_single("sba_iommu", 0, root, sba_proc_info);
+ proc_create_single("sba_iommu-bitmap", 0, root, sba_proc_bitmap_info);
#endif
-
- parisc_has_iommu();
return 0;
}
diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig
index 34b56a8f8480..29a487f31dae 100644
--- a/drivers/pci/Kconfig
+++ b/drivers/pci/Kconfig
@@ -5,10 +5,6 @@
source "drivers/pci/pcie/Kconfig"
-config PCI_BUS_ADDR_T_64BIT
- def_bool y if (ARCH_DMA_ADDR_T_64BIT || 64BIT)
- depends on PCI
-
config PCI_MSI
bool "Message Signaled Interrupts (MSI and MSI-X)"
depends on PCI
diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c
index bc2ded4c451f..35b7fc87eac5 100644
--- a/drivers/pci/bus.c
+++ b/drivers/pci/bus.c
@@ -120,7 +120,7 @@ int devm_request_pci_bus_resources(struct device *dev,
EXPORT_SYMBOL_GPL(devm_request_pci_bus_resources);
static struct pci_bus_region pci_32_bit = {0, 0xffffffffULL};
-#ifdef CONFIG_PCI_BUS_ADDR_T_64BIT
+#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
static struct pci_bus_region pci_64_bit = {0,
(pci_bus_addr_t) 0xffffffffffffffffULL};
static struct pci_bus_region pci_high = {(pci_bus_addr_t) 0x100000000ULL,
@@ -230,7 +230,7 @@ int pci_bus_alloc_resource(struct pci_bus *bus, struct resource *res,
resource_size_t),
void *alignf_data)
{
-#ifdef CONFIG_PCI_BUS_ADDR_T_64BIT
+#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
int rc;
if (res->flags & IORESOURCE_MEM_64) {
diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
index 30250631efe7..f45b74fcc059 100644
--- a/drivers/pci/msi.c
+++ b/drivers/pci/msi.c
@@ -1434,6 +1434,9 @@ struct irq_domain *pci_msi_create_irq_domain(struct fwnode_handle *fwnode,
{
struct irq_domain *domain;
+ if (WARN_ON(info->flags & MSI_FLAG_LEVEL_CAPABLE))
+ info->flags &= ~MSI_FLAG_LEVEL_CAPABLE;
+
if (info->flags & MSI_FLAG_USE_DEF_DOM_OPS)
pci_msi_domain_update_dom_ops(info);
if (info->flags & MSI_FLAG_USE_DEF_CHIP_OPS)
diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c
index b9a131137e64..f8269a725667 100644
--- a/drivers/pci/pci-driver.c
+++ b/drivers/pci/pci-driver.c
@@ -16,6 +16,8 @@
#include <linux/pm_runtime.h>
#include <linux/suspend.h>
#include <linux/kexec.h>
+#include <linux/of_device.h>
+#include <linux/acpi.h>
#include "pci.h"
#include "pcie/portdrv.h"
@@ -1577,6 +1579,35 @@ static int pci_bus_num_vf(struct device *dev)
return pci_num_vf(to_pci_dev(dev));
}
+/**
+ * pci_dma_configure - Setup DMA configuration
+ * @dev: ptr to dev structure
+ *
+ * Function to update a PCI device's DMA configuration using the same
+ * info from the OF node or ACPI node of the host bridge's parent (if any).
+ */
+static int pci_dma_configure(struct device *dev)
+{
+ struct device *bridge;
+ int ret = 0;
+
+ bridge = pci_get_host_bridge_device(to_pci_dev(dev));
+
+ if (IS_ENABLED(CONFIG_OF) && bridge->parent &&
+ bridge->parent->of_node) {
+ ret = of_dma_configure(dev, bridge->parent->of_node, true);
+ } else if (has_acpi_companion(bridge)) {
+ struct acpi_device *adev = to_acpi_device_node(bridge->fwnode);
+ enum dev_dma_attr attr = acpi_get_dma_attr(adev);
+
+ if (attr != DEV_DMA_NOT_SUPPORTED)
+ ret = acpi_dma_configure(dev, attr);
+ }
+
+ pci_put_host_bridge_device(bridge);
+ return ret;
+}
+
struct bus_type pci_bus_type = {
.name = "pci",
.match = pci_bus_match,
@@ -1589,7 +1620,7 @@ struct bus_type pci_bus_type = {
.drv_groups = pci_drv_groups,
.pm = PCI_PM_OPS_PTR,
.num_vf = pci_bus_num_vf,
- .force_dma = true,
+ .dma_configure = pci_dma_configure,
};
EXPORT_SYMBOL(pci_bus_type);
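With bus_type.force_dma gone, each bus that wants firmware-driven DMA
setup now supplies a dma_configure() method, as pci_bus_type does above. A
minimal sketch of wiring the same hook on a hypothetical bus:

static int mybus_dma_configure(struct device *dev)
{
	/* Defer to the firmware description; the old force_dma flag is
	 * now expressed by passing true here instead. */
	return of_dma_configure(dev, dev->of_node, true);
}

struct bus_type mybus_type = {
	.name		= "mybus",
	.dma_configure	= mybus_dma_configure,
};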
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index a04197ce767d..dbfe7c4f3776 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -1910,7 +1910,7 @@ void pci_pme_active(struct pci_dev *dev, bool enable)
EXPORT_SYMBOL(pci_pme_active);
/**
- * pci_enable_wake - enable PCI device as wakeup event source
+ * __pci_enable_wake - enable PCI device as wakeup event source
* @dev: PCI device affected
* @state: PCI state from which device will issue wakeup events
* @enable: True to enable event generation; false to disable
@@ -1928,7 +1928,7 @@ EXPORT_SYMBOL(pci_pme_active);
* Error code depending on the platform is returned if both the platform and
* the native mechanism fail to enable the generation of wake-up events
*/
-int pci_enable_wake(struct pci_dev *dev, pci_power_t state, bool enable)
+static int __pci_enable_wake(struct pci_dev *dev, pci_power_t state, bool enable)
{
int ret = 0;
@@ -1969,6 +1969,23 @@ int pci_enable_wake(struct pci_dev *dev, pci_power_t state, bool enable)
return ret;
}
+
+/**
+ * pci_enable_wake - change wakeup settings for a PCI device
+ * @pci_dev: Target device
+ * @state: PCI state from which device will issue wakeup events
+ * @enable: Whether or not to enable event generation
+ *
+ * If @enable is set, check device_may_wakeup() for the device before calling
+ * __pci_enable_wake() for it.
+ */
+int pci_enable_wake(struct pci_dev *pci_dev, pci_power_t state, bool enable)
+{
+ if (enable && !device_may_wakeup(&pci_dev->dev))
+ return -EINVAL;
+
+ return __pci_enable_wake(pci_dev, state, enable);
+}
EXPORT_SYMBOL(pci_enable_wake);
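After this split, drivers continue to call the exported pci_enable_wake(), which now refuses to arm wakeup when userspace has disabled it (device_may_wakeup() returns false), while the PCI core's internal paths use the unchecked __pci_enable_wake(). A hedged example of a driver-side caller (foo_suspend is hypothetical; the PCI calls are real APIs):

	static int foo_suspend(struct pci_dev *pdev, pm_message_t state)
	{
		pci_save_state(pdev);
		/* Now returns -EINVAL if wakeup was disabled via sysfs. */
		pci_enable_wake(pdev, PCI_D3hot, true);
		return pci_set_power_state(pdev, PCI_D3hot);
	}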
/**
@@ -1981,9 +1998,9 @@ EXPORT_SYMBOL(pci_enable_wake);
* should not be called twice in a row to enable wake-up due to PCI PM vs ACPI
* ordering constraints.
*
- * This function only returns error code if the device is not capable of
- * generating PME# from both D3_hot and D3_cold, and the platform is unable to
- * enable wake-up power for it.
+ * This function only returns an error code if the device is not allowed to
+ * wake up the system from sleep, or if it is not capable of generating PME#
+ * from both D3_hot and D3_cold and the platform is unable to enable wake-up
+ * power for it.
*/
int pci_wake_from_d3(struct pci_dev *dev, bool enable)
{
@@ -2114,7 +2131,7 @@ int pci_finish_runtime_suspend(struct pci_dev *dev)
dev->runtime_d3cold = target_state == PCI_D3cold;
- pci_enable_wake(dev, target_state, pci_dev_run_wake(dev));
+ __pci_enable_wake(dev, target_state, pci_dev_run_wake(dev));
error = pci_set_power_state(dev, target_state);
@@ -2138,16 +2155,16 @@ bool pci_dev_run_wake(struct pci_dev *dev)
{
struct pci_bus *bus = dev->bus;
- if (device_can_wakeup(&dev->dev))
- return true;
-
if (!dev->pme_support)
return false;
/* PME-capable in principle, but not from the target power state */
- if (!pci_pme_capable(dev, pci_target_state(dev, false)))
+ if (!pci_pme_capable(dev, pci_target_state(dev, true)))
return false;
+ if (device_can_wakeup(&dev->dev))
+ return true;
+
while (bus->parent) {
struct pci_dev *bridge = bus->self;
diff --git a/drivers/pci/proc.c b/drivers/pci/proc.c
index 1ee8927a0635..7ac035af39f0 100644
--- a/drivers/pci/proc.c
+++ b/drivers/pci/proc.c
@@ -435,25 +435,12 @@ int pci_proc_detach_bus(struct pci_bus *bus)
return 0;
}
-static int proc_bus_pci_dev_open(struct inode *inode, struct file *file)
-{
- return seq_open(file, &proc_bus_pci_devices_op);
-}
-
-static const struct file_operations proc_bus_pci_dev_operations = {
- .owner = THIS_MODULE,
- .open = proc_bus_pci_dev_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
static int __init pci_proc_init(void)
{
struct pci_dev *dev = NULL;
proc_bus_pci_dir = proc_mkdir("bus/pci", NULL);
- proc_create("devices", 0, proc_bus_pci_dir,
- &proc_bus_pci_dev_operations);
+ proc_create_seq("devices", 0, proc_bus_pci_dir,
+ &proc_bus_pci_devices_op);
proc_initialized = 1;
for_each_pci_dev(dev)
pci_proc_attach_device(dev);
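proc_create_seq() is the helper this series converts such callers to: procfs supplies the open/read/llseek/release plumbing, and the caller passes only its seq_operations. A generic sketch of the pattern (my_seq_ops and the entry name are hypothetical):

	#include <linux/proc_fs.h>
	#include <linux/seq_file.h>

	static int __init my_proc_init(void)
	{
		/* Replaces proc_create(..., &my_fops) plus a seq_open() wrapper. */
		if (!proc_create_seq("driver/mydev", 0, NULL, &my_seq_ops))
			return -ENOMEM;
		return 0;
	}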
diff --git a/drivers/pinctrl/intel/pinctrl-cherryview.c b/drivers/pinctrl/intel/pinctrl-cherryview.c
index b1ae1618fefe..fee9225ca559 100644
--- a/drivers/pinctrl/intel/pinctrl-cherryview.c
+++ b/drivers/pinctrl/intel/pinctrl-cherryview.c
@@ -1622,22 +1622,30 @@ static int chv_gpio_probe(struct chv_pinctrl *pctrl, int irq)
if (!need_valid_mask) {
irq_base = devm_irq_alloc_descs(pctrl->dev, -1, 0,
- chip->ngpio, NUMA_NO_NODE);
+ community->npins, NUMA_NO_NODE);
if (irq_base < 0) {
dev_err(pctrl->dev, "Failed to allocate IRQ numbers\n");
return irq_base;
}
- } else {
- irq_base = 0;
}
- ret = gpiochip_irqchip_add(chip, &chv_gpio_irqchip, irq_base,
+ ret = gpiochip_irqchip_add(chip, &chv_gpio_irqchip, 0,
handle_bad_irq, IRQ_TYPE_NONE);
if (ret) {
dev_err(pctrl->dev, "failed to add IRQ chip\n");
return ret;
}
+ if (!need_valid_mask) {
+ for (i = 0; i < community->ngpio_ranges; i++) {
+ range = &community->gpio_ranges[i];
+
+ irq_domain_associate_many(chip->irq.domain, irq_base,
+ range->base, range->npins);
+ irq_base += range->npins;
+ }
+ }
+
gpiochip_set_chained_irqchip(chip, &chv_gpio_irqchip, irq,
chv_gpio_irq_handler);
return 0;
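irq_domain_associate_many() is what binds the pre-allocated block of Linux IRQs to consecutive hardware IRQ numbers per pad group, preserving the driver's historical 1:1 numbering even though the irqchip is now registered with a first_irq of 0. The call's shape, as declared in include/linux/irqdomain.h:

	/* Map hwirqs hwirq_base..hwirq_base+count-1 onto Linux IRQs
	 * irq_base..irq_base+count-1 within an existing domain. */
	irq_domain_associate_many(domain, irq_base, hwirq_base, count);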
diff --git a/drivers/pinctrl/intel/pinctrl-sunrisepoint.c b/drivers/pinctrl/intel/pinctrl-sunrisepoint.c
index 8870a4100164..fee3435a6f15 100644
--- a/drivers/pinctrl/intel/pinctrl-sunrisepoint.c
+++ b/drivers/pinctrl/intel/pinctrl-sunrisepoint.c
@@ -36,6 +36,27 @@
.npins = ((e) - (s) + 1), \
}
+#define SPTH_GPP(r, s, e, g) \
+ { \
+ .reg_num = (r), \
+ .base = (s), \
+ .size = ((e) - (s) + 1), \
+ .gpio_base = (g), \
+ }
+
+#define SPTH_COMMUNITY(b, s, e, g) \
+ { \
+ .barno = (b), \
+ .padown_offset = SPT_PAD_OWN, \
+ .padcfglock_offset = SPT_PADCFGLOCK, \
+ .hostown_offset = SPT_HOSTSW_OWN, \
+ .ie_offset = SPT_GPI_IE, \
+ .pin_base = (s), \
+ .npins = ((e) - (s) + 1), \
+ .gpps = (g), \
+ .ngpps = ARRAY_SIZE(g), \
+ }
+
/* Sunrisepoint-LP */
static const struct pinctrl_pin_desc sptlp_pins[] = {
/* GPP_A */
@@ -531,10 +552,28 @@ static const struct intel_function spth_functions[] = {
FUNCTION("i2c2", spth_i2c2_groups),
};
+static const struct intel_padgroup spth_community0_gpps[] = {
+ SPTH_GPP(0, 0, 23, 0), /* GPP_A */
+ SPTH_GPP(1, 24, 47, 24), /* GPP_B */
+};
+
+static const struct intel_padgroup spth_community1_gpps[] = {
+ SPTH_GPP(0, 48, 71, 48), /* GPP_C */
+ SPTH_GPP(1, 72, 95, 72), /* GPP_D */
+ SPTH_GPP(2, 96, 108, 96), /* GPP_E */
+ SPTH_GPP(3, 109, 132, 120), /* GPP_F */
+ SPTH_GPP(4, 133, 156, 144), /* GPP_G */
+ SPTH_GPP(5, 157, 180, 168), /* GPP_H */
+};
+
+static const struct intel_padgroup spth_community3_gpps[] = {
+ SPTH_GPP(0, 181, 191, 192), /* GPP_I */
+};
+
static const struct intel_community spth_communities[] = {
- SPT_COMMUNITY(0, 0, 47),
- SPT_COMMUNITY(1, 48, 180),
- SPT_COMMUNITY(2, 181, 191),
+ SPTH_COMMUNITY(0, 0, 47, spth_community0_gpps),
+ SPTH_COMMUNITY(1, 48, 180, spth_community1_gpps),
+ SPTH_COMMUNITY(2, 181, 191, spth_community3_gpps),
};
static const struct intel_pinctrl_soc_data spth_soc_data = {
diff --git a/drivers/pinctrl/meson/pinctrl-meson-axg.c b/drivers/pinctrl/meson/pinctrl-meson-axg.c
index 4b91ff74779b..99a6ceac8e53 100644
--- a/drivers/pinctrl/meson/pinctrl-meson-axg.c
+++ b/drivers/pinctrl/meson/pinctrl-meson-axg.c
@@ -898,7 +898,7 @@ static struct meson_bank meson_axg_periphs_banks[] = {
static struct meson_bank meson_axg_aobus_banks[] = {
/* name first last irq pullen pull dir out in */
- BANK("AO", GPIOAO_0, GPIOAO_9, 0, 13, 0, 16, 0, 0, 0, 0, 0, 16, 1, 0),
+ BANK("AO", GPIOAO_0, GPIOAO_13, 0, 13, 0, 16, 0, 0, 0, 0, 0, 16, 1, 0),
};
static struct meson_pmx_bank meson_axg_periphs_pmx_banks[] = {
diff --git a/drivers/pinctrl/stm32/pinctrl-stm32.c b/drivers/pinctrl/stm32/pinctrl-stm32.c
index 6cbcff42ba47..dfed60982a8a 100644
--- a/drivers/pinctrl/stm32/pinctrl-stm32.c
+++ b/drivers/pinctrl/stm32/pinctrl-stm32.c
@@ -267,12 +267,13 @@ static void stm32_gpio_irq_release_resources(struct irq_data *irq_data)
}
static struct irq_chip stm32_gpio_irq_chip = {
- .name = "stm32gpio",
- .irq_ack = irq_chip_ack_parent,
- .irq_mask = irq_chip_mask_parent,
- .irq_unmask = irq_chip_unmask_parent,
- .irq_set_type = irq_chip_set_type_parent,
- .irq_set_wake = irq_chip_set_wake_parent,
+ .name = "stm32gpio",
+ .irq_eoi = irq_chip_eoi_parent,
+ .irq_ack = irq_chip_ack_parent,
+ .irq_mask = irq_chip_mask_parent,
+ .irq_unmask = irq_chip_unmask_parent,
+ .irq_set_type = irq_chip_set_type_parent,
+ .irq_set_wake = irq_chip_set_wake_parent,
.irq_request_resources = stm32_gpio_irq_request_resources,
.irq_release_resources = stm32_gpio_irq_release_resources,
};
diff --git a/drivers/platform/chrome/Kconfig b/drivers/platform/chrome/Kconfig
index e728a96cabfd..cb0df9eb3e0f 100644
--- a/drivers/platform/chrome/Kconfig
+++ b/drivers/platform/chrome/Kconfig
@@ -38,6 +38,17 @@ config CHROMEOS_PSTORE
If you have a supported Chromebook, choose Y or M here.
The module will be called chromeos_pstore.
+config CHROMEOS_TBMC
+ tristate "ChromeOS Tablet Switch Controller"
+ depends on ACPI
+ depends on INPUT
+ help
+ This option adds a driver for the tablet switch on
+ select Chrome OS systems.
+
+ To compile this driver as a module, choose M here: the
+ module will be called chromeos_tbmc.
+
config CROS_EC_CTL
tristate
diff --git a/drivers/platform/chrome/Makefile b/drivers/platform/chrome/Makefile
index ff3b369911f0..e44c37a63fa9 100644
--- a/drivers/platform/chrome/Makefile
+++ b/drivers/platform/chrome/Makefile
@@ -2,6 +2,7 @@
obj-$(CONFIG_CHROMEOS_LAPTOP) += chromeos_laptop.o
obj-$(CONFIG_CHROMEOS_PSTORE) += chromeos_pstore.o
+obj-$(CONFIG_CHROMEOS_TBMC) += chromeos_tbmc.o
cros_ec_ctl-objs := cros_ec_sysfs.o cros_ec_lightbar.o \
cros_ec_vbc.o cros_ec_debugfs.o
obj-$(CONFIG_CROS_EC_CTL) += cros_ec_ctl.o
diff --git a/drivers/platform/chrome/chromeos_laptop.c b/drivers/platform/chrome/chromeos_laptop.c
index 5c47f451e43b..24326eecd787 100644
--- a/drivers/platform/chrome/chromeos_laptop.c
+++ b/drivers/platform/chrome/chromeos_laptop.c
@@ -6,6 +6,7 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/acpi.h>
#include <linux/dmi.h>
#include <linux/i2c.h>
#include <linux/input.h>
@@ -54,6 +55,11 @@ struct i2c_peripheral {
struct i2c_client *client;
};
+struct acpi_peripheral {
+ char hid[ACPI_ID_LEN];
+ const struct property_entry *properties;
+};
+
struct chromeos_laptop {
/*
* Note that we can't mark this pointer as const because
@@ -61,6 +67,9 @@ struct chromeos_laptop {
*/
struct i2c_peripheral *i2c_peripherals;
unsigned int num_i2c_peripherals;
+
+ const struct acpi_peripheral *acpi_peripherals;
+ unsigned int num_acpi_peripherals;
};
static const struct chromeos_laptop *cros_laptop;
@@ -148,6 +157,38 @@ static void chromeos_laptop_check_adapter(struct i2c_adapter *adapter)
}
}
+static bool chromeos_laptop_adjust_client(struct i2c_client *client)
+{
+ const struct acpi_peripheral *acpi_dev;
+ struct acpi_device_id acpi_ids[2] = { };
+ int i;
+ int error;
+
+ if (!has_acpi_companion(&client->dev))
+ return false;
+
+ for (i = 0; i < cros_laptop->num_acpi_peripherals; i++) {
+ acpi_dev = &cros_laptop->acpi_peripherals[i];
+
+ memcpy(acpi_ids[0].id, acpi_dev->hid, ACPI_ID_LEN);
+
+ if (acpi_match_device(acpi_ids, &client->dev)) {
+ error = device_add_properties(&client->dev,
+ acpi_dev->properties);
+ if (error) {
+ dev_err(&client->dev,
+ "failed to add properties: %d\n",
+ error);
+ break;
+ }
+
+ return true;
+ }
+ }
+
+ return false;
+}
+
static void chromeos_laptop_detach_i2c_client(struct i2c_client *client)
{
struct i2c_peripheral *i2c_dev;
@@ -170,6 +211,8 @@ static int chromeos_laptop_i2c_notifier_call(struct notifier_block *nb,
case BUS_NOTIFY_ADD_DEVICE:
if (dev->type == &i2c_adapter_type)
chromeos_laptop_check_adapter(to_i2c_adapter(dev));
+ else if (dev->type == &i2c_client_type)
+ chromeos_laptop_adjust_client(to_i2c_client(dev));
break;
case BUS_NOTIFY_REMOVED_DEVICE:
@@ -191,6 +234,12 @@ static const struct chromeos_laptop _name __initconst = { \
.num_i2c_peripherals = ARRAY_SIZE(_name##_peripherals), \
}
+#define DECLARE_ACPI_CROS_LAPTOP(_name) \
+static const struct chromeos_laptop _name __initconst = { \
+ .acpi_peripherals = _name##_peripherals, \
+ .num_acpi_peripherals = ARRAY_SIZE(_name##_peripherals), \
+}
+
static struct i2c_peripheral samsung_series_5_550_peripherals[] __initdata = {
/* Touchpad. */
{
@@ -234,16 +283,25 @@ static const int chromebook_pixel_tp_keys[] __initconst = {
static const struct property_entry
chromebook_pixel_trackpad_props[] __initconst = {
+ PROPERTY_ENTRY_STRING("compatible", "atmel,maxtouch"),
PROPERTY_ENTRY_U32_ARRAY("linux,gpio-keymap", chromebook_pixel_tp_keys),
{ }
};
+static const struct property_entry
+chromebook_atmel_touchscreen_props[] __initconst = {
+ PROPERTY_ENTRY_STRING("compatible", "atmel,maxtouch"),
+ { }
+};
+
static struct i2c_peripheral chromebook_pixel_peripherals[] __initdata = {
/* Touch Screen. */
{
.board_info = {
I2C_BOARD_INFO("atmel_mxt_ts",
ATMEL_TS_I2C_ADDR),
+ .properties =
+ chromebook_atmel_touchscreen_props,
.flags = I2C_CLIENT_WAKE,
},
.dmi_name = "touchscreen",
@@ -354,6 +412,8 @@ static struct i2c_peripheral acer_c720_peripherals[] __initdata = {
.board_info = {
I2C_BOARD_INFO("atmel_mxt_ts",
ATMEL_TS_I2C_ADDR),
+ .properties =
+ chromebook_atmel_touchscreen_props,
.flags = I2C_CLIENT_WAKE,
},
.dmi_name = "touchscreen",
@@ -419,6 +479,47 @@ static struct i2c_peripheral cr48_peripherals[] __initdata = {
};
DECLARE_CROS_LAPTOP(cr48);
+static const u32 samus_touchpad_buttons[] __initconst = {
+ KEY_RESERVED,
+ KEY_RESERVED,
+ KEY_RESERVED,
+ BTN_LEFT
+};
+
+static const struct property_entry samus_trackpad_props[] __initconst = {
+ PROPERTY_ENTRY_STRING("compatible", "atmel,maxtouch"),
+ PROPERTY_ENTRY_U32_ARRAY("linux,gpio-keymap", samus_touchpad_buttons),
+ { }
+};
+
+static struct acpi_peripheral samus_peripherals[] __initdata = {
+ /* Touchpad */
+ {
+ .hid = "ATML0000",
+ .properties = samus_trackpad_props,
+ },
+	/* Touchscreen */
+ {
+ .hid = "ATML0001",
+ .properties = chromebook_atmel_touchscreen_props,
+ },
+};
+DECLARE_ACPI_CROS_LAPTOP(samus);
+
+static struct acpi_peripheral generic_atmel_peripherals[] __initdata = {
+ /* Touchpad */
+ {
+ .hid = "ATML0000",
+ .properties = chromebook_pixel_trackpad_props,
+ },
+	/* Touchscreen */
+ {
+ .hid = "ATML0001",
+ .properties = chromebook_atmel_touchscreen_props,
+ },
+};
+DECLARE_ACPI_CROS_LAPTOP(generic_atmel);
+
static const struct dmi_system_id chromeos_laptop_dmi_table[] __initconst = {
{
.ident = "Samsung Series 5 550",
@@ -502,17 +603,72 @@ static const struct dmi_system_id chromeos_laptop_dmi_table[] __initconst = {
},
.driver_data = (void *)&cr48,
},
+ /* Devices with peripherals incompletely described in ACPI */
+ {
+ .ident = "Chromebook Pro",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Google"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "Caroline"),
+ },
+ .driver_data = (void *)&samus,
+ },
+ {
+ .ident = "Google Pixel 2 (2015)",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "GOOGLE"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "Samus"),
+ },
+ .driver_data = (void *)&samus,
+ },
+ {
+ .ident = "Samsung Chromebook 3",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "GOOGLE"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "Celes"),
+ },
+ .driver_data = (void *)&samus,
+ },
+ {
+ /*
+ * Other Chromebooks with Atmel touch controllers:
+ * - Winky (touchpad)
+ * - Clapper, Expresso, Rambi, Glimmer (touchscreen)
+ */
+ .ident = "Other Chromebook",
+ .matches = {
+ /*
+ * This will match all Google devices, not only devices
+ * with Atmel, but we will validate that the device
+ * actually has matching peripherals.
+ */
+ DMI_MATCH(DMI_SYS_VENDOR, "GOOGLE"),
+ },
+ .driver_data = (void *)&generic_atmel,
+ },
{ }
};
MODULE_DEVICE_TABLE(dmi, chromeos_laptop_dmi_table);
-static int __init chromeos_laptop_scan_adapter(struct device *dev, void *data)
+static int __init chromeos_laptop_scan_peripherals(struct device *dev, void *data)
{
- struct i2c_adapter *adapter;
+ int error;
- adapter = i2c_verify_adapter(dev);
- if (adapter)
- chromeos_laptop_check_adapter(adapter);
+ if (dev->type == &i2c_adapter_type) {
+ chromeos_laptop_check_adapter(to_i2c_adapter(dev));
+ } else if (dev->type == &i2c_client_type) {
+ if (chromeos_laptop_adjust_client(to_i2c_client(dev))) {
+ /*
+			 * Now that we have the needed properties, re-trigger
+			 * the driver probe in case the driver was initialized
+			 * earlier and its probe failed.
+ */
+ error = device_attach(dev);
+ if (error < 0)
+ dev_warn(dev,
+ "%s: device_attach() failed: %d\n",
+ __func__, error);
+ }
+ }
return 0;
}
@@ -556,27 +712,24 @@ static int __init chromeos_laptop_setup_irq(struct i2c_peripheral *i2c_dev)
return 0;
}
-static struct chromeos_laptop * __init
-chromeos_laptop_prepare(const struct chromeos_laptop *src)
+static int __init
+chromeos_laptop_prepare_i2c_peripherals(struct chromeos_laptop *cros_laptop,
+ const struct chromeos_laptop *src)
{
- struct chromeos_laptop *cros_laptop;
struct i2c_peripheral *i2c_dev;
struct i2c_board_info *info;
- int error;
int i;
+ int error;
- cros_laptop = kzalloc(sizeof(*cros_laptop), GFP_KERNEL);
- if (!cros_laptop)
- return ERR_PTR(-ENOMEM);
+ if (!src->num_i2c_peripherals)
+ return 0;
cros_laptop->i2c_peripherals = kmemdup(src->i2c_peripherals,
src->num_i2c_peripherals *
sizeof(*src->i2c_peripherals),
GFP_KERNEL);
- if (!cros_laptop->i2c_peripherals) {
- error = -ENOMEM;
- goto err_free_cros_laptop;
- }
+ if (!cros_laptop->i2c_peripherals)
+ return -ENOMEM;
cros_laptop->num_i2c_peripherals = src->num_i2c_peripherals;
@@ -586,7 +739,7 @@ chromeos_laptop_prepare(const struct chromeos_laptop *src)
error = chromeos_laptop_setup_irq(i2c_dev);
if (error)
- goto err_destroy_cros_peripherals;
+ goto err_out;
/* We need to deep-copy properties */
if (info->properties) {
@@ -594,14 +747,14 @@ chromeos_laptop_prepare(const struct chromeos_laptop *src)
property_entries_dup(info->properties);
if (IS_ERR(info->properties)) {
error = PTR_ERR(info->properties);
- goto err_destroy_cros_peripherals;
+ goto err_out;
}
}
}
- return cros_laptop;
+ return 0;
-err_destroy_cros_peripherals:
+err_out:
while (--i >= 0) {
i2c_dev = &cros_laptop->i2c_peripherals[i];
info = &i2c_dev->board_info;
@@ -609,13 +762,74 @@ err_destroy_cros_peripherals:
property_entries_free(info->properties);
}
kfree(cros_laptop->i2c_peripherals);
-err_free_cros_laptop:
- kfree(cros_laptop);
- return ERR_PTR(error);
+ return error;
+}
+
+static int __init
+chromeos_laptop_prepare_acpi_peripherals(struct chromeos_laptop *cros_laptop,
+ const struct chromeos_laptop *src)
+{
+ struct acpi_peripheral *acpi_peripherals;
+ struct acpi_peripheral *acpi_dev;
+ const struct acpi_peripheral *src_dev;
+ int n_peripherals = 0;
+ int i;
+ int error;
+
+ for (i = 0; i < src->num_acpi_peripherals; i++) {
+ if (acpi_dev_present(src->acpi_peripherals[i].hid, NULL, -1))
+ n_peripherals++;
+ }
+
+ if (!n_peripherals)
+ return 0;
+
+ acpi_peripherals = kcalloc(n_peripherals,
+ sizeof(*src->acpi_peripherals),
+ GFP_KERNEL);
+ if (!acpi_peripherals)
+ return -ENOMEM;
+
+ acpi_dev = acpi_peripherals;
+ for (i = 0; i < src->num_acpi_peripherals; i++) {
+ src_dev = &src->acpi_peripherals[i];
+ if (!acpi_dev_present(src_dev->hid, NULL, -1))
+ continue;
+
+ *acpi_dev = *src_dev;
+
+ /* We need to deep-copy properties */
+ if (src_dev->properties) {
+ acpi_dev->properties =
+ property_entries_dup(src_dev->properties);
+ if (IS_ERR(acpi_dev->properties)) {
+ error = PTR_ERR(acpi_dev->properties);
+ goto err_out;
+ }
+ }
+
+ acpi_dev++;
+ }
+
+ cros_laptop->acpi_peripherals = acpi_peripherals;
+ cros_laptop->num_acpi_peripherals = n_peripherals;
+
+ return 0;
+
+err_out:
+ while (--i >= 0) {
+ acpi_dev = &acpi_peripherals[i];
+ if (acpi_dev->properties)
+ property_entries_free(acpi_dev->properties);
+ }
+
+ kfree(acpi_peripherals);
+ return error;
}
static void chromeos_laptop_destroy(const struct chromeos_laptop *cros_laptop)
{
+ const struct acpi_peripheral *acpi_dev;
struct i2c_peripheral *i2c_dev;
struct i2c_board_info *info;
int i;
@@ -631,10 +845,41 @@ static void chromeos_laptop_destroy(const struct chromeos_laptop *cros_laptop)
property_entries_free(info->properties);
}
+ for (i = 0; i < cros_laptop->num_acpi_peripherals; i++) {
+ acpi_dev = &cros_laptop->acpi_peripherals[i];
+
+ if (acpi_dev->properties)
+ property_entries_free(acpi_dev->properties);
+ }
+
kfree(cros_laptop->i2c_peripherals);
+ kfree(cros_laptop->acpi_peripherals);
kfree(cros_laptop);
}
+static struct chromeos_laptop * __init
+chromeos_laptop_prepare(const struct chromeos_laptop *src)
+{
+ struct chromeos_laptop *cros_laptop;
+ int error;
+
+ cros_laptop = kzalloc(sizeof(*cros_laptop), GFP_KERNEL);
+ if (!cros_laptop)
+ return ERR_PTR(-ENOMEM);
+
+ error = chromeos_laptop_prepare_i2c_peripherals(cros_laptop, src);
+ if (!error)
+ error = chromeos_laptop_prepare_acpi_peripherals(cros_laptop,
+ src);
+
+ if (error) {
+ chromeos_laptop_destroy(cros_laptop);
+ return ERR_PTR(error);
+ }
+
+ return cros_laptop;
+}
+
static int __init chromeos_laptop_init(void)
{
const struct dmi_system_id *dmi_id;
@@ -652,21 +897,33 @@ static int __init chromeos_laptop_init(void)
if (IS_ERR(cros_laptop))
return PTR_ERR(cros_laptop);
+ if (!cros_laptop->num_i2c_peripherals &&
+ !cros_laptop->num_acpi_peripherals) {
+ pr_debug("no relevant devices detected\n");
+ error = -ENODEV;
+ goto err_destroy_cros_laptop;
+ }
+
error = bus_register_notifier(&i2c_bus_type,
&chromeos_laptop_i2c_notifier);
if (error) {
- pr_err("failed to register i2c bus notifier: %d\n", error);
- chromeos_laptop_destroy(cros_laptop);
- return error;
+ pr_err("failed to register i2c bus notifier: %d\n",
+ error);
+ goto err_destroy_cros_laptop;
}
/*
- * Scan adapters that have been registered before we installed
- * the notifier to make sure we do not miss any devices.
+ * Scan adapters that have been registered and clients that have
+ * been created before we installed the notifier to make sure
+ * we do not miss any devices.
*/
- i2c_for_each_dev(NULL, chromeos_laptop_scan_adapter);
+ i2c_for_each_dev(NULL, chromeos_laptop_scan_peripherals);
return 0;
+
+err_destroy_cros_laptop:
+ chromeos_laptop_destroy(cros_laptop);
+ return error;
}
static void __exit chromeos_laptop_exit(void)
diff --git a/drivers/platform/chrome/chromeos_tbmc.c b/drivers/platform/chrome/chromeos_tbmc.c
new file mode 100644
index 000000000000..b935df6a9694
--- /dev/null
+++ b/drivers/platform/chrome/chromeos_tbmc.c
@@ -0,0 +1,111 @@
+// SPDX-License-Identifier: GPL-2.0+
+// Driver to detect Tablet Mode for ChromeOS convertibles.
+//
+// Copyright (C) 2017 Google, Inc.
+// Author: Gwendal Grignou <gwendal@chromium.org>
+
+#include <linux/acpi.h>
+#include <linux/input.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/printk.h>
+
+#define DRV_NAME "chromeos_tbmc"
+#define ACPI_DRV_NAME "GOOG0006"
+
+static int chromeos_tbmc_query_switch(struct acpi_device *adev,
+ struct input_dev *idev)
+{
+ unsigned long long state;
+ acpi_status status;
+
+ status = acpi_evaluate_integer(adev->handle, "TBMC", NULL, &state);
+ if (ACPI_FAILURE(status))
+ return -ENODEV;
+
+ /* input layer checks if event is redundant */
+ input_report_switch(idev, SW_TABLET_MODE, state);
+ input_sync(idev);
+
+ return 0;
+}
+
+static __maybe_unused int chromeos_tbmc_resume(struct device *dev)
+{
+ struct acpi_device *adev = to_acpi_device(dev);
+
+ return chromeos_tbmc_query_switch(adev, adev->driver_data);
+}
+
+static void chromeos_tbmc_notify(struct acpi_device *adev, u32 event)
+{
+ switch (event) {
+ case 0x80:
+ chromeos_tbmc_query_switch(adev, adev->driver_data);
+ break;
+ default:
+ dev_err(&adev->dev, "Unexpected event: 0x%08X\n", event);
+ }
+}
+
+static int chromeos_tbmc_open(struct input_dev *idev)
+{
+ struct acpi_device *adev = input_get_drvdata(idev);
+
+ return chromeos_tbmc_query_switch(adev, idev);
+}
+
+static int chromeos_tbmc_add(struct acpi_device *adev)
+{
+ struct input_dev *idev;
+ struct device *dev = &adev->dev;
+ int ret;
+
+ idev = devm_input_allocate_device(dev);
+ if (!idev)
+ return -ENOMEM;
+
+ idev->name = "Tablet Mode Switch";
+ idev->phys = acpi_device_hid(adev);
+
+ idev->id.bustype = BUS_HOST;
+ idev->id.version = 1;
+ idev->id.product = 0;
+ idev->open = chromeos_tbmc_open;
+
+ input_set_drvdata(idev, adev);
+ adev->driver_data = idev;
+
+ input_set_capability(idev, EV_SW, SW_TABLET_MODE);
+ ret = input_register_device(idev);
+ if (ret) {
+ dev_err(dev, "cannot register input device\n");
+ return ret;
+ }
+ return 0;
+}
+
+static const struct acpi_device_id chromeos_tbmc_acpi_device_ids[] = {
+ { ACPI_DRV_NAME, 0 },
+ { }
+};
+MODULE_DEVICE_TABLE(acpi, chromeos_tbmc_acpi_device_ids);
+
+static const SIMPLE_DEV_PM_OPS(chromeos_tbmc_pm_ops, NULL,
+ chromeos_tbmc_resume);
+
+static struct acpi_driver chromeos_tbmc_driver = {
+ .name = DRV_NAME,
+ .class = DRV_NAME,
+ .ids = chromeos_tbmc_acpi_device_ids,
+ .ops = {
+ .add = chromeos_tbmc_add,
+ .notify = chromeos_tbmc_notify,
+ },
+ .drv.pm = &chromeos_tbmc_pm_ops,
+};
+
+module_acpi_driver(chromeos_tbmc_driver);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("ChromeOS ACPI tablet switch driver");
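The driver exposes only an EV_SW/SW_TABLET_MODE switch through evdev, and the input core drops redundant switch reports, as the comment in chromeos_tbmc_query_switch() notes. A hedged userspace sketch for reading the current state via the EVIOCGSW ioctl (the event-node path is hypothetical):

	#include <fcntl.h>
	#include <linux/input.h>
	#include <stdio.h>
	#include <sys/ioctl.h>
	#include <unistd.h>

	int main(void)
	{
		unsigned long sw[SW_MAX / (8 * sizeof(long)) + 1] = { 0 };
		int fd = open("/dev/input/event5", O_RDONLY); /* hypothetical node */

		if (fd < 0 || ioctl(fd, EVIOCGSW(sizeof(sw)), sw) < 0)
			return 1;
		printf("tablet mode: %lu\n",
		       (sw[SW_TABLET_MODE / (8 * sizeof(long))] >>
			(SW_TABLET_MODE % (8 * sizeof(long)))) & 1);
		close(fd);
		return 0;
	}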
diff --git a/drivers/platform/chrome/cros_ec_lightbar.c b/drivers/platform/chrome/cros_ec_lightbar.c
index 6ea79d495aa2..68193bb53383 100644
--- a/drivers/platform/chrome/cros_ec_lightbar.c
+++ b/drivers/platform/chrome/cros_ec_lightbar.c
@@ -170,8 +170,7 @@ static ssize_t version_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
uint32_t version = 0, flags = 0;
- struct cros_ec_dev *ec = container_of(dev,
- struct cros_ec_dev, class_dev);
+ struct cros_ec_dev *ec = to_cros_ec_dev(dev);
int ret;
ret = lb_throttle();
@@ -193,8 +192,7 @@ static ssize_t brightness_store(struct device *dev,
struct cros_ec_command *msg;
int ret;
unsigned int val;
- struct cros_ec_dev *ec = container_of(dev,
- struct cros_ec_dev, class_dev);
+ struct cros_ec_dev *ec = to_cros_ec_dev(dev);
if (kstrtouint(buf, 0, &val))
return -EINVAL;
@@ -238,8 +236,7 @@ static ssize_t led_rgb_store(struct device *dev, struct device_attribute *attr,
{
struct ec_params_lightbar *param;
struct cros_ec_command *msg;
- struct cros_ec_dev *ec = container_of(dev,
- struct cros_ec_dev, class_dev);
+ struct cros_ec_dev *ec = to_cros_ec_dev(dev);
unsigned int val[4];
int ret, i = 0, j = 0, ok = 0;
@@ -311,8 +308,7 @@ static ssize_t sequence_show(struct device *dev,
struct ec_response_lightbar *resp;
struct cros_ec_command *msg;
int ret;
- struct cros_ec_dev *ec = container_of(dev,
- struct cros_ec_dev, class_dev);
+ struct cros_ec_dev *ec = to_cros_ec_dev(dev);
msg = alloc_lightbar_cmd_msg(ec);
if (!msg)
@@ -439,8 +435,7 @@ static ssize_t sequence_store(struct device *dev, struct device_attribute *attr,
struct cros_ec_command *msg;
unsigned int num;
int ret, len;
- struct cros_ec_dev *ec = container_of(dev,
- struct cros_ec_dev, class_dev);
+ struct cros_ec_dev *ec = to_cros_ec_dev(dev);
for (len = 0; len < count; len++)
if (!isalnum(buf[len]))
@@ -488,8 +483,7 @@ static ssize_t program_store(struct device *dev, struct device_attribute *attr,
int extra_bytes, max_size, ret;
struct ec_params_lightbar *param;
struct cros_ec_command *msg;
- struct cros_ec_dev *ec = container_of(dev, struct cros_ec_dev,
- class_dev);
+ struct cros_ec_dev *ec = to_cros_ec_dev(dev);
/*
* We might need to reject the program for size reasons. The EC
@@ -599,8 +593,7 @@ static umode_t cros_ec_lightbar_attrs_are_visible(struct kobject *kobj,
struct attribute *a, int n)
{
struct device *dev = container_of(kobj, struct device, kobj);
- struct cros_ec_dev *ec = container_of(dev,
- struct cros_ec_dev, class_dev);
+ struct cros_ec_dev *ec = to_cros_ec_dev(dev);
struct platform_device *pdev = to_platform_device(ec->dev);
struct cros_ec_platform *pdata = pdev->dev.platform_data;
int is_cros_ec;
diff --git a/drivers/platform/chrome/cros_ec_lpc.c b/drivers/platform/chrome/cros_ec_lpc.c
index 3682e1539251..31c8b8c49e45 100644
--- a/drivers/platform/chrome/cros_ec_lpc.c
+++ b/drivers/platform/chrome/cros_ec_lpc.c
@@ -435,7 +435,13 @@ static int __init cros_ec_lpc_init(void)
int ret;
acpi_status status;
- if (!dmi_check_system(cros_ec_lpc_dmi_table)) {
+ status = acpi_get_devices(ACPI_DRV_NAME, cros_ec_lpc_parse_device,
+ &cros_ec_lpc_acpi_device_found, NULL);
+ if (ACPI_FAILURE(status))
+ pr_warn(DRV_NAME ": Looking for %s failed\n", ACPI_DRV_NAME);
+
+ if (!cros_ec_lpc_acpi_device_found &&
+ !dmi_check_system(cros_ec_lpc_dmi_table)) {
pr_err(DRV_NAME ": unsupported system.\n");
return -ENODEV;
}
@@ -450,11 +456,6 @@ static int __init cros_ec_lpc_init(void)
return ret;
}
- status = acpi_get_devices(ACPI_DRV_NAME, cros_ec_lpc_parse_device,
- &cros_ec_lpc_acpi_device_found, NULL);
- if (ACPI_FAILURE(status))
- pr_warn(DRV_NAME ": Looking for %s failed\n", ACPI_DRV_NAME);
-
if (!cros_ec_lpc_acpi_device_found) {
/* Register the device, and it'll get hooked up automatically */
ret = platform_device_register(&cros_ec_lpc_device);
diff --git a/drivers/platform/chrome/cros_ec_proto.c b/drivers/platform/chrome/cros_ec_proto.c
index e7bbdf947bbc..8350ca2311c7 100644
--- a/drivers/platform/chrome/cros_ec_proto.c
+++ b/drivers/platform/chrome/cros_ec_proto.c
@@ -91,6 +91,8 @@ static int send_command(struct cros_ec_device *ec_dev,
usleep_range(10000, 11000);
ret = (*xfer_fxn)(ec_dev, status_msg);
+ if (ret == -EAGAIN)
+ continue;
if (ret < 0)
break;
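A busy EC may legitimately answer the status poll with -EAGAIN; before this change that aborted the loop, now the poll is simply repeated. Generic shape of the fixed loop (the retry bound and its name are assumptions, not taken from this file):

	for (i = 0; i < MY_EC_STATUS_RETRIES; i++) {
		usleep_range(10000, 11000);
		ret = (*xfer_fxn)(ec_dev, status_msg);
		if (ret == -EAGAIN)
			continue;	/* EC still busy: poll again */
		if (ret < 0)
			break;		/* real transfer error: give up */
		/* ret >= 0: a status reply arrived, inspect it */
	}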
diff --git a/drivers/platform/chrome/cros_ec_sysfs.c b/drivers/platform/chrome/cros_ec_sysfs.c
index 5a6db3fe213a..f34a50121064 100644
--- a/drivers/platform/chrome/cros_ec_sysfs.c
+++ b/drivers/platform/chrome/cros_ec_sysfs.c
@@ -34,8 +34,6 @@
#include <linux/types.h>
#include <linux/uaccess.h>
-#define to_cros_ec_dev(dev) container_of(dev, struct cros_ec_dev, class_dev)
-
/* Accessor functions */
static ssize_t reboot_show(struct device *dev,
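The repeated container_of() pattern in the lightbar, sysfs, and vbc files collapses into to_cros_ec_dev(); since the local definition is removed here, a shared cros_ec header presumably provides it now. A sketch of the helper (its new location is an assumption):

	/* Assumed to live in a shared cros_ec header after this patch. */
	#define to_cros_ec_dev(dev) \
		container_of(dev, struct cros_ec_dev, class_dev)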
diff --git a/drivers/platform/chrome/cros_ec_vbc.c b/drivers/platform/chrome/cros_ec_vbc.c
index 6d38e6b08334..5356f26bc022 100644
--- a/drivers/platform/chrome/cros_ec_vbc.c
+++ b/drivers/platform/chrome/cros_ec_vbc.c
@@ -29,8 +29,7 @@ static ssize_t vboot_context_read(struct file *filp, struct kobject *kobj,
loff_t pos, size_t count)
{
struct device *dev = container_of(kobj, struct device, kobj);
- struct cros_ec_dev *ec = container_of(dev, struct cros_ec_dev,
- class_dev);
+ struct cros_ec_dev *ec = to_cros_ec_dev(dev);
struct cros_ec_device *ecdev = ec->ec_dev;
struct ec_params_vbnvcontext *params;
struct cros_ec_command *msg;
@@ -70,8 +69,7 @@ static ssize_t vboot_context_write(struct file *filp, struct kobject *kobj,
loff_t pos, size_t count)
{
struct device *dev = container_of(kobj, struct device, kobj);
- struct cros_ec_dev *ec = container_of(dev, struct cros_ec_dev,
- class_dev);
+ struct cros_ec_dev *ec = to_cros_ec_dev(dev);
struct cros_ec_device *ecdev = ec->ec_dev;
struct ec_params_vbnvcontext *params;
struct cros_ec_command *msg;
@@ -111,8 +109,7 @@ static umode_t cros_ec_vbc_is_visible(struct kobject *kobj,
struct bin_attribute *a, int n)
{
struct device *dev = container_of(kobj, struct device, kobj);
- struct cros_ec_dev *ec = container_of(dev, struct cros_ec_dev,
- class_dev);
+ struct cros_ec_dev *ec = to_cros_ec_dev(dev);
struct device_node *np = ec->ec_dev->dev->of_node;
if (IS_ENABLED(CONFIG_OF) && np) {
diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig
index 39d06dd1f63a..566644bb496a 100644
--- a/drivers/platform/x86/Kconfig
+++ b/drivers/platform/x86/Kconfig
@@ -154,7 +154,7 @@ config DELL_LAPTOP
depends on ACPI_VIDEO || ACPI_VIDEO = n
depends on RFKILL || RFKILL = n
depends on SERIO_I8042
- select DELL_SMBIOS
+ depends on DELL_SMBIOS
select POWER_SUPPLY
select LEDS_CLASS
select NEW_LEDS
@@ -168,8 +168,8 @@ config DELL_WMI
depends on DMI
depends on INPUT
depends on ACPI_VIDEO || ACPI_VIDEO = n
+ depends on DELL_SMBIOS
select DELL_WMI_DESCRIPTOR
- select DELL_SMBIOS
select INPUT_SPARSEKMAP
---help---
Say Y here if you want to support WMI-based hotkeys on Dell laptops.
diff --git a/drivers/platform/x86/asus-wireless.c b/drivers/platform/x86/asus-wireless.c
index d4aeac3477f5..f086469ea740 100644
--- a/drivers/platform/x86/asus-wireless.c
+++ b/drivers/platform/x86/asus-wireless.c
@@ -178,8 +178,10 @@ static int asus_wireless_remove(struct acpi_device *adev)
{
struct asus_wireless_data *data = acpi_driver_data(adev);
- if (data->wq)
+ if (data->wq) {
+ devm_led_classdev_unregister(&adev->dev, &data->led);
destroy_workqueue(data->wq);
+ }
return 0;
}
diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c
index a32c5c00e0e7..ffffb9909ae1 100644
--- a/drivers/platform/x86/asus-wmi.c
+++ b/drivers/platform/x86/asus-wmi.c
@@ -163,6 +163,16 @@ MODULE_LICENSE("GPL");
static const char * const ashs_ids[] = { "ATK4001", "ATK4002", NULL };
+static bool ashs_present(void)
+{
+ int i = 0;
+ while (ashs_ids[i]) {
+ if (acpi_dev_found(ashs_ids[i++]))
+ return true;
+ }
+ return false;
+}
+
struct bios_args {
u32 arg0;
u32 arg1;
@@ -1025,6 +1035,9 @@ static int asus_new_rfkill(struct asus_wmi *asus,
static void asus_wmi_rfkill_exit(struct asus_wmi *asus)
{
+ if (asus->driver->wlan_ctrl_by_user && ashs_present())
+ return;
+
asus_unregister_rfkill_notifier(asus, "\\_SB.PCI0.P0P5");
asus_unregister_rfkill_notifier(asus, "\\_SB.PCI0.P0P6");
asus_unregister_rfkill_notifier(asus, "\\_SB.PCI0.P0P7");
@@ -2121,16 +2134,6 @@ static int asus_wmi_fan_init(struct asus_wmi *asus)
return 0;
}
-static bool ashs_present(void)
-{
- int i = 0;
- while (ashs_ids[i]) {
- if (acpi_dev_found(ashs_ids[i++]))
- return true;
- }
- return false;
-}
-
/*
* WMI Driver
*/
diff --git a/drivers/platform/x86/toshiba_acpi.c b/drivers/platform/x86/toshiba_acpi.c
index e8d058c5ef21..eef76bfa5d73 100644
--- a/drivers/platform/x86/toshiba_acpi.c
+++ b/drivers/platform/x86/toshiba_acpi.c
@@ -1689,19 +1689,6 @@ static int version_proc_show(struct seq_file *m, void *v)
return 0;
}
-static int version_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, version_proc_show, PDE_DATA(inode));
-}
-
-static const struct file_operations version_proc_fops = {
- .owner = THIS_MODULE,
- .open = version_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
/*
* Proc and module init
*/
@@ -1722,8 +1709,8 @@ static void create_toshiba_proc_entries(struct toshiba_acpi_dev *dev)
if (dev->hotkey_dev)
proc_create_data("keys", S_IRUGO | S_IWUSR, toshiba_proc_dir,
&keys_proc_fops, dev);
- proc_create_data("version", S_IRUGO, toshiba_proc_dir,
- &version_proc_fops, dev);
+ proc_create_single_data("version", S_IRUGO, toshiba_proc_dir,
+ version_proc_show, dev);
}
static void remove_toshiba_proc_entries(struct toshiba_acpi_dev *dev)
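single_open()-style files get the matching helpers: proc_create_single() for a bare show routine, and proc_create_single_data() when the show routine needs private data, which it then reads from the seq_file's private field. Generic sketch (names hypothetical):

	/* Show routine signature expected by both helpers. */
	static int my_show(struct seq_file *m, void *v)
	{
		seq_printf(m, "value: %d\n", *(int *)m->private);
		return 0;
	}

	/* At init: the last argument becomes m->private in my_show(). */
	proc_create_single_data("driver/myinfo", 0, NULL, my_show, &my_value);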
diff --git a/drivers/pnp/pnpbios/proc.c b/drivers/pnp/pnpbios/proc.c
index 7d4aca7948dd..fe1c8f5d9af0 100644
--- a/drivers/pnp/pnpbios/proc.c
+++ b/drivers/pnp/pnpbios/proc.c
@@ -47,19 +47,6 @@ static int pnpconfig_proc_show(struct seq_file *m, void *v)
return 0;
}
-static int pnpconfig_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, pnpconfig_proc_show, NULL);
-}
-
-static const struct file_operations pnpconfig_proc_fops = {
- .owner = THIS_MODULE,
- .open = pnpconfig_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
static int escd_info_proc_show(struct seq_file *m, void *v)
{
struct escd_info_struc escd;
@@ -74,19 +61,6 @@ static int escd_info_proc_show(struct seq_file *m, void *v)
return 0;
}
-static int escd_info_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, escd_info_proc_show, NULL);
-}
-
-static const struct file_operations escd_info_proc_fops = {
- .owner = THIS_MODULE,
- .open = escd_info_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
#define MAX_SANE_ESCD_SIZE (32*1024)
static int escd_proc_show(struct seq_file *m, void *v)
{
@@ -129,19 +103,6 @@ static int escd_proc_show(struct seq_file *m, void *v)
return 0;
}
-static int escd_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, escd_proc_show, NULL);
-}
-
-static const struct file_operations escd_proc_fops = {
- .owner = THIS_MODULE,
- .open = escd_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
static int pnp_legacyres_proc_show(struct seq_file *m, void *v)
{
void *buf;
@@ -159,19 +120,6 @@ static int pnp_legacyres_proc_show(struct seq_file *m, void *v)
return 0;
}
-static int pnp_legacyres_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, pnp_legacyres_proc_show, NULL);
-}
-
-static const struct file_operations pnp_legacyres_proc_fops = {
- .owner = THIS_MODULE,
- .open = pnp_legacyres_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
static int pnp_devices_proc_show(struct seq_file *m, void *v)
{
struct pnp_bios_node *node;
@@ -202,19 +150,6 @@ static int pnp_devices_proc_show(struct seq_file *m, void *v)
return 0;
}
-static int pnp_devices_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, pnp_devices_proc_show, NULL);
-}
-
-static const struct file_operations pnp_devices_proc_fops = {
- .owner = THIS_MODULE,
- .open = pnp_devices_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
static int pnpbios_proc_show(struct seq_file *m, void *v)
{
void *data = m->private;
@@ -318,12 +253,13 @@ int __init pnpbios_proc_init(void)
proc_pnp_boot = proc_mkdir("boot", proc_pnp);
if (!proc_pnp_boot)
return -EIO;
- proc_create("devices", 0, proc_pnp, &pnp_devices_proc_fops);
- proc_create("configuration_info", 0, proc_pnp, &pnpconfig_proc_fops);
- proc_create("escd_info", 0, proc_pnp, &escd_info_proc_fops);
- proc_create("escd", S_IRUSR, proc_pnp, &escd_proc_fops);
- proc_create("legacy_device_resources", 0, proc_pnp, &pnp_legacyres_proc_fops);
-
+ proc_create_single("devices", 0, proc_pnp, pnp_devices_proc_show);
+ proc_create_single("configuration_info", 0, proc_pnp,
+ pnpconfig_proc_show);
+ proc_create_single("escd_info", 0, proc_pnp, escd_info_proc_show);
+ proc_create_single("escd", S_IRUSR, proc_pnp, escd_proc_show);
+ proc_create_single("legacy_device_resources", 0, proc_pnp,
+ pnp_legacyres_proc_show);
return 0;
}
diff --git a/drivers/remoteproc/qcom_q6v5_pil.c b/drivers/remoteproc/qcom_q6v5_pil.c
index 8e70a627e0bb..cbbafdcaaecb 100644
--- a/drivers/remoteproc/qcom_q6v5_pil.c
+++ b/drivers/remoteproc/qcom_q6v5_pil.c
@@ -1083,6 +1083,7 @@ static int q6v5_alloc_memory_region(struct q6v5 *qproc)
dev_err(qproc->dev, "unable to resolve mba region\n");
return ret;
}
+ of_node_put(node);
qproc->mba_phys = r.start;
qproc->mba_size = resource_size(&r);
@@ -1100,6 +1101,7 @@ static int q6v5_alloc_memory_region(struct q6v5 *qproc)
dev_err(qproc->dev, "unable to resolve mpss region\n");
return ret;
}
+ of_node_put(node);
qproc->mpss_phys = qproc->mpss_reloc = r.start;
qproc->mpss_size = resource_size(&r);
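of_parse_phandle(), used just above to resolve the memory-region phandles, returns a device node with an elevated refcount; the added of_node_put() calls drop it once the resource is resolved. The general pattern looks roughly like this (a hedged sketch that releases the node on all paths, slightly stricter than the minimal fix here):

	node = of_parse_phandle(dev->of_node, "memory-region", 0);
	if (!node)
		return -ENODEV;
	ret = of_address_to_resource(node, 0, &r);
	of_node_put(node);	/* balance the reference in every path */
	if (ret)
		return ret;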
diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c
index 6d9c5832ce47..a9609d971f7f 100644
--- a/drivers/remoteproc/remoteproc_core.c
+++ b/drivers/remoteproc/remoteproc_core.c
@@ -1163,7 +1163,7 @@ int rproc_trigger_recovery(struct rproc *rproc)
if (ret)
return ret;
- ret = rproc_stop(rproc, false);
+ ret = rproc_stop(rproc, true);
if (ret)
goto unlock_mutex;
@@ -1316,7 +1316,7 @@ void rproc_shutdown(struct rproc *rproc)
if (!atomic_dec_and_test(&rproc->power))
goto out;
- ret = rproc_stop(rproc, true);
+ ret = rproc_stop(rproc, false);
if (ret) {
atomic_inc(&rproc->power);
goto out;
diff --git a/drivers/reset/reset-uniphier.c b/drivers/reset/reset-uniphier.c
index 360e06b20c53..ac18f2f27881 100644
--- a/drivers/reset/reset-uniphier.c
+++ b/drivers/reset/reset-uniphier.c
@@ -110,7 +110,7 @@ static const struct uniphier_reset_data uniphier_ld20_sys_reset_data[] = {
UNIPHIER_RESETX(4, 0x200c, 2), /* eMMC */
UNIPHIER_RESETX(6, 0x200c, 6), /* Ether */
UNIPHIER_RESETX(8, 0x200c, 8), /* STDMAC (HSC) */
- UNIPHIER_RESETX(12, 0x200c, 5), /* GIO (PCIe, USB3) */
+ UNIPHIER_RESETX(14, 0x200c, 5), /* USB30 */
UNIPHIER_RESETX(16, 0x200c, 12), /* USB30-PHY0 */
UNIPHIER_RESETX(17, 0x200c, 13), /* USB30-PHY1 */
UNIPHIER_RESETX(18, 0x200c, 14), /* USB30-PHY2 */
@@ -127,8 +127,8 @@ static const struct uniphier_reset_data uniphier_pxs3_sys_reset_data[] = {
UNIPHIER_RESETX(6, 0x200c, 9), /* Ether0 */
UNIPHIER_RESETX(7, 0x200c, 10), /* Ether1 */
UNIPHIER_RESETX(8, 0x200c, 12), /* STDMAC */
- UNIPHIER_RESETX(12, 0x200c, 4), /* USB30 link (GIO0) */
- UNIPHIER_RESETX(13, 0x200c, 5), /* USB31 link (GIO1) */
+ UNIPHIER_RESETX(12, 0x200c, 4), /* USB30 link */
+ UNIPHIER_RESETX(13, 0x200c, 5), /* USB31 link */
UNIPHIER_RESETX(16, 0x200c, 16), /* USB30-PHY0 */
UNIPHIER_RESETX(17, 0x200c, 18), /* USB30-PHY1 */
UNIPHIER_RESETX(18, 0x200c, 20), /* USB30-PHY2 */
diff --git a/drivers/rpmsg/rpmsg_char.c b/drivers/rpmsg/rpmsg_char.c
index 64b6de9763ee..1efdf9ff8679 100644
--- a/drivers/rpmsg/rpmsg_char.c
+++ b/drivers/rpmsg/rpmsg_char.c
@@ -581,4 +581,6 @@ static void rpmsg_chrdev_exit(void)
unregister_chrdev_region(rpmsg_major, RPMSG_DEV_MAX);
}
module_exit(rpmsg_chrdev_exit);
+
+MODULE_ALIAS("rpmsg:rpmsg_chrdev");
MODULE_LICENSE("GPL v2");
diff --git a/drivers/rtc/rtc-proc.c b/drivers/rtc/rtc-proc.c
index 31e7e23cc5be..a9dd9218fae2 100644
--- a/drivers/rtc/rtc-proc.c
+++ b/drivers/rtc/rtc-proc.c
@@ -107,40 +107,11 @@ static int rtc_proc_show(struct seq_file *seq, void *offset)
return 0;
}
-static int rtc_proc_open(struct inode *inode, struct file *file)
-{
- int ret;
- struct rtc_device *rtc = PDE_DATA(inode);
-
- if (!try_module_get(rtc->owner))
- return -ENODEV;
-
- ret = single_open(file, rtc_proc_show, rtc);
- if (ret)
- module_put(rtc->owner);
- return ret;
-}
-
-static int rtc_proc_release(struct inode *inode, struct file *file)
-{
- int res = single_release(inode, file);
- struct rtc_device *rtc = PDE_DATA(inode);
-
- module_put(rtc->owner);
- return res;
-}
-
-static const struct file_operations rtc_proc_fops = {
- .open = rtc_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = rtc_proc_release,
-};
-
void rtc_proc_add_device(struct rtc_device *rtc)
{
if (is_rtc_hctosys(rtc))
- proc_create_data("driver/rtc", 0, NULL, &rtc_proc_fops, rtc);
+ proc_create_single_data("driver/rtc", 0, NULL, rtc_proc_show,
+ rtc);
}
void rtc_proc_del_device(struct rtc_device *rtc)
diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c
index 04143c08bd6e..cb9b685118da 100644
--- a/drivers/s390/block/dasd.c
+++ b/drivers/s390/block/dasd.c
@@ -3034,7 +3034,8 @@ static blk_status_t do_dasd_request(struct blk_mq_hw_ctx *hctx,
cqr->callback_data = req;
cqr->status = DASD_CQR_FILLED;
cqr->dq = dq;
- req->completion_data = cqr;
+ *((struct dasd_ccw_req **) blk_mq_rq_to_pdu(req)) = cqr;
+
blk_mq_start_request(req);
spin_lock(&block->queue_lock);
list_add_tail(&cqr->blocklist, &block->ccw_queue);
@@ -3053,19 +3054,20 @@ out:
*
* Return values:
* BLK_EH_RESET_TIMER if the request should be left running
- * BLK_EH_NOT_HANDLED if the request is handled or terminated
+ * BLK_EH_DONE if the request is handled or terminated
* by the driver.
*/
enum blk_eh_timer_return dasd_times_out(struct request *req, bool reserved)
{
- struct dasd_ccw_req *cqr = req->completion_data;
struct dasd_block *block = req->q->queuedata;
struct dasd_device *device;
+ struct dasd_ccw_req *cqr;
unsigned long flags;
int rc = 0;
+ cqr = *((struct dasd_ccw_req **) blk_mq_rq_to_pdu(req));
if (!cqr)
- return BLK_EH_NOT_HANDLED;
+ return BLK_EH_DONE;
spin_lock_irqsave(&cqr->dq->lock, flags);
device = cqr->startdev ? cqr->startdev : block->base;
@@ -3124,7 +3126,7 @@ enum blk_eh_timer_return dasd_times_out(struct request *req, bool reserved)
spin_unlock(&block->queue_lock);
spin_unlock_irqrestore(&cqr->dq->lock, flags);
- return rc ? BLK_EH_RESET_TIMER : BLK_EH_NOT_HANDLED;
+ return rc ? BLK_EH_RESET_TIMER : BLK_EH_DONE;
}
static int dasd_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
@@ -3169,6 +3171,7 @@ static int dasd_alloc_queue(struct dasd_block *block)
int rc;
block->tag_set.ops = &dasd_mq_ops;
+ block->tag_set.cmd_size = sizeof(struct dasd_ccw_req *);
block->tag_set.nr_hw_queues = DASD_NR_HW_QUEUES;
block->tag_set.queue_depth = DASD_MAX_LCU_DEV * DASD_REQ_PER_DEV;
block->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
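The dasd conversion stops parking the cqr pointer in the request's completion_data and instead reserves per-request space via tag_set.cmd_size; blk_mq_rq_to_pdu() then returns that area, which the block layer allocates directly behind each struct request. A generic sketch of the pattern (struct my_cmd and my_queue_rq are hypothetical):

	#include <linux/blk-mq.h>

	struct my_cmd {				/* lives behind every request */
		void *private;
	};
	/* At tag-set setup: tag_set.cmd_size = sizeof(struct my_cmd); */

	static blk_status_t my_queue_rq(struct blk_mq_hw_ctx *hctx,
					const struct blk_mq_queue_data *bd)
	{
		struct my_cmd *cmd = blk_mq_rq_to_pdu(bd->rq);

		cmd->private = NULL;		/* valid for the request's lifetime */
		blk_mq_start_request(bd->rq);
		return BLK_STS_OK;
	}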
diff --git a/drivers/s390/block/dasd_proc.c b/drivers/s390/block/dasd_proc.c
index c33788a829c3..5cb80c645489 100644
--- a/drivers/s390/block/dasd_proc.c
+++ b/drivers/s390/block/dasd_proc.c
@@ -131,19 +131,6 @@ static const struct seq_operations dasd_devices_seq_ops = {
.show = dasd_devices_show,
};
-static int dasd_devices_open(struct inode *inode, struct file *file)
-{
- return seq_open(file, &dasd_devices_seq_ops);
-}
-
-static const struct file_operations dasd_devices_file_ops = {
- .owner = THIS_MODULE,
- .open = dasd_devices_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
#ifdef CONFIG_DASD_PROFILE
static int dasd_stats_all_block_on(void)
{
@@ -352,10 +339,10 @@ dasd_proc_init(void)
dasd_proc_root_entry = proc_mkdir("dasd", NULL);
if (!dasd_proc_root_entry)
goto out_nodasd;
- dasd_devices_entry = proc_create("devices",
+ dasd_devices_entry = proc_create_seq("devices",
S_IFREG | S_IRUGO | S_IWUSR,
dasd_proc_root_entry,
- &dasd_devices_file_ops);
+ &dasd_devices_seq_ops);
if (!dasd_devices_entry)
goto out_nodevices;
dasd_statistics_entry = proc_create("statistics",
diff --git a/drivers/s390/char/tape_proc.c b/drivers/s390/char/tape_proc.c
index faae30476f4b..32a14ee31c6b 100644
--- a/drivers/s390/char/tape_proc.c
+++ b/drivers/s390/char/tape_proc.c
@@ -105,29 +105,14 @@ static const struct seq_operations tape_proc_seq = {
.show = tape_proc_show,
};
-static int tape_proc_open(struct inode *inode, struct file *file)
-{
- return seq_open(file, &tape_proc_seq);
-}
-
-static const struct file_operations tape_proc_ops =
-{
- .owner = THIS_MODULE,
- .open = tape_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
/*
* Initialize procfs stuff on startup
*/
void
tape_proc_init(void)
{
- tape_proc_devices =
- proc_create("tapedevices", S_IFREG | S_IRUGO | S_IWUSR, NULL,
- &tape_proc_ops);
+ tape_proc_devices = proc_create_seq("tapedevices",
+ S_IFREG | S_IRUGO | S_IWUSR, NULL, &tape_proc_seq);
if (tape_proc_devices == NULL) {
return;
}
diff --git a/drivers/s390/cio/qdio_setup.c b/drivers/s390/cio/qdio_setup.c
index 439991d71b14..4c14ce428e92 100644
--- a/drivers/s390/cio/qdio_setup.c
+++ b/drivers/s390/cio/qdio_setup.c
@@ -141,7 +141,7 @@ static int __qdio_allocate_qs(struct qdio_q **irq_ptr_qs, int nr_queues)
int i;
for (i = 0; i < nr_queues; i++) {
- q = kmem_cache_alloc(qdio_q_cache, GFP_KERNEL);
+ q = kmem_cache_zalloc(qdio_q_cache, GFP_KERNEL);
if (!q)
return -ENOMEM;
@@ -456,7 +456,6 @@ int qdio_setup_irq(struct qdio_initialize *init_data)
{
struct ciw *ciw;
struct qdio_irq *irq_ptr = init_data->cdev->private->qdio_data;
- int rc;
memset(&irq_ptr->qib, 0, sizeof(irq_ptr->qib));
memset(&irq_ptr->siga_flag, 0, sizeof(irq_ptr->siga_flag));
@@ -493,16 +492,14 @@ int qdio_setup_irq(struct qdio_initialize *init_data)
ciw = ccw_device_get_ciw(init_data->cdev, CIW_TYPE_EQUEUE);
if (!ciw) {
DBF_ERROR("%4x NO EQ", irq_ptr->schid.sch_no);
- rc = -EINVAL;
- goto out_err;
+ return -EINVAL;
}
irq_ptr->equeue = *ciw;
ciw = ccw_device_get_ciw(init_data->cdev, CIW_TYPE_AQUEUE);
if (!ciw) {
DBF_ERROR("%4x NO AQ", irq_ptr->schid.sch_no);
- rc = -EINVAL;
- goto out_err;
+ return -EINVAL;
}
irq_ptr->aqueue = *ciw;
@@ -512,9 +509,6 @@ int qdio_setup_irq(struct qdio_initialize *init_data)
init_data->cdev->handler = qdio_int_handler;
spin_unlock_irq(get_ccwdev_lock(irq_ptr->cdev));
return 0;
-out_err:
- qdio_release_memory(irq_ptr);
- return rc;
}
void qdio_print_subchannel_info(struct qdio_irq *irq_ptr,
diff --git a/drivers/s390/cio/vfio_ccw_cp.c b/drivers/s390/cio/vfio_ccw_cp.c
index 2c7550797ec2..dce92b2a895d 100644
--- a/drivers/s390/cio/vfio_ccw_cp.c
+++ b/drivers/s390/cio/vfio_ccw_cp.c
@@ -715,6 +715,10 @@ void cp_free(struct channel_program *cp)
* and stores the result to ccwchain list. @cp must have been
* initialized by a previous call with cp_init(). Otherwise, undefined
* behavior occurs.
+ * For each chain composing the channel program:
+ * - On entry ch_len holds the count of CCWs to be translated.
+ * - On exit ch_len is adjusted to the count of successfully translated CCWs.
+ * This allows cp_free to find in ch_len the count of CCWs to free in a chain.
*
* The S/390 CCW Translation APIS (prefixed by 'cp_') are introduced
* as helpers to do ccw chain translation inside the kernel. Basically
@@ -749,11 +753,18 @@ int cp_prefetch(struct channel_program *cp)
for (idx = 0; idx < len; idx++) {
ret = ccwchain_fetch_one(chain, idx, cp);
if (ret)
- return ret;
+ goto out_err;
}
}
return 0;
+out_err:
+ /* Only cleanup the chain elements that were actually translated. */
+ chain->ch_len = idx;
+ list_for_each_entry_continue(chain, &cp->ccwchain_list, next) {
+ chain->ch_len = 0;
+ }
+ return ret;
}
/**
diff --git a/drivers/s390/scsi/zfcp_dbf.c b/drivers/s390/scsi/zfcp_dbf.c
index a8b831000b2d..18c4f933e8b9 100644
--- a/drivers/s390/scsi/zfcp_dbf.c
+++ b/drivers/s390/scsi/zfcp_dbf.c
@@ -4,7 +4,7 @@
*
* Debug traces for zfcp.
*
- * Copyright IBM Corp. 2002, 2017
+ * Copyright IBM Corp. 2002, 2018
*/
#define KMSG_COMPONENT "zfcp"
@@ -308,6 +308,27 @@ void zfcp_dbf_rec_trig(char *tag, struct zfcp_adapter *adapter,
spin_unlock_irqrestore(&dbf->rec_lock, flags);
}
+/**
+ * zfcp_dbf_rec_trig_lock - trace event related to triggered recovery with lock
+ * @tag: identifier for event
+ * @adapter: adapter on which the erp_action should run
+ * @port: remote port involved in the erp_action
+ * @sdev: scsi device involved in the erp_action
+ * @want: wanted erp_action
+ * @need: required erp_action
+ *
+ * The adapter->erp_lock must not be held.
+ */
+void zfcp_dbf_rec_trig_lock(char *tag, struct zfcp_adapter *adapter,
+ struct zfcp_port *port, struct scsi_device *sdev,
+ u8 want, u8 need)
+{
+ unsigned long flags;
+
+ read_lock_irqsave(&adapter->erp_lock, flags);
+ zfcp_dbf_rec_trig(tag, adapter, port, sdev, want, need);
+ read_unlock_irqrestore(&adapter->erp_lock, flags);
+}
/**
* zfcp_dbf_rec_run_lvl - trace event related to running recovery
diff --git a/drivers/s390/scsi/zfcp_ext.h b/drivers/s390/scsi/zfcp_ext.h
index bf8ea4df2bb8..e5eed8aac0ce 100644
--- a/drivers/s390/scsi/zfcp_ext.h
+++ b/drivers/s390/scsi/zfcp_ext.h
@@ -4,7 +4,7 @@
*
* External function declarations.
*
- * Copyright IBM Corp. 2002, 2016
+ * Copyright IBM Corp. 2002, 2018
*/
#ifndef ZFCP_EXT_H
@@ -35,6 +35,9 @@ extern int zfcp_dbf_adapter_register(struct zfcp_adapter *);
extern void zfcp_dbf_adapter_unregister(struct zfcp_adapter *);
extern void zfcp_dbf_rec_trig(char *, struct zfcp_adapter *,
struct zfcp_port *, struct scsi_device *, u8, u8);
+extern void zfcp_dbf_rec_trig_lock(char *tag, struct zfcp_adapter *adapter,
+ struct zfcp_port *port,
+ struct scsi_device *sdev, u8 want, u8 need);
extern void zfcp_dbf_rec_run(char *, struct zfcp_erp_action *);
extern void zfcp_dbf_rec_run_lvl(int level, char *tag,
struct zfcp_erp_action *erp);
diff --git a/drivers/s390/scsi/zfcp_scsi.c b/drivers/s390/scsi/zfcp_scsi.c
index 4d2ba5682493..22f9562f415c 100644
--- a/drivers/s390/scsi/zfcp_scsi.c
+++ b/drivers/s390/scsi/zfcp_scsi.c
@@ -4,7 +4,7 @@
*
* Interface to Linux SCSI midlayer.
*
- * Copyright IBM Corp. 2002, 2017
+ * Copyright IBM Corp. 2002, 2018
*/
#define KMSG_COMPONENT "zfcp"
@@ -618,9 +618,9 @@ static void zfcp_scsi_rport_register(struct zfcp_port *port)
ids.port_id = port->d_id;
ids.roles = FC_RPORT_ROLE_FCP_TARGET;
- zfcp_dbf_rec_trig("scpaddy", port->adapter, port, NULL,
- ZFCP_PSEUDO_ERP_ACTION_RPORT_ADD,
- ZFCP_PSEUDO_ERP_ACTION_RPORT_ADD);
+ zfcp_dbf_rec_trig_lock("scpaddy", port->adapter, port, NULL,
+ ZFCP_PSEUDO_ERP_ACTION_RPORT_ADD,
+ ZFCP_PSEUDO_ERP_ACTION_RPORT_ADD);
rport = fc_remote_port_add(port->adapter->scsi_host, 0, &ids);
if (!rport) {
dev_err(&port->adapter->ccw_device->dev,
@@ -642,9 +642,9 @@ static void zfcp_scsi_rport_block(struct zfcp_port *port)
struct fc_rport *rport = port->rport;
if (rport) {
- zfcp_dbf_rec_trig("scpdely", port->adapter, port, NULL,
- ZFCP_PSEUDO_ERP_ACTION_RPORT_DEL,
- ZFCP_PSEUDO_ERP_ACTION_RPORT_DEL);
+ zfcp_dbf_rec_trig_lock("scpdely", port->adapter, port, NULL,
+ ZFCP_PSEUDO_ERP_ACTION_RPORT_DEL,
+ ZFCP_PSEUDO_ERP_ACTION_RPORT_DEL);
fc_remote_port_delete(rport);
port->rport = NULL;
}
diff --git a/drivers/sbus/char/Kconfig b/drivers/sbus/char/Kconfig
index bf3c5f735614..89edd13fd572 100644
--- a/drivers/sbus/char/Kconfig
+++ b/drivers/sbus/char/Kconfig
@@ -28,13 +28,6 @@ config TADPOLE_TS102_UCTRL
events, and can also notice the attachment/detachment of external
monitors and mice.
-config SUN_JSFLASH
- tristate "JavaStation OS Flash SIMM"
- depends on SPARC32
- help
- If you say Y here, you will be able to boot from your JavaStation's
- Flash memory.
-
config BBC_I2C
tristate "UltraSPARC-III bootbus i2c controller driver"
depends on PCI && SPARC64
diff --git a/drivers/sbus/char/Makefile b/drivers/sbus/char/Makefile
index 8c48ed96683f..44347c918f6b 100644
--- a/drivers/sbus/char/Makefile
+++ b/drivers/sbus/char/Makefile
@@ -15,6 +15,5 @@ obj-$(CONFIG_DISPLAY7SEG) += display7seg.o
obj-$(CONFIG_OBP_FLASH) += flash.o
obj-$(CONFIG_SUN_OPENPROMIO) += openprom.o
obj-$(CONFIG_TADPOLE_TS102_UCTRL) += uctrl.o
-obj-$(CONFIG_SUN_JSFLASH) += jsflash.o
obj-$(CONFIG_BBC_I2C) += bbc.o
obj-$(CONFIG_ORACLE_DAX) += oradax.o
diff --git a/drivers/sbus/char/jsflash.c b/drivers/sbus/char/jsflash.c
deleted file mode 100644
index 14f377ac1280..000000000000
--- a/drivers/sbus/char/jsflash.c
+++ /dev/null
@@ -1,658 +0,0 @@
-/*
- * drivers/sbus/char/jsflash.c
- *
- * Copyright (C) 1991, 1992 Linus Torvalds (drivers/char/mem.c)
- * Copyright (C) 1997 Eddie C. Dost (drivers/sbus/char/flash.c)
- * Copyright (C) 1997-2000 Pavel Machek <pavel@ucw.cz> (drivers/block/nbd.c)
- * Copyright (C) 1999-2000 Pete Zaitcev
- *
- * This driver is used to program OS into a Flash SIMM on
- * Krups and Espresso platforms.
- *
- * TODO: do not allow erase/programming if file systems are mounted.
- * TODO: Erase/program both banks of a 8MB SIMM.
- *
- * It is anticipated that programming an OS Flash will be a routine
- * procedure. In the same time it is exceedingly dangerous because
- * a user can program its OBP flash with OS image and effectively
- * kill the machine.
- *
- * This driver uses an interface different from Eddie's flash.c
- * as a silly safeguard.
- *
- * XXX The flash.c manipulates page caching characteristics in a certain
- * dubious way; also it assumes that remap_pfn_range() can remap
- * PCI bus locations, which may be false. ioremap() must be used
- * instead. We should discuss this.
- */
-
-#include <linux/module.h>
-#include <linux/mutex.h>
-#include <linux/types.h>
-#include <linux/errno.h>
-#include <linux/miscdevice.h>
-#include <linux/fcntl.h>
-#include <linux/poll.h>
-#include <linux/init.h>
-#include <linux/string.h>
-#include <linux/genhd.h>
-#include <linux/blkdev.h>
-#include <linux/uaccess.h>
-#include <asm/pgtable.h>
-#include <asm/io.h>
-#include <asm/pcic.h>
-#include <asm/oplib.h>
-
-#include <asm/jsflash.h> /* ioctl arguments. <linux/> ?? */
-#define JSFIDSZ (sizeof(struct jsflash_ident_arg))
-#define JSFPRGSZ (sizeof(struct jsflash_program_arg))
-
-/*
- * Our device numbers have no business in system headers.
- * The only thing a user knows is the device name /dev/jsflash.
- *
- * Block devices are laid out like this:
- * minor+0 - Bootstrap, for 8MB SIMM 0x20400000[0x800000]
- * minor+1 - Filesystem to mount, normally 0x20400400[0x7ffc00]
- * minor+2 - Whole flash area for any case... 0x20000000[0x01000000]
- * Total 3 minors per flash device.
- *
- * It is easier to have static size vectors, so we define
- * a total minor range JSF_MAX, which must cover all minors.
- */
-/* character device */
-#define JSF_MINOR 178 /* 178 is registered with hpa */
-/* block device */
-#define JSF_MAX 3 /* 3 minors wasted total so far. */
-#define JSF_NPART 3 /* 3 minors per flash device */
-#define JSF_PART_BITS 2 /* 2 bits of minors to cover JSF_NPART */
-#define JSF_PART_MASK 0x3 /* 2 bits mask */
-
-static DEFINE_MUTEX(jsf_mutex);
-
-/*
- * Access functions.
- * We could ioremap(), but it's easier this way.
- */
-static unsigned int jsf_inl(unsigned long addr)
-{
- unsigned long retval;
-
- __asm__ __volatile__("lda [%1] %2, %0\n\t" :
- "=r" (retval) :
- "r" (addr), "i" (ASI_M_BYPASS));
- return retval;
-}
-
-static void jsf_outl(unsigned long addr, __u32 data)
-{
-
- __asm__ __volatile__("sta %0, [%1] %2\n\t" : :
- "r" (data), "r" (addr), "i" (ASI_M_BYPASS) :
- "memory");
-}
-
-/*
- * soft carrier
- */
-
-struct jsfd_part {
- unsigned long dbase;
- unsigned long dsize;
-};
-
-struct jsflash {
- unsigned long base;
- unsigned long size;
- unsigned long busy; /* In use? */
- struct jsflash_ident_arg id;
- /* int mbase; */ /* Minor base, typically zero */
- struct jsfd_part dv[JSF_NPART];
-};
-
-/*
- * We do not map normal memory or obio as a safety precaution.
- * But offsets are real, for ease of userland programming.
- */
-#define JSF_BASE_TOP 0x30000000
-#define JSF_BASE_ALL 0x20000000
-
-#define JSF_BASE_JK 0x20400000
-
-/*
- */
-static struct gendisk *jsfd_disk[JSF_MAX];
-
-/*
- * Let's pretend we may have several of these...
- */
-static struct jsflash jsf0;
-
-/*
- * Wait for AMD to finish its embedded algorithm.
- * We use the Toggle bit DQ6 (0x40) because it does not
- * depend on the data value as /DATA bit DQ7 does.
- *
- * XXX Do we need a timeout here? So far it has never hung, but beware of broken hw.
- */
-static void jsf_wait(unsigned long p) {
- unsigned int x1, x2;
-
- for (;;) {
- x1 = jsf_inl(p);
- x2 = jsf_inl(p);
- if ((x1 & 0x40404040) == (x2 & 0x40404040)) return;
- }
-}
-
-/*
- * Programming will only work if the Flash is clean;
- * we leave that to the programmer application.
- *
- * The AMD part must be programmed one byte at a time;
- * thus, the Simple Tech SIMM must be written 4 bytes at a time.
- *
- * The write path waits for the chip to become ready after the write
- * has finished. This is done so that the application reads
- * consistent data after the write is done.
- */
-static void jsf_write4(unsigned long fa, u32 data) {
-
- jsf_outl(fa, 0xAAAAAAAA); /* Unlock 1 Write 1 */
- jsf_outl(fa, 0x55555555); /* Unlock 1 Write 2 */
- jsf_outl(fa, 0xA0A0A0A0); /* Byte Program */
- jsf_outl(fa, data);
-
- jsf_wait(fa);
-}
-
-/*
- */
-static void jsfd_read(char *buf, unsigned long p, size_t togo) {
- union byte4 {
- char s[4];
- unsigned int n;
- } b;
-
- while (togo >= 4) {
- togo -= 4;
- b.n = jsf_inl(p);
- memcpy(buf, b.s, 4);
- p += 4;
- buf += 4;
- }
-}
-
-static int jsfd_queue;
-
-static struct request *jsfd_next_request(void)
-{
- struct request_queue *q;
- struct request *rq;
- int old_pos = jsfd_queue;
-
- do {
- q = jsfd_disk[jsfd_queue]->queue;
- if (++jsfd_queue == JSF_MAX)
- jsfd_queue = 0;
- if (q) {
- rq = blk_fetch_request(q);
- if (rq)
- return rq;
- }
- } while (jsfd_queue != old_pos);
-
- return NULL;
-}
-
-static void jsfd_request(void)
-{
- struct request *req;
-
- req = jsfd_next_request();
- while (req) {
- struct jsfd_part *jdp = req->rq_disk->private_data;
- unsigned long offset = blk_rq_pos(req) << 9;
- size_t len = blk_rq_cur_bytes(req);
- blk_status_t err = BLK_STS_IOERR;
-
- if ((offset + len) > jdp->dsize)
- goto end;
-
- if (rq_data_dir(req) != READ) {
- printk(KERN_ERR "jsfd: write\n");
- goto end;
- }
-
- if ((jdp->dbase & 0xff000000) != 0x20000000) {
- printk(KERN_ERR "jsfd: bad base %x\n", (int)jdp->dbase);
- goto end;
- }
-
- jsfd_read(bio_data(req->bio), jdp->dbase + offset, len);
- err = BLK_STS_OK;
- end:
- if (!__blk_end_request_cur(req, err))
- req = jsfd_next_request();
- }
-}
-
-static void jsfd_do_request(struct request_queue *q)
-{
- jsfd_request();
-}
-
-/*
- * The memory devices use the full 32/64 bits of the offset, and so we cannot
- * check against negative addresses: they are OK. The return value is odd
- * in that case, though (0).
- *
- * Also note that seeking relative to the "end of file" isn't supported:
- * it has no meaning, so it returns -EINVAL.
- */
-static loff_t jsf_lseek(struct file * file, loff_t offset, int orig)
-{
- loff_t ret;
-
- mutex_lock(&jsf_mutex);
- switch (orig) {
- case 0:
- file->f_pos = offset;
- ret = file->f_pos;
- break;
- case 1:
- file->f_pos += offset;
- ret = file->f_pos;
- break;
- default:
- ret = -EINVAL;
- }
- mutex_unlock(&jsf_mutex);
- return ret;
-}
-
-/*
- * The OS SIMM cannot be read in any size other than a 32-bit word.
- */
-static ssize_t jsf_read(struct file * file, char __user * buf,
- size_t togo, loff_t *ppos)
-{
- unsigned long p = *ppos;
- char __user *tmp = buf;
-
- union byte4 {
- char s[4];
- unsigned int n;
- } b;
-
- if (p < JSF_BASE_ALL || p >= JSF_BASE_TOP) {
- return 0;
- }
-
- if ((p + togo) < p /* wrap */
- || (p + togo) >= JSF_BASE_TOP) {
- togo = JSF_BASE_TOP - p;
- }
-
- if (p < JSF_BASE_ALL && togo != 0) {
-#if 0 /* __bzero XXX */
- size_t x = JSF_BASE_ALL - p;
- if (x > togo) x = togo;
- clear_user(tmp, x);
- tmp += x;
- p += x;
- togo -= x;
-#else
- /*
- * The implementation of clear_user() calls __bzero
- * without regard to modversions,
- * so we cannot build this as a module.
- */
- return 0;
-#endif
- }
-
- while (togo >= 4) {
- togo -= 4;
- b.n = jsf_inl(p);
- if (copy_to_user(tmp, b.s, 4))
- return -EFAULT;
- tmp += 4;
- p += 4;
- }
-
- /*
- * XXX A small togo may remain if a single byte is requested.
- * It would be nice if we did a word-sized read and unpacked it.
- */
-
- *ppos = p;
- return tmp-buf;
-}
-
-static ssize_t jsf_write(struct file * file, const char __user * buf,
- size_t count, loff_t *ppos)
-{
- return -ENOSPC;
-}
-
-/*
- */
-static int jsf_ioctl_erase(unsigned long arg)
-{
- unsigned long p;
-
- /* p = jsf0.base; hits wrong bank */
- p = 0x20400000;
-
- jsf_outl(p, 0xAAAAAAAA); /* Unlock 1 Write 1 */
- jsf_outl(p, 0x55555555); /* Unlock 1 Write 2 */
- jsf_outl(p, 0x80808080); /* Erase setup */
- jsf_outl(p, 0xAAAAAAAA); /* Unlock 2 Write 1 */
- jsf_outl(p, 0x55555555); /* Unlock 2 Write 2 */
- jsf_outl(p, 0x10101010); /* Chip erase */
-
-#if 0
- /*
- * This code is ok, except that counter based timeout
- * has no place in this world. Let's just drop timeouts...
- */
- {
- int i;
- __u32 x;
- for (i = 0; i < 1000000; i++) {
- x = jsf_inl(p);
- if ((x & 0x80808080) == 0x80808080) break;
- }
- if ((x & 0x80808080) != 0x80808080) {
- printk("jsf0: erase timeout with 0x%08x\n", x);
- } else {
- printk("jsf0: erase done with 0x%08x\n", x);
- }
- }
-#else
- jsf_wait(p);
-#endif
-
- return 0;
-}
-
-/*
- * Program a block of flash.
- * Very simple because we can do it byte by byte anyway.
- */
-static int jsf_ioctl_program(void __user *arg)
-{
- struct jsflash_program_arg abuf;
- char __user *uptr;
- unsigned long p;
- unsigned int togo;
- union {
- unsigned int n;
- char s[4];
- } b;
-
- if (copy_from_user(&abuf, arg, JSFPRGSZ))
- return -EFAULT;
- p = abuf.off;
- togo = abuf.size;
- if ((togo & 3) || (p & 3)) return -EINVAL;
-
- uptr = (char __user *) (unsigned long) abuf.data;
- while (togo != 0) {
- togo -= 4;
- if (copy_from_user(&b.s[0], uptr, 4))
- return -EFAULT;
- jsf_write4(p, b.n);
- p += 4;
- uptr += 4;
- }
-
- return 0;
-}
-
-static long jsf_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
-{
- int error = -ENOTTY;
- void __user *argp = (void __user *)arg;
-
- mutex_lock(&jsf_mutex);
-
- if (!capable(CAP_SYS_ADMIN)) {
- mutex_unlock(&jsf_mutex);
- return -EPERM;
- }
- switch (cmd) {
- case JSFLASH_IDENT:
- if (copy_to_user(argp, &jsf0.id, JSFIDSZ)) {
- mutex_unlock(&jsf_mutex);
- return -EFAULT;
- }
- error = 0; /* IDENT succeeded; don't fall through to -ENOTTY */
- break;
- case JSFLASH_ERASE:
- error = jsf_ioctl_erase(arg);
- break;
- case JSFLASH_PROGRAM:
- error = jsf_ioctl_program(argp);
- break;
- }
-
- mutex_unlock(&jsf_mutex);
- return error;
-}
-
-static int jsf_mmap(struct file * file, struct vm_area_struct * vma)
-{
- return -ENXIO;
-}
-
-static int jsf_open(struct inode * inode, struct file * filp)
-{
- mutex_lock(&jsf_mutex);
- if (jsf0.base == 0) {
- mutex_unlock(&jsf_mutex);
- return -ENXIO;
- }
- if (test_and_set_bit(0, (void *)&jsf0.busy) != 0) {
- mutex_unlock(&jsf_mutex);
- return -EBUSY;
- }
-
- mutex_unlock(&jsf_mutex);
- return 0; /* XXX What security? */
-}
-
-static int jsf_release(struct inode *inode, struct file *file)
-{
- jsf0.busy = 0;
- return 0;
-}
-
-static const struct file_operations jsf_fops = {
- .owner = THIS_MODULE,
- .llseek = jsf_lseek,
- .read = jsf_read,
- .write = jsf_write,
- .unlocked_ioctl = jsf_ioctl,
- .mmap = jsf_mmap,
- .open = jsf_open,
- .release = jsf_release,
-};
-
-static struct miscdevice jsf_dev = { JSF_MINOR, "jsflash", &jsf_fops };
-
-static const struct block_device_operations jsfd_fops = {
- .owner = THIS_MODULE,
-};
-
-static int jsflash_init(void)
-{
- int rc;
- struct jsflash *jsf;
- phandle node;
- char banner[128];
- struct linux_prom_registers reg0;
-
- node = prom_getchild(prom_root_node);
- node = prom_searchsiblings(node, "flash-memory");
- if (node != 0 && (s32)node != -1) {
- if (prom_getproperty(node, "reg",
- (char *)&reg0, sizeof(reg0)) == -1) {
- printk("jsflash: no \"reg\" property\n");
- return -ENXIO;
- }
- if (reg0.which_io != 0) {
- printk("jsflash: bus number nonzero: 0x%x:%x\n",
- reg0.which_io, reg0.phys_addr);
- return -ENXIO;
- }
- /*
- * Flash may be somewhere else, for instance on Ebus.
- * So, don't do the following check for IIep flash space.
- */
-#if 0
- if ((reg0.phys_addr >> 24) != 0x20) {
- printk("jsflash: suspicious address: 0x%x:%x\n",
- reg0.which_io, reg0.phys_addr);
- return -ENXIO;
- }
-#endif
- if ((int)reg0.reg_size <= 0) {
- printk("jsflash: bad size 0x%x\n", (int)reg0.reg_size);
- return -ENXIO;
- }
- } else {
- /* XXX Remove this code once PROLL ID12 is widespread */
- printk("jsflash: no /flash-memory node, use PROLL >= 12\n");
- prom_getproperty(prom_root_node, "banner-name", banner, 128);
- if (strcmp (banner, "JavaStation-NC") != 0 &&
- strcmp (banner, "JavaStation-E") != 0) {
- return -ENXIO;
- }
- reg0.which_io = 0;
- reg0.phys_addr = 0x20400000;
- reg0.reg_size = 0x00800000;
- }
-
- /* Let us be really paranoid about modifications to the probing code. */
- if (sparc_cpu_model != sun4m) {
- /* We must be on sun4m because we use MMU Bypass ASI. */
- return -ENXIO;
- }
-
- if (jsf0.base == 0) {
- jsf = &jsf0;
-
- jsf->base = reg0.phys_addr;
- jsf->size = reg0.reg_size;
-
- /* XXX Redo the userland interface. */
- jsf->id.off = JSF_BASE_ALL;
- jsf->id.size = 0x01000000; /* 16M - all segments */
- strcpy(jsf->id.name, "Krups_all");
-
- jsf->dv[0].dbase = jsf->base;
- jsf->dv[0].dsize = jsf->size;
- jsf->dv[1].dbase = jsf->base + 1024;
- jsf->dv[1].dsize = jsf->size - 1024;
- jsf->dv[2].dbase = JSF_BASE_ALL;
- jsf->dv[2].dsize = 0x01000000;
-
- printk("Espresso Flash @0x%lx [%d MB]\n", jsf->base,
- (int) (jsf->size / (1024*1024)));
- }
-
- if ((rc = misc_register(&jsf_dev)) != 0) {
- printk(KERN_ERR "jsf: unable to get misc minor %d\n",
- JSF_MINOR);
- jsf0.base = 0;
- return rc;
- }
-
- return 0;
-}
-
-static int jsfd_init(void)
-{
- static DEFINE_SPINLOCK(lock);
- struct jsflash *jsf;
- struct jsfd_part *jdp;
- int err;
- int i;
-
- if (jsf0.base == 0)
- return -ENXIO;
-
- err = -ENOMEM;
- for (i = 0; i < JSF_MAX; i++) {
- struct gendisk *disk = alloc_disk(1);
- if (!disk)
- goto out;
- disk->queue = blk_init_queue(jsfd_do_request, &lock);
- if (!disk->queue) {
- put_disk(disk);
- goto out;
- }
- blk_queue_bounce_limit(disk->queue, BLK_BOUNCE_HIGH);
- jsfd_disk[i] = disk;
- }
-
- if (register_blkdev(JSFD_MAJOR, "jsfd")) {
- err = -EIO;
- goto out;
- }
-
- for (i = 0; i < JSF_MAX; i++) {
- struct gendisk *disk = jsfd_disk[i];
- if ((i & JSF_PART_MASK) >= JSF_NPART) continue;
- jsf = &jsf0; /* actually, &jsfv[i >> JSF_PART_BITS] */
- jdp = &jsf->dv[i&JSF_PART_MASK];
-
- disk->major = JSFD_MAJOR;
- disk->first_minor = i;
- sprintf(disk->disk_name, "jsfd%d", i);
- disk->fops = &jsfd_fops;
- set_capacity(disk, jdp->dsize >> 9);
- disk->private_data = jdp;
- add_disk(disk);
- set_disk_ro(disk, 1);
- }
- return 0;
-out:
- while (i--)
- put_disk(jsfd_disk[i]);
- return err;
-}
-
-MODULE_LICENSE("GPL");
-
-static int __init jsflash_init_module(void) {
- int rc;
-
- if ((rc = jsflash_init()) == 0) {
- jsfd_init();
- return 0;
- }
- return rc;
-}
-
-static void __exit jsflash_cleanup_module(void)
-{
- int i;
-
- for (i = 0; i < JSF_MAX; i++) {
- if ((i & JSF_PART_MASK) >= JSF_NPART) continue;
- del_gendisk(jsfd_disk[i]);
- blk_cleanup_queue(jsfd_disk[i]->queue);
- put_disk(jsfd_disk[i]);
- }
- if (jsf0.busy)
- printk("jsf0: cleaning busy unit\n");
- jsf0.base = 0;
- jsf0.busy = 0;
-
- misc_deregister(&jsf_dev);
- unregister_blkdev(JSFD_MAJOR, "jsfd");
-}
-
-module_init(jsflash_init_module);
-module_exit(jsflash_cleanup_module);
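The jsf_wait() loop in the driver removed above polls the AMD DQ6 toggle bit without any bound, and the driver's own XXX comment asks whether a timeout is needed. A bounded variant might look like the following (a minimal sketch only; the poll limit and the -ETIMEDOUT policy are assumptions, the removed code simply spun forever):

static int jsf_wait_bounded(unsigned long p, unsigned long max_polls)
{
	unsigned int x1, x2;

	while (max_polls--) {
		x1 = jsf_inl(p);
		x2 = jsf_inl(p);
		/* DQ6 (0x40 in each byte lane) stops toggling when done */
		if ((x1 & 0x40404040) == (x2 & 0x40404040))
			return 0;
	}
	return -ETIMEDOUT;	/* assumed policy; beware broken hw */
}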
diff --git a/drivers/sbus/char/oradax.c b/drivers/sbus/char/oradax.c
index c44d7c7ffc92..1754f55e2fac 100644
--- a/drivers/sbus/char/oradax.c
+++ b/drivers/sbus/char/oradax.c
@@ -3,7 +3,7 @@
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
+ * the Free Software Foundation, either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
diff --git a/drivers/scsi/Makefile b/drivers/scsi/Makefile
index e29f9b8fd66d..56c940394729 100644
--- a/drivers/scsi/Makefile
+++ b/drivers/scsi/Makefile
@@ -182,7 +182,7 @@ zalon7xx-objs := zalon.o ncr53c8xx.o
NCR_Q720_mod-objs := NCR_Q720.o ncr53c8xx.o
# Files generated that shall be removed upon make clean
-clean-files := 53c700_d.h 53c700_u.h
+clean-files := 53c700_d.h 53c700_u.h scsi_devinfo_tbl.c
$(obj)/53c700.o $(MODVERDIR)/$(obj)/53c700.ver: $(obj)/53c700_d.h
diff --git a/drivers/scsi/aacraid/commsup.c b/drivers/scsi/aacraid/commsup.c
index 0156c9623c35..d62ddd63f4fe 100644
--- a/drivers/scsi/aacraid/commsup.c
+++ b/drivers/scsi/aacraid/commsup.c
@@ -724,6 +724,8 @@ int aac_hba_send(u8 command, struct fib *fibptr, fib_callback callback,
int wait;
unsigned long flags = 0;
unsigned long mflags = 0;
+ struct aac_hba_cmd_req *hbacmd = (struct aac_hba_cmd_req *)
+ fibptr->hw_fib_va;
fibptr->flags = (FIB_CONTEXT_FLAG | FIB_CONTEXT_FLAG_NATIVE_HBA);
if (callback) {
@@ -734,11 +736,9 @@ int aac_hba_send(u8 command, struct fib *fibptr, fib_callback callback,
wait = 1;
- if (command == HBA_IU_TYPE_SCSI_CMD_REQ) {
- struct aac_hba_cmd_req *hbacmd =
- (struct aac_hba_cmd_req *)fibptr->hw_fib_va;
+ hbacmd->iu_type = command;
- hbacmd->iu_type = command;
+ if (command == HBA_IU_TYPE_SCSI_CMD_REQ) {
/* bit1 of request_id must be 0 */
hbacmd->request_id =
cpu_to_le32((((u32)(fibptr - dev->fibs)) << 2) + 1);
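The constraint noted in the comment just above holds by construction: shifting the fib index left by 2 clears bits 0 and 1, and adding 1 then sets only bit 0, so bit 1 of request_id is always 0.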
diff --git a/drivers/scsi/gdth.c b/drivers/scsi/gdth.c
index c35f05c4c6bb..85604795d8ee 100644
--- a/drivers/scsi/gdth.c
+++ b/drivers/scsi/gdth.c
@@ -3882,7 +3882,7 @@ static enum blk_eh_timer_return gdth_timed_out(struct scsi_cmnd *scp)
struct gdth_cmndinfo *cmndinfo = gdth_cmnd_priv(scp);
u8 b, t;
unsigned long flags;
- enum blk_eh_timer_return retval = BLK_EH_NOT_HANDLED;
+ enum blk_eh_timer_return retval = BLK_EH_DONE;
TRACE(("%s() cmd 0x%x\n", __func__, scp->cmnd[0]));
b = scp->device->channel;
diff --git a/drivers/scsi/isci/port_config.c b/drivers/scsi/isci/port_config.c
index edb7be786c65..9e8de1462593 100644
--- a/drivers/scsi/isci/port_config.c
+++ b/drivers/scsi/isci/port_config.c
@@ -291,7 +291,7 @@ sci_mpc_agent_validate_phy_configuration(struct isci_host *ihost,
* Note: We have not moved the current phy_index so we will actually
* compare the starting phy with itself.
* This is expected and required to add the phy to the port. */
- while (phy_index < SCI_MAX_PHYS) {
+ for (; phy_index < SCI_MAX_PHYS; phy_index++) {
if ((phy_mask & (1 << phy_index)) == 0)
continue;
sci_phy_get_sas_address(&ihost->phys[phy_index],
@@ -311,7 +311,6 @@ sci_mpc_agent_validate_phy_configuration(struct isci_host *ihost,
&ihost->phys[phy_index]);
assigned_phy_mask |= (1 << phy_index);
- phy_index++;
}
}
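The while-to-for conversion above is a correctness fix, not a style change: in the old loop, the continue taken when a bit in phy_mask was clear jumped past the phy_index++ at the bottom of the body, so the loop could never advance; with the increment in the for header, every path makes progress.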
diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c
index 15a2fef51e38..71bdc0b52cf9 100644
--- a/drivers/scsi/libiscsi.c
+++ b/drivers/scsi/libiscsi.c
@@ -1963,7 +1963,7 @@ static int iscsi_has_ping_timed_out(struct iscsi_conn *conn)
enum blk_eh_timer_return iscsi_eh_cmd_timed_out(struct scsi_cmnd *sc)
{
- enum blk_eh_timer_return rc = BLK_EH_NOT_HANDLED;
+ enum blk_eh_timer_return rc = BLK_EH_DONE;
struct iscsi_task *task = NULL, *running_task;
struct iscsi_cls_session *cls_session;
struct iscsi_session *session;
@@ -1982,7 +1982,7 @@ enum blk_eh_timer_return iscsi_eh_cmd_timed_out(struct scsi_cmnd *sc)
* Raced with completion. Blk layer has taken ownership
* so let timeout code complete it now.
*/
- rc = BLK_EH_HANDLED;
+ rc = BLK_EH_DONE;
goto done;
}
@@ -1997,7 +1997,7 @@ enum blk_eh_timer_return iscsi_eh_cmd_timed_out(struct scsi_cmnd *sc)
if (unlikely(system_state != SYSTEM_RUNNING)) {
sc->result = DID_NO_CONNECT << 16;
ISCSI_DBG_EH(session, "sc on shutdown, handled\n");
- rc = BLK_EH_HANDLED;
+ rc = BLK_EH_DONE;
goto done;
}
/*
diff --git a/drivers/scsi/megaraid.c b/drivers/scsi/megaraid.c
index 7195cff51d4c..91f5e2c68dbc 100644
--- a/drivers/scsi/megaraid.c
+++ b/drivers/scsi/megaraid.c
@@ -2731,53 +2731,6 @@ proc_show_rdrv_40(struct seq_file *m, void *v)
return proc_show_rdrv(m, m->private, 30, 39);
}
-
-/*
- * seq_file wrappers for procfile show routines.
- */
-static int mega_proc_open(struct inode *inode, struct file *file)
-{
- adapter_t *adapter = proc_get_parent_data(inode);
- int (*show)(struct seq_file *, void *) = PDE_DATA(inode);
-
- return single_open(file, show, adapter);
-}
-
-static const struct file_operations mega_proc_fops = {
- .open = mega_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
-/*
- * Table of proc files we need to create.
- */
-struct mega_proc_file {
- const char *name;
- unsigned short ptr_offset;
- int (*show) (struct seq_file *m, void *v);
-};
-
-static const struct mega_proc_file mega_proc_files[] = {
- { "config", offsetof(adapter_t, proc_read), proc_show_config },
- { "stat", offsetof(adapter_t, proc_stat), proc_show_stat },
- { "mailbox", offsetof(adapter_t, proc_mbox), proc_show_mbox },
-#if MEGA_HAVE_ENH_PROC
- { "rebuild-rate", offsetof(adapter_t, proc_rr), proc_show_rebuild_rate },
- { "battery-status", offsetof(adapter_t, proc_battery), proc_show_battery },
- { "diskdrives-ch0", offsetof(adapter_t, proc_pdrvstat[0]), proc_show_pdrv_ch0 },
- { "diskdrives-ch1", offsetof(adapter_t, proc_pdrvstat[1]), proc_show_pdrv_ch1 },
- { "diskdrives-ch2", offsetof(adapter_t, proc_pdrvstat[2]), proc_show_pdrv_ch2 },
- { "diskdrives-ch3", offsetof(adapter_t, proc_pdrvstat[3]), proc_show_pdrv_ch3 },
- { "raiddrives-0-9", offsetof(adapter_t, proc_rdrvstat[0]), proc_show_rdrv_10 },
- { "raiddrives-10-19", offsetof(adapter_t, proc_rdrvstat[1]), proc_show_rdrv_20 },
- { "raiddrives-20-29", offsetof(adapter_t, proc_rdrvstat[2]), proc_show_rdrv_30 },
- { "raiddrives-30-39", offsetof(adapter_t, proc_rdrvstat[3]), proc_show_rdrv_40 },
-#endif
- { NULL }
-};
-
/**
* mega_create_proc_entry()
* @index - index in soft state array
@@ -2788,31 +2741,45 @@ static const struct mega_proc_file mega_proc_files[] = {
static void
mega_create_proc_entry(int index, struct proc_dir_entry *parent)
{
- const struct mega_proc_file *f;
- adapter_t *adapter = hba_soft_state[index];
- struct proc_dir_entry *dir, *de, **ppde;
- u8 string[16];
+ adapter_t *adapter = hba_soft_state[index];
+ struct proc_dir_entry *dir;
+ u8 string[16];
sprintf(string, "hba%d", adapter->host->host_no);
-
- dir = adapter->controller_proc_dir_entry =
- proc_mkdir_data(string, 0, parent, adapter);
- if(!dir) {
+ dir = proc_mkdir_data(string, 0, parent, adapter);
+ if (!dir) {
dev_warn(&adapter->dev->dev, "proc_mkdir failed\n");
return;
}
- for (f = mega_proc_files; f->name; f++) {
- de = proc_create_data(f->name, S_IRUSR, dir, &mega_proc_fops,
- f->show);
- if (!de) {
- dev_warn(&adapter->dev->dev, "proc_create failed\n");
- return;
- }
-
- ppde = (void *)adapter + f->ptr_offset;
- *ppde = de;
- }
+ proc_create_single_data("config", S_IRUSR, dir,
+ proc_show_config, adapter);
+ proc_create_single_data("stat", S_IRUSR, dir,
+ proc_show_stat, adapter);
+ proc_create_single_data("mailbox", S_IRUSR, dir,
+ proc_show_mbox, adapter);
+#if MEGA_HAVE_ENH_PROC
+ proc_create_single_data("rebuild-rate", S_IRUSR, dir,
+ proc_show_rebuild_rate, adapter);
+ proc_create_single_data("battery-status", S_IRUSR, dir,
+ proc_show_battery, adapter);
+ proc_create_single_data("diskdrives-ch0", S_IRUSR, dir,
+ proc_show_pdrv_ch0, adapter);
+ proc_create_single_data("diskdrives-ch1", S_IRUSR, dir,
+ proc_show_pdrv_ch1, adapter);
+ proc_create_single_data("diskdrives-ch2", S_IRUSR, dir,
+ proc_show_pdrv_ch2, adapter);
+ proc_create_single_data("diskdrives-ch3", S_IRUSR, dir,
+ proc_show_pdrv_ch3, adapter);
+ proc_create_single_data("raiddrives-0-9", S_IRUSR, dir,
+ proc_show_rdrv_10, adapter);
+ proc_create_single_data("raiddrives-10-19", S_IRUSR, dir,
+ proc_show_rdrv_20, adapter);
+ proc_create_single_data("raiddrives-20-29", S_IRUSR, dir,
+ proc_show_rdrv_30, adapter);
+ proc_create_single_data("raiddrives-30-39", S_IRUSR, dir,
+ proc_show_rdrv_40, adapter);
+#endif
}
#else
@@ -4580,6 +4547,7 @@ megaraid_remove_one(struct pci_dev *pdev)
{
struct Scsi_Host *host = pci_get_drvdata(pdev);
adapter_t *adapter = (adapter_t *)host->hostdata;
+ char buf[12] = { 0 };
scsi_remove_host(host);
@@ -4594,44 +4562,8 @@ megaraid_remove_one(struct pci_dev *pdev)
mega_free_sgl(adapter);
-#ifdef CONFIG_PROC_FS
- if (adapter->controller_proc_dir_entry) {
- remove_proc_entry("stat", adapter->controller_proc_dir_entry);
- remove_proc_entry("config",
- adapter->controller_proc_dir_entry);
- remove_proc_entry("mailbox",
- adapter->controller_proc_dir_entry);
-#if MEGA_HAVE_ENH_PROC
- remove_proc_entry("rebuild-rate",
- adapter->controller_proc_dir_entry);
- remove_proc_entry("battery-status",
- adapter->controller_proc_dir_entry);
-
- remove_proc_entry("diskdrives-ch0",
- adapter->controller_proc_dir_entry);
- remove_proc_entry("diskdrives-ch1",
- adapter->controller_proc_dir_entry);
- remove_proc_entry("diskdrives-ch2",
- adapter->controller_proc_dir_entry);
- remove_proc_entry("diskdrives-ch3",
- adapter->controller_proc_dir_entry);
-
- remove_proc_entry("raiddrives-0-9",
- adapter->controller_proc_dir_entry);
- remove_proc_entry("raiddrives-10-19",
- adapter->controller_proc_dir_entry);
- remove_proc_entry("raiddrives-20-29",
- adapter->controller_proc_dir_entry);
- remove_proc_entry("raiddrives-30-39",
- adapter->controller_proc_dir_entry);
-#endif
- {
- char buf[12] = { 0 };
- sprintf(buf, "hba%d", adapter->host->host_no);
- remove_proc_entry(buf, mega_proc_dir_entry);
- }
- }
-#endif
+ sprintf(buf, "hba%d", adapter->host->host_no);
+ remove_proc_subtree(buf, mega_proc_dir_entry);
pci_free_consistent(adapter->dev, MEGA_BUFFER_SIZE,
adapter->mega_buffer, adapter->buf_dma_handle);
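The megaraid conversion above replaces per-file proc_dir_entry bookkeeping with proc_create_single_data() at setup and a single remove_proc_subtree() call at teardown. A minimal sketch of the same idiom, with illustrative names (demo_show, "demo" and "status" are not from the driver):

#include <linux/proc_fs.h>
#include <linux/seq_file.h>

static int demo_show(struct seq_file *m, void *v)
{
	seq_puts(m, "ok\n");
	return 0;
}

static void demo_proc_setup(struct proc_dir_entry *parent, void *data)
{
	struct proc_dir_entry *dir = proc_mkdir_data("demo", 0, parent, data);

	if (!dir)
		return;
	/* one call per file; no pointers to remember for teardown */
	proc_create_single_data("status", S_IRUSR, dir, demo_show, data);
}

static void demo_proc_teardown(struct proc_dir_entry *parent)
{
	remove_proc_subtree("demo", parent);	/* removes dir and children */
}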
diff --git a/drivers/scsi/megaraid.h b/drivers/scsi/megaraid.h
index 21eba2fd465a..18e85d9267ff 100644
--- a/drivers/scsi/megaraid.h
+++ b/drivers/scsi/megaraid.h
@@ -814,18 +814,6 @@ typedef struct {
#ifdef CONFIG_PROC_FS
struct proc_dir_entry *controller_proc_dir_entry;
- struct proc_dir_entry *proc_read;
- struct proc_dir_entry *proc_stat;
- struct proc_dir_entry *proc_mbox;
-
-#if MEGA_HAVE_ENH_PROC
- struct proc_dir_entry *proc_rr;
- struct proc_dir_entry *proc_battery;
-#define MAX_PROC_CHANNELS 4
- struct proc_dir_entry *proc_pdrvstat[MAX_PROC_CHANNELS];
- struct proc_dir_entry *proc_rdrvstat[MAX_PROC_CHANNELS];
-#endif
-
#endif
int has_64bit_addr; /* are we using 64-bit addressing */
diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c
index b89c6e6c0589..ce656c466ca9 100644
--- a/drivers/scsi/megaraid/megaraid_sas_base.c
+++ b/drivers/scsi/megaraid/megaraid_sas_base.c
@@ -2772,7 +2772,7 @@ blk_eh_timer_return megasas_reset_timer(struct scsi_cmnd *scmd)
if (time_after(jiffies, scmd->jiffies_at_alloc +
(scmd_timeout * 2) * HZ)) {
- return BLK_EH_NOT_HANDLED;
+ return BLK_EH_DONE;
}
instance = (struct megasas_instance *)scmd->device->host->hostdata;
diff --git a/drivers/scsi/mvumi.c b/drivers/scsi/mvumi.c
index fe97401ad192..afd27165cd93 100644
--- a/drivers/scsi/mvumi.c
+++ b/drivers/scsi/mvumi.c
@@ -2155,7 +2155,7 @@ static enum blk_eh_timer_return mvumi_timed_out(struct scsi_cmnd *scmd)
mvumi_return_cmd(mhba, cmd);
spin_unlock_irqrestore(mhba->shost->host_lock, flags);
- return BLK_EH_NOT_HANDLED;
+ return BLK_EH_DONE;
}
static int
diff --git a/drivers/scsi/osd/osd_initiator.c b/drivers/scsi/osd/osd_initiator.c
index e18877177f1b..5a33e1ad9881 100644
--- a/drivers/scsi/osd/osd_initiator.c
+++ b/drivers/scsi/osd/osd_initiator.c
@@ -99,7 +99,7 @@ static int _osd_get_print_system_info(struct osd_dev *od,
int nelem = ARRAY_SIZE(get_attrs), a = 0;
int ret;
- or = osd_start_request(od, GFP_KERNEL);
+ or = osd_start_request(od);
if (!or)
return -ENOMEM;
@@ -409,16 +409,15 @@ static void _osd_request_free(struct osd_request *or)
kfree(or);
}
-struct osd_request *osd_start_request(struct osd_dev *dev, gfp_t gfp)
+struct osd_request *osd_start_request(struct osd_dev *dev)
{
struct osd_request *or;
- or = _osd_request_alloc(gfp);
+ or = _osd_request_alloc(GFP_KERNEL);
if (!or)
return NULL;
or->osd_dev = dev;
- or->alloc_flags = gfp;
or->timeout = dev->def_timeout;
or->retries = OSD_REQ_RETRIES;
@@ -546,7 +545,7 @@ static int _osd_realloc_seg(struct osd_request *or,
if (seg->alloc_size >= max_bytes)
return 0;
- buff = krealloc(seg->buff, max_bytes, or->alloc_flags);
+ buff = krealloc(seg->buff, max_bytes, GFP_KERNEL);
if (!buff) {
OSD_ERR("Failed to Realloc %d-bytes was-%d\n", max_bytes,
seg->alloc_size);
@@ -728,7 +727,7 @@ static int _osd_req_list_objects(struct osd_request *or,
_osd_req_encode_olist(or, list);
WARN_ON(or->in.bio);
- bio = bio_map_kern(q, list, len, or->alloc_flags);
+ bio = bio_map_kern(q, list, len, GFP_KERNEL);
if (IS_ERR(bio)) {
OSD_ERR("!!! Failed to allocate list_objects BIO\n");
return PTR_ERR(bio);
@@ -1190,14 +1189,14 @@ static int _req_append_segment(struct osd_request *or,
pad_buff = io->pad_buff;
ret = blk_rq_map_kern(io->req->q, io->req, pad_buff, padding,
- or->alloc_flags);
+ GFP_KERNEL);
if (ret)
return ret;
io->total_bytes += padding;
}
ret = blk_rq_map_kern(io->req->q, io->req, seg->buff, seg->total_bytes,
- or->alloc_flags);
+ GFP_KERNEL);
if (ret)
return ret;
@@ -1564,14 +1563,14 @@ static int _osd_req_finalize_data_integrity(struct osd_request *or,
* osd_finalize_request and helpers
*/
static struct request *_make_request(struct request_queue *q, bool has_write,
- struct _osd_io_info *oii, gfp_t flags)
+ struct _osd_io_info *oii)
{
struct request *req;
struct bio *bio = oii->bio;
int ret;
req = blk_get_request(q, has_write ? REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN,
- flags);
+ 0);
if (IS_ERR(req))
return req;
@@ -1589,13 +1588,12 @@ static struct request *_make_request(struct request_queue *q, bool has_write,
static int _init_blk_request(struct osd_request *or,
bool has_in, bool has_out)
{
- gfp_t flags = or->alloc_flags;
struct scsi_device *scsi_device = or->osd_dev->scsi_device;
struct request_queue *q = scsi_device->request_queue;
struct request *req;
int ret;
- req = _make_request(q, has_out, has_out ? &or->out : &or->in, flags);
+ req = _make_request(q, has_out, has_out ? &or->out : &or->in);
if (IS_ERR(req)) {
ret = PTR_ERR(req);
goto out;
@@ -1611,7 +1609,7 @@ static int _init_blk_request(struct osd_request *or,
or->out.req = req;
if (has_in) {
/* allocate bidi request */
- req = _make_request(q, false, &or->in, flags);
+ req = _make_request(q, false, &or->in);
if (IS_ERR(req)) {
OSD_DEBUG("blk_get_request for bidi failed\n");
ret = PTR_ERR(req);
diff --git a/drivers/scsi/osst.c b/drivers/scsi/osst.c
index 20ec1c01dbd5..2bbe797f8c3d 100644
--- a/drivers/scsi/osst.c
+++ b/drivers/scsi/osst.c
@@ -368,7 +368,7 @@ static int osst_execute(struct osst_request *SRpnt, const unsigned char *cmd,
int write = (data_direction == DMA_TO_DEVICE);
req = blk_get_request(SRpnt->stp->device->request_queue,
- write ? REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN, GFP_KERNEL);
+ write ? REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN, 0);
if (IS_ERR(req))
return DRIVER_ERROR << 24;
diff --git a/drivers/scsi/qla4xxx/ql4_os.c b/drivers/scsi/qla4xxx/ql4_os.c
index 94c14ce94da2..0e13349dce57 100644
--- a/drivers/scsi/qla4xxx/ql4_os.c
+++ b/drivers/scsi/qla4xxx/ql4_os.c
@@ -1848,7 +1848,7 @@ static enum blk_eh_timer_return qla4xxx_eh_cmd_timed_out(struct scsi_cmnd *sc)
struct iscsi_cls_session *session;
struct iscsi_session *sess;
unsigned long flags;
- enum blk_eh_timer_return ret = BLK_EH_NOT_HANDLED;
+ enum blk_eh_timer_return ret = BLK_EH_DONE;
session = starget_to_session(scsi_target(sc->device));
sess = session->dd_data;
diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
index 946039117bf4..9c02ba2e7ef3 100644
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -282,7 +282,7 @@ void scsi_eh_scmd_add(struct scsi_cmnd *scmd)
enum blk_eh_timer_return scsi_times_out(struct request *req)
{
struct scsi_cmnd *scmd = blk_mq_rq_to_pdu(req);
- enum blk_eh_timer_return rtn = BLK_EH_NOT_HANDLED;
+ enum blk_eh_timer_return rtn = BLK_EH_DONE;
struct Scsi_Host *host = scmd->device->host;
trace_scsi_dispatch_cmd_timeout(scmd);
@@ -294,7 +294,7 @@ enum blk_eh_timer_return scsi_times_out(struct request *req)
if (host->hostt->eh_timed_out)
rtn = host->hostt->eh_timed_out(scmd);
- if (rtn == BLK_EH_NOT_HANDLED) {
+ if (rtn == BLK_EH_DONE) {
if (scsi_abort_command(scmd) != SUCCESS) {
set_host_byte(scmd, DID_TIME_OUT);
scsi_eh_scmd_add(scmd);
@@ -1933,11 +1933,7 @@ static void scsi_eh_lock_door(struct scsi_device *sdev)
struct request *req;
struct scsi_request *rq;
- /*
- * blk_get_request with GFP_KERNEL (__GFP_RECLAIM) sleeps until a
- * request becomes available
- */
- req = blk_get_request(sdev->request_queue, REQ_OP_SCSI_IN, GFP_KERNEL);
+ req = blk_get_request(sdev->request_queue, REQ_OP_SCSI_IN, 0);
if (IS_ERR(req))
return;
rq = scsi_req(req);
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index e9b4f279d29c..fb38aeff9dbd 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -265,7 +265,7 @@ int scsi_execute(struct scsi_device *sdev, const unsigned char *cmd,
struct scsi_request *rq;
int ret = DRIVER_ERROR << 24;
- req = blk_get_request_flags(sdev->request_queue,
+ req = blk_get_request(sdev->request_queue,
data_direction == DMA_TO_DEVICE ?
REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN, BLK_MQ_REQ_PREEMPT);
if (IS_ERR(req))
@@ -273,7 +273,7 @@ int scsi_execute(struct scsi_device *sdev, const unsigned char *cmd,
rq = scsi_req(req);
if (bufflen && blk_rq_map_kern(sdev->request_queue, req,
- buffer, bufflen, __GFP_RECLAIM))
+ buffer, bufflen, GFP_NOIO))
goto out;
rq->cmd_len = COMMAND_SIZE(cmd[0]);
@@ -2149,27 +2149,6 @@ static int scsi_map_queues(struct blk_mq_tag_set *set)
return blk_mq_map_queues(set);
}
-static u64 scsi_calculate_bounce_limit(struct Scsi_Host *shost)
-{
- struct device *host_dev;
- u64 bounce_limit = 0xffffffff;
-
- if (shost->unchecked_isa_dma)
- return BLK_BOUNCE_ISA;
- /*
- * Platforms with virtual-DMA translation
- * hardware have no practical limit.
- */
- if (!PCI_DMA_BUS_IS_PHYS)
- return BLK_BOUNCE_ANY;
-
- host_dev = scsi_get_device(shost);
- if (host_dev && host_dev->dma_mask)
- bounce_limit = (u64)dma_max_pfn(host_dev) << PAGE_SHIFT;
-
- return bounce_limit;
-}
-
void __scsi_init_queue(struct Scsi_Host *shost, struct request_queue *q)
{
struct device *dev = shost->dma_dev;
@@ -2189,7 +2168,8 @@ void __scsi_init_queue(struct Scsi_Host *shost, struct request_queue *q)
}
blk_queue_max_hw_sectors(q, shost->max_sectors);
- blk_queue_bounce_limit(q, scsi_calculate_bounce_limit(shost));
+ if (shost->unchecked_isa_dma)
+ blk_queue_bounce_limit(q, BLK_BOUNCE_ISA);
blk_queue_segment_boundary(q, shost->dma_boundary);
dma_set_seg_boundary(dev, shost->dma_boundary);
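A pattern that repeats through the SCSI hunks in this series: blk_get_request() now takes BLK_MQ_REQ_* flags as its last argument instead of a gfp_t, blk_get_request_flags() is folded into it, and former GFP_KERNEL callers simply pass 0, since a sleeping allocation is the default. A sketch of the new convention (not driver code):

req = blk_get_request(q, REQ_OP_SCSI_IN, 0);	/* 0 = may sleep */
if (IS_ERR(req))
	return PTR_ERR(req);
/* callers that must submit while the queue is frozen or preempt-only
 * pass BLK_MQ_REQ_PREEMPT instead, as scsi_execute() does above */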
diff --git a/drivers/scsi/scsi_transport_fc.c b/drivers/scsi/scsi_transport_fc.c
index be3be0f9cb2d..1da3d71e9f61 100644
--- a/drivers/scsi/scsi_transport_fc.c
+++ b/drivers/scsi/scsi_transport_fc.c
@@ -2087,7 +2087,7 @@ fc_eh_timed_out(struct scsi_cmnd *scmd)
if (rport->port_state == FC_PORTSTATE_BLOCKED)
return BLK_EH_RESET_TIMER;
- return BLK_EH_NOT_HANDLED;
+ return BLK_EH_DONE;
}
EXPORT_SYMBOL(fc_eh_timed_out);
@@ -3591,10 +3591,9 @@ fc_bsg_job_timeout(struct request *req)
}
/* the blk_end_sync_io() doesn't check the error */
- if (!inflight)
- return BLK_EH_NOT_HANDLED;
- else
- return BLK_EH_HANDLED;
+ if (inflight)
+ blk_mq_complete_request(req);
+ return BLK_EH_DONE;
}
/**
@@ -3781,8 +3780,7 @@ fc_bsg_hostadd(struct Scsi_Host *shost, struct fc_host_attrs *fc_host)
snprintf(bsg_name, sizeof(bsg_name),
"fc_host%d", shost->host_no);
- q = bsg_setup_queue(dev, bsg_name, fc_bsg_dispatch, i->f->dd_bsg_size,
- NULL);
+ q = bsg_setup_queue(dev, bsg_name, fc_bsg_dispatch, i->f->dd_bsg_size);
if (IS_ERR(q)) {
dev_err(dev,
"fc_host%d: bsg interface failed to initialize - setup queue\n",
@@ -3827,8 +3825,8 @@ fc_bsg_rportadd(struct Scsi_Host *shost, struct fc_rport *rport)
if (!i->f->bsg_request)
return -ENOTSUPP;
- q = bsg_setup_queue(dev, NULL, fc_bsg_dispatch, i->f->dd_bsg_size,
- NULL);
+ q = bsg_setup_queue(dev, dev_name(dev), fc_bsg_dispatch,
+ i->f->dd_bsg_size);
if (IS_ERR(q)) {
dev_err(dev, "failed to setup bsg queue\n");
return PTR_ERR(q);
diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c
index 65f6c94f2e9b..6fd2fe210fc3 100644
--- a/drivers/scsi/scsi_transport_iscsi.c
+++ b/drivers/scsi/scsi_transport_iscsi.c
@@ -1542,7 +1542,7 @@ iscsi_bsg_host_add(struct Scsi_Host *shost, struct iscsi_cls_host *ihost)
return -ENOTSUPP;
snprintf(bsg_name, sizeof(bsg_name), "iscsi_host%d", shost->host_no);
- q = bsg_setup_queue(dev, bsg_name, iscsi_bsg_host_dispatch, 0, NULL);
+ q = bsg_setup_queue(dev, bsg_name, iscsi_bsg_host_dispatch, 0);
if (IS_ERR(q)) {
shost_printk(KERN_ERR, shost, "bsg interface failed to "
"initialize - no request queue\n");
diff --git a/drivers/scsi/scsi_transport_sas.c b/drivers/scsi/scsi_transport_sas.c
index 08acbabfae07..e2953b416746 100644
--- a/drivers/scsi/scsi_transport_sas.c
+++ b/drivers/scsi/scsi_transport_sas.c
@@ -187,16 +187,6 @@ static int sas_smp_dispatch(struct bsg_job *job)
return 0;
}
-static void sas_host_release(struct device *dev)
-{
- struct Scsi_Host *shost = dev_to_shost(dev);
- struct sas_host_attrs *sas_host = to_sas_host_attrs(shost);
- struct request_queue *q = sas_host->q;
-
- if (q)
- blk_cleanup_queue(q);
-}
-
static int sas_bsg_initialize(struct Scsi_Host *shost, struct sas_rphy *rphy)
{
struct request_queue *q;
@@ -208,7 +198,7 @@ static int sas_bsg_initialize(struct Scsi_Host *shost, struct sas_rphy *rphy)
if (rphy) {
q = bsg_setup_queue(&rphy->dev, dev_name(&rphy->dev),
- sas_smp_dispatch, 0, NULL);
+ sas_smp_dispatch, 0);
if (IS_ERR(q))
return PTR_ERR(q);
rphy->q = q;
@@ -217,7 +207,7 @@ static int sas_bsg_initialize(struct Scsi_Host *shost, struct sas_rphy *rphy)
snprintf(name, sizeof(name), "sas_host%d", shost->host_no);
q = bsg_setup_queue(&shost->shost_gendev, name,
- sas_smp_dispatch, 0, sas_host_release);
+ sas_smp_dispatch, 0);
if (IS_ERR(q))
return PTR_ERR(q);
to_sas_host_attrs(shost)->q = q;
@@ -260,8 +250,11 @@ static int sas_host_remove(struct transport_container *tc, struct device *dev,
struct Scsi_Host *shost = dev_to_shost(dev);
struct request_queue *q = to_sas_host_attrs(shost)->q;
- if (q)
+ if (q) {
bsg_unregister_queue(q);
+ blk_cleanup_queue(q);
+ }
+
return 0;
}
diff --git a/drivers/scsi/scsi_transport_srp.c b/drivers/scsi/scsi_transport_srp.c
index 36f6190931bc..4e46fdb2d7c9 100644
--- a/drivers/scsi/scsi_transport_srp.c
+++ b/drivers/scsi/scsi_transport_srp.c
@@ -51,6 +51,8 @@ struct srp_internal {
struct transport_container rport_attr_cont;
};
+static int scsi_is_srp_rport(const struct device *dev);
+
#define to_srp_internal(tmpl) container_of(tmpl, struct srp_internal, t)
#define dev_to_rport(d) container_of(d, struct srp_rport, dev)
@@ -60,9 +62,24 @@ static inline struct Scsi_Host *rport_to_shost(struct srp_rport *r)
return dev_to_shost(r->dev.parent);
}
+static int find_child_rport(struct device *dev, void *data)
+{
+ struct device **child = data;
+
+ if (scsi_is_srp_rport(dev)) {
+ WARN_ON_ONCE(*child);
+ *child = dev;
+ }
+ return 0;
+}
+
static inline struct srp_rport *shost_to_rport(struct Scsi_Host *shost)
{
- return transport_class_to_srp_rport(&shost->shost_gendev);
+ struct device *child = NULL;
+
+ WARN_ON_ONCE(device_for_each_child(&shost->shost_gendev, &child,
+ find_child_rport) < 0);
+ return child ? dev_to_rport(child) : NULL;
}
/**
@@ -587,7 +604,7 @@ EXPORT_SYMBOL(srp_reconnect_rport);
*
* If a timeout occurs while an rport is in the blocked state, ask the SCSI
* EH to continue waiting (BLK_EH_RESET_TIMER). Otherwise let the SCSI core
- * handle the timeout (BLK_EH_NOT_HANDLED).
+ * handle the timeout (BLK_EH_DONE).
*
* Note: This function is called from soft-IRQ context and with the request
* queue lock held.
@@ -600,9 +617,10 @@ enum blk_eh_timer_return srp_timed_out(struct scsi_cmnd *scmd)
struct srp_rport *rport = shost_to_rport(shost);
pr_debug("timeout for sdev %s\n", dev_name(&sdev->sdev_gendev));
- return rport->fast_io_fail_tmo < 0 && rport->dev_loss_tmo < 0 &&
+ return rport && rport->fast_io_fail_tmo < 0 &&
+ rport->dev_loss_tmo < 0 &&
i->f->reset_timer_if_blocked && scsi_device_blocked(sdev) ?
- BLK_EH_RESET_TIMER : BLK_EH_NOT_HANDLED;
+ BLK_EH_RESET_TIMER : BLK_EH_DONE;
}
EXPORT_SYMBOL(srp_timed_out);
diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c
index c198b96368dd..6fc58e2c99d3 100644
--- a/drivers/scsi/sg.c
+++ b/drivers/scsi/sg.c
@@ -66,7 +66,6 @@ static int sg_version_num = 30536; /* 2 digits for each component */
static char *sg_version_date = "20140603";
static int sg_proc_init(void);
-static void sg_proc_cleanup(void);
#endif
#define SG_ALLOW_DIO_DEF 0
@@ -1661,7 +1660,7 @@ static void __exit
exit_sg(void)
{
#ifdef CONFIG_SCSI_PROC_FS
- sg_proc_cleanup();
+ remove_proc_subtree("scsi/sg", NULL);
#endif /* CONFIG_SCSI_PROC_FS */
scsi_unregister_interface(&sg_interface);
class_destroy(sg_sysfs_class);
@@ -1715,7 +1714,7 @@ sg_start_req(Sg_request *srp, unsigned char *cmd)
* does not sleep except under memory pressure.
*/
rq = blk_get_request(q, hp->dxfer_direction == SG_DXFER_TO_DEV ?
- REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN, GFP_KERNEL);
+ REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN, 0);
if (IS_ERR(rq)) {
kfree(long_cmdp);
return PTR_ERR(rq);
@@ -1894,7 +1893,7 @@ retry:
num = (rem_sz > scatter_elem_sz_prev) ?
scatter_elem_sz_prev : rem_sz;
- schp->pages[k] = alloc_pages(gfp_mask, order);
+ schp->pages[k] = alloc_pages(gfp_mask | __GFP_ZERO, order);
if (!schp->pages[k])
goto out;
@@ -2274,11 +2273,6 @@ sg_get_dev(int dev)
}
#ifdef CONFIG_SCSI_PROC_FS
-
-static struct proc_dir_entry *sg_proc_sgp = NULL;
-
-static char sg_proc_sg_dirname[] = "scsi/sg";
-
static int sg_proc_seq_show_int(struct seq_file *s, void *v);
static int sg_proc_single_open_adio(struct inode *inode, struct file *file);
@@ -2306,37 +2300,11 @@ static const struct file_operations dressz_fops = {
};
static int sg_proc_seq_show_version(struct seq_file *s, void *v);
-static int sg_proc_single_open_version(struct inode *inode, struct file *file);
-static const struct file_operations version_fops = {
- .owner = THIS_MODULE,
- .open = sg_proc_single_open_version,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
static int sg_proc_seq_show_devhdr(struct seq_file *s, void *v);
-static int sg_proc_single_open_devhdr(struct inode *inode, struct file *file);
-static const struct file_operations devhdr_fops = {
- .owner = THIS_MODULE,
- .open = sg_proc_single_open_devhdr,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
static int sg_proc_seq_show_dev(struct seq_file *s, void *v);
-static int sg_proc_open_dev(struct inode *inode, struct file *file);
static void * dev_seq_start(struct seq_file *s, loff_t *pos);
static void * dev_seq_next(struct seq_file *s, void *v, loff_t *pos);
static void dev_seq_stop(struct seq_file *s, void *v);
-static const struct file_operations dev_fops = {
- .owner = THIS_MODULE,
- .open = sg_proc_open_dev,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
static const struct seq_operations dev_seq_ops = {
.start = dev_seq_start,
.next = dev_seq_next,
@@ -2345,14 +2313,6 @@ static const struct seq_operations dev_seq_ops = {
};
static int sg_proc_seq_show_devstrs(struct seq_file *s, void *v);
-static int sg_proc_open_devstrs(struct inode *inode, struct file *file);
-static const struct file_operations devstrs_fops = {
- .owner = THIS_MODULE,
- .open = sg_proc_open_devstrs,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
static const struct seq_operations devstrs_seq_ops = {
.start = dev_seq_start,
.next = dev_seq_next,
@@ -2361,14 +2321,6 @@ static const struct seq_operations devstrs_seq_ops = {
};
static int sg_proc_seq_show_debug(struct seq_file *s, void *v);
-static int sg_proc_open_debug(struct inode *inode, struct file *file);
-static const struct file_operations debug_fops = {
- .owner = THIS_MODULE,
- .open = sg_proc_open_debug,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
static const struct seq_operations debug_seq_ops = {
.start = dev_seq_start,
.next = dev_seq_next,
@@ -2376,50 +2328,23 @@ static const struct seq_operations debug_seq_ops = {
.show = sg_proc_seq_show_debug,
};
-
-struct sg_proc_leaf {
- const char * name;
- const struct file_operations * fops;
-};
-
-static const struct sg_proc_leaf sg_proc_leaf_arr[] = {
- {"allow_dio", &adio_fops},
- {"debug", &debug_fops},
- {"def_reserved_size", &dressz_fops},
- {"device_hdr", &devhdr_fops},
- {"devices", &dev_fops},
- {"device_strs", &devstrs_fops},
- {"version", &version_fops}
-};
-
static int
sg_proc_init(void)
{
- int num_leaves = ARRAY_SIZE(sg_proc_leaf_arr);
- int k;
+ struct proc_dir_entry *p;
- sg_proc_sgp = proc_mkdir(sg_proc_sg_dirname, NULL);
- if (!sg_proc_sgp)
+ p = proc_mkdir("scsi/sg", NULL);
+ if (!p)
return 1;
- for (k = 0; k < num_leaves; ++k) {
- const struct sg_proc_leaf *leaf = &sg_proc_leaf_arr[k];
- umode_t mask = leaf->fops->write ? S_IRUGO | S_IWUSR : S_IRUGO;
- proc_create(leaf->name, mask, sg_proc_sgp, leaf->fops);
- }
- return 0;
-}
-static void
-sg_proc_cleanup(void)
-{
- int k;
- int num_leaves = ARRAY_SIZE(sg_proc_leaf_arr);
-
- if (!sg_proc_sgp)
- return;
- for (k = 0; k < num_leaves; ++k)
- remove_proc_entry(sg_proc_leaf_arr[k].name, sg_proc_sgp);
- remove_proc_entry(sg_proc_sg_dirname, NULL);
+ proc_create("allow_dio", S_IRUGO | S_IWUSR, p, &adio_fops);
+ proc_create_seq("debug", S_IRUGO, p, &debug_seq_ops);
+ proc_create("def_reserved_size", S_IRUGO | S_IWUSR, p, &dressz_fops);
+ proc_create_single("device_hdr", S_IRUGO, p, sg_proc_seq_show_devhdr);
+ proc_create_seq("devices", S_IRUGO, p, &dev_seq_ops);
+ proc_create_seq("device_strs", S_IRUGO, p, &devstrs_seq_ops);
+ proc_create_single("version", S_IRUGO, p, sg_proc_seq_show_version);
+ return 0;
}
@@ -2482,22 +2407,12 @@ static int sg_proc_seq_show_version(struct seq_file *s, void *v)
return 0;
}
-static int sg_proc_single_open_version(struct inode *inode, struct file *file)
-{
- return single_open(file, sg_proc_seq_show_version, NULL);
-}
-
static int sg_proc_seq_show_devhdr(struct seq_file *s, void *v)
{
seq_puts(s, "host\tchan\tid\tlun\ttype\topens\tqdepth\tbusy\tonline\n");
return 0;
}
-static int sg_proc_single_open_devhdr(struct inode *inode, struct file *file)
-{
- return single_open(file, sg_proc_seq_show_devhdr, NULL);
-}
-
struct sg_proc_deviter {
loff_t index;
size_t max;
@@ -2531,11 +2446,6 @@ static void dev_seq_stop(struct seq_file *s, void *v)
kfree(s->private);
}
-static int sg_proc_open_dev(struct inode *inode, struct file *file)
-{
- return seq_open(file, &dev_seq_ops);
-}
-
static int sg_proc_seq_show_dev(struct seq_file *s, void *v)
{
struct sg_proc_deviter * it = (struct sg_proc_deviter *) v;
@@ -2562,11 +2472,6 @@ static int sg_proc_seq_show_dev(struct seq_file *s, void *v)
return 0;
}
-static int sg_proc_open_devstrs(struct inode *inode, struct file *file)
-{
- return seq_open(file, &devstrs_seq_ops);
-}
-
static int sg_proc_seq_show_devstrs(struct seq_file *s, void *v)
{
struct sg_proc_deviter * it = (struct sg_proc_deviter *) v;
@@ -2650,11 +2555,6 @@ static void sg_proc_debug_helper(struct seq_file *s, Sg_device * sdp)
}
}
-static int sg_proc_open_debug(struct inode *inode, struct file *file)
-{
- return seq_open(file, &debug_seq_ops);
-}
-
static int sg_proc_seq_show_debug(struct seq_file *s, void *v)
{
struct sg_proc_deviter * it = (struct sg_proc_deviter *) v;
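The sg.c cleanup above relies on proc_create_seq() and proc_create_single(), which take the seq_operations table or show() callback directly and so make the removed open() wrappers and file_operations structs unnecessary. A self-contained sketch of the seq variant (the demo_* names and static array are illustrative only):

#include <linux/kernel.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>

static const int demo_vals[] = { 1, 2, 3 };

static void *demo_start(struct seq_file *s, loff_t *pos)
{
	return *pos < ARRAY_SIZE(demo_vals) ? (void *)&demo_vals[*pos] : NULL;
}

static void *demo_next(struct seq_file *s, void *v, loff_t *pos)
{
	++*pos;
	return demo_start(s, pos);
}

static void demo_stop(struct seq_file *s, void *v)
{
}

static int demo_show(struct seq_file *s, void *v)
{
	seq_printf(s, "%d\n", *(const int *)v);
	return 0;
}

static const struct seq_operations demo_seq_ops = {
	.start	= demo_start,
	.next	= demo_next,
	.stop	= demo_stop,
	.show	= demo_show,
};

/* at init time: proc_create_seq("demo", S_IRUGO, NULL, &demo_seq_ops); */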
diff --git a/drivers/scsi/sr_ioctl.c b/drivers/scsi/sr_ioctl.c
index 2a21f2d48592..35fab1e18adc 100644
--- a/drivers/scsi/sr_ioctl.c
+++ b/drivers/scsi/sr_ioctl.c
@@ -188,9 +188,13 @@ int sr_do_ioctl(Scsi_CD *cd, struct packet_command *cgc)
struct scsi_device *SDev;
struct scsi_sense_hdr sshdr;
int result, err = 0, retries = 0;
+ unsigned char sense_buffer[SCSI_SENSE_BUFFERSIZE], *senseptr = NULL;
SDev = cd->device;
+ if (cgc->sense)
+ senseptr = sense_buffer;
+
retry:
if (!scsi_block_when_processing_errors(SDev)) {
err = -ENODEV;
@@ -198,10 +202,12 @@ int sr_do_ioctl(Scsi_CD *cd, struct packet_command *cgc)
}
result = scsi_execute(SDev, cgc->cmd, cgc->data_direction,
- cgc->buffer, cgc->buflen,
- (unsigned char *)cgc->sense, &sshdr,
+ cgc->buffer, cgc->buflen, senseptr, &sshdr,
cgc->timeout, IOCTL_RETRIES, 0, 0, NULL);
+ if (cgc->sense)
+ memcpy(cgc->sense, sense_buffer, sizeof(*cgc->sense));
+
/* Minimal error checking. Ignore cases we know about, and report the rest. */
if (driver_byte(result) != 0) {
switch (sshdr.sense_key) {
diff --git a/drivers/scsi/st.c b/drivers/scsi/st.c
index 6c399480783d..a427ce9497be 100644
--- a/drivers/scsi/st.c
+++ b/drivers/scsi/st.c
@@ -545,7 +545,7 @@ static int st_scsi_execute(struct st_request *SRpnt, const unsigned char *cmd,
req = blk_get_request(SRpnt->stp->device->request_queue,
data_direction == DMA_TO_DEVICE ?
- REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN, GFP_KERNEL);
+ REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN, 0);
if (IS_ERR(req))
return DRIVER_ERROR << 24;
rq = scsi_req(req);
diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c
index 8c51d628b52e..a2ec0bc9e9fa 100644
--- a/drivers/scsi/storvsc_drv.c
+++ b/drivers/scsi/storvsc_drv.c
@@ -1722,11 +1722,14 @@ static int storvsc_probe(struct hv_device *device,
max_targets = STORVSC_MAX_TARGETS;
max_channels = STORVSC_MAX_CHANNELS;
/*
- * On Windows8 and above, we support sub-channels for storage.
+ * On Windows8 and above, we support sub-channels for storage
+ * on SCSI and FC controllers.
* The number of sub-channels offered is based on the number of
* VCPUs in the guest.
*/
- max_sub_channels = (num_cpus / storvsc_vcpus_per_sub_channel);
+ if (!dev_is_ide)
+ max_sub_channels =
+ (num_cpus - 1) / storvsc_vcpus_per_sub_channel;
}
scsi_driver.can_queue = (max_outstanding_req_per_channel *
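Concretely: on a guest with 8 VCPUs and storvsc_vcpus_per_sub_channel = 4, the old expression asked for 8 / 4 = 2 sub-channels, while the new one asks for (8 - 1) / 4 = 1; IDE devices now get no sub-channels at all, matching the SCSI/FC-only wording added to the comment.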
diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index 00e79057f870..d0a1674915a1 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -6497,12 +6497,12 @@ static enum blk_eh_timer_return ufshcd_eh_timed_out(struct scsi_cmnd *scmd)
bool found = false;
if (!scmd || !scmd->device || !scmd->device->host)
- return BLK_EH_NOT_HANDLED;
+ return BLK_EH_DONE;
host = scmd->device->host;
hba = shost_priv(host);
if (!hba)
- return BLK_EH_NOT_HANDLED;
+ return BLK_EH_DONE;
spin_lock_irqsave(host->host_lock, flags);
@@ -6520,7 +6520,7 @@ static enum blk_eh_timer_return ufshcd_eh_timed_out(struct scsi_cmnd *scmd)
* SCSI command was not actually dispatched to UFS driver, otherwise
* let SCSI layer handle the error as usual.
*/
- return found ? BLK_EH_NOT_HANDLED : BLK_EH_RESET_TIMER;
+ return found ? BLK_EH_DONE : BLK_EH_RESET_TIMER;
}
static const struct attribute_group *ufshcd_driver_groups[] = {
diff --git a/drivers/scsi/vmw_pvscsi.c b/drivers/scsi/vmw_pvscsi.c
index c374e3b5c678..777e5f1e52d1 100644
--- a/drivers/scsi/vmw_pvscsi.c
+++ b/drivers/scsi/vmw_pvscsi.c
@@ -609,7 +609,7 @@ static void pvscsi_complete_request(struct pvscsi_adapter *adapter,
break;
case BTSTAT_ABORTQUEUE:
- cmd->result = (DID_ABORT << 16);
+ cmd->result = (DID_BUS_BUSY << 16);
break;
case BTSTAT_SCSIPARITY:
diff --git a/drivers/soc/lantiq/gphy.c b/drivers/soc/lantiq/gphy.c
index 8d8659463b3e..feeb17cebc25 100644
--- a/drivers/soc/lantiq/gphy.c
+++ b/drivers/soc/lantiq/gphy.c
@@ -30,7 +30,6 @@ struct xway_gphy_priv {
struct clk *gphy_clk_gate;
struct reset_control *gphy_reset;
struct reset_control *gphy_reset2;
- struct notifier_block gphy_reboot_nb;
void __iomem *membase;
char *fw_name;
};
@@ -64,24 +63,6 @@ static const struct of_device_id xway_gphy_match[] = {
};
MODULE_DEVICE_TABLE(of, xway_gphy_match);
-static struct xway_gphy_priv *to_xway_gphy_priv(struct notifier_block *nb)
-{
- return container_of(nb, struct xway_gphy_priv, gphy_reboot_nb);
-}
-
-static int xway_gphy_reboot_notify(struct notifier_block *reboot_nb,
- unsigned long code, void *unused)
-{
- struct xway_gphy_priv *priv = to_xway_gphy_priv(reboot_nb);
-
- if (priv) {
- reset_control_assert(priv->gphy_reset);
- reset_control_assert(priv->gphy_reset2);
- }
-
- return NOTIFY_DONE;
-}
-
static int xway_gphy_load(struct device *dev, struct xway_gphy_priv *priv,
dma_addr_t *dev_addr)
{
@@ -205,14 +186,6 @@ static int xway_gphy_probe(struct platform_device *pdev)
reset_control_deassert(priv->gphy_reset);
reset_control_deassert(priv->gphy_reset2);
- /* assert the gphy reset because it can hang after a reboot: */
- priv->gphy_reboot_nb.notifier_call = xway_gphy_reboot_notify;
- priv->gphy_reboot_nb.priority = -1;
-
- ret = register_reboot_notifier(&priv->gphy_reboot_nb);
- if (ret)
- dev_warn(dev, "Failed to register reboot notifier\n");
-
platform_set_drvdata(pdev, priv);
return ret;
@@ -220,21 +193,12 @@ static int xway_gphy_probe(struct platform_device *pdev)
static int xway_gphy_remove(struct platform_device *pdev)
{
- struct device *dev = &pdev->dev;
struct xway_gphy_priv *priv = platform_get_drvdata(pdev);
- int ret;
-
- reset_control_assert(priv->gphy_reset);
- reset_control_assert(priv->gphy_reset2);
iowrite32be(0, priv->membase);
clk_disable_unprepare(priv->gphy_clk_gate);
- ret = unregister_reboot_notifier(&priv->gphy_reboot_nb);
- if (ret)
- dev_warn(dev, "Failed to unregister reboot notifier\n");
-
return 0;
}
diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig
index 2d4146ce2f1b..ad5d68e1dab7 100644
--- a/drivers/spi/Kconfig
+++ b/drivers/spi/Kconfig
@@ -47,6 +47,13 @@ config SPI_MASTER
if SPI_MASTER
+config SPI_MEM
+ bool "SPI memory extension"
+ help
+ Enable this option if you want to use the SPI memory extension.
+ This extension is meant to simplify interaction with SPI memories
+ by providing a high-level interface to send memory-like commands.
+
comment "SPI Master Controller Drivers"
config SPI_ALTERA
@@ -71,7 +78,6 @@ config SPI_ARMADA_3700
config SPI_ATMEL
tristate "Atmel SPI Controller"
- depends on HAS_DMA
depends on ARCH_AT91 || COMPILE_TEST
help
This selects a driver for the Atmel SPI Controller, present on
@@ -115,14 +121,6 @@ config SPI_BCM2835AUX
"universal SPI master", and the regular SPI controller.
This driver is for the universal/auxiliary SPI controller.
-config SPI_BCM53XX
- tristate "Broadcom BCM53xx SPI controller"
- depends on ARCH_BCM_5301X
- depends on BCMA_POSSIBLE
- select BCMA
- help
- Enable support for the SPI controller on Broadcom BCM53xx ARM SoCs.
-
config SPI_BCM63XX
tristate "Broadcom BCM63xx SPI controller"
depends on BCM63XX || COMPILE_TEST
@@ -233,7 +231,6 @@ config SPI_EFM32
config SPI_EP93XX
tristate "Cirrus Logic EP93xx SPI controller"
- depends on HAS_DMA
depends on ARCH_EP93XX || COMPILE_TEST
help
This enables using the Cirrus EP93xx SPI controller in master
@@ -355,7 +352,6 @@ config SPI_FSL_SPI
config SPI_FSL_DSPI
tristate "Freescale DSPI controller"
select REGMAP_MMIO
- depends on HAS_DMA
depends on SOC_VF610 || SOC_LS1021A || ARCH_LAYERSCAPE || M5441x || COMPILE_TEST
help
This enables support for the Freescale DSPI controller in master
@@ -431,7 +427,6 @@ config SPI_OMAP_UWIRE
config SPI_OMAP24XX
tristate "McSPI driver for OMAP"
- depends on HAS_DMA
depends on ARCH_OMAP2PLUS || COMPILE_TEST
select SG_SPLIT
help
@@ -440,7 +435,6 @@ config SPI_OMAP24XX
config SPI_TI_QSPI
tristate "DRA7xxx QSPI controller support"
- depends on HAS_DMA
depends on ARCH_OMAP2PLUS || COMPILE_TEST
help
QSPI master controller for DRA7xxx used for flash devices.
@@ -469,7 +463,6 @@ config SPI_PIC32
config SPI_PIC32_SQI
tristate "Microchip PIC32 Quad SPI driver"
depends on MACH_PIC32 || COMPILE_TEST
- depends on HAS_DMA
help
SPI driver for PIC32 Quad SPI controller.
@@ -572,7 +565,7 @@ config SPI_SC18IS602
config SPI_SH_MSIOF
tristate "SuperH MSIOF SPI controller"
- depends on HAVE_CLK && HAS_DMA
+ depends on HAVE_CLK
depends on ARCH_SHMOBILE || ARCH_RENESAS || COMPILE_TEST
help
SPI driver for SuperH and SH Mobile MSIOF blocks.
@@ -650,7 +643,7 @@ config SPI_MXS
config SPI_TEGRA114
tristate "NVIDIA Tegra114 SPI Controller"
depends on (ARCH_TEGRA && TEGRA20_APB_DMA) || COMPILE_TEST
- depends on RESET_CONTROLLER && HAS_DMA
+ depends on RESET_CONTROLLER
help
SPI driver for NVIDIA Tegra114 SPI Controller interface. This controller
is different than the older SoCs SPI controller and also register interface
@@ -668,7 +661,7 @@ config SPI_TEGRA20_SFLASH
config SPI_TEGRA20_SLINK
tristate "Nvidia Tegra20/Tegra30 SLINK Controller"
depends on (ARCH_TEGRA && TEGRA20_APB_DMA) || COMPILE_TEST
- depends on RESET_CONTROLLER && HAS_DMA
+ depends on RESET_CONTROLLER
help
SPI driver for Nvidia Tegra20/Tegra30 SLINK Controller interface.
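The new SPI_MEM layer describes a memory-like command as four phases -- cmd, addr, dummy and data -- which is exactly the structure the bcm-qspi conversion below reads back out (op->cmd.opcode, op->addr.nbytes, op->dummy.nbytes, op->data.buf.in, and so on). A sketch of issuing a quad-output read through it, assuming the SPI_MEM_OP_* helpers and spi_mem_exec_op() from <linux/spi/spi-mem.h>:

#include <linux/spi/spi-mem.h>

static int demo_quad_read(struct spi_mem *mem, u32 addr, void *buf, size_t len)
{
	/* 0x6b read: 1-wire cmd, 3-byte 1-wire address, 1 dummy byte,
	 * data returned on 4 lines */
	struct spi_mem_op op =
		SPI_MEM_OP(SPI_MEM_OP_CMD(0x6b, 1),
			   SPI_MEM_OP_ADDR(3, addr, 1),
			   SPI_MEM_OP_DUMMY(1, 1),
			   SPI_MEM_OP_DATA_IN(len, buf, 4));

	return spi_mem_exec_op(mem, &op);
}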
diff --git a/drivers/spi/Makefile b/drivers/spi/Makefile
index b935f10eb961..cb1f4378b87c 100644
--- a/drivers/spi/Makefile
+++ b/drivers/spi/Makefile
@@ -8,6 +8,7 @@ ccflags-$(CONFIG_SPI_DEBUG) := -DDEBUG
# small core, mostly translating board-specific
# config declarations into driver model code
obj-$(CONFIG_SPI_MASTER) += spi.o
+obj-$(CONFIG_SPI_MEM) += spi-mem.o
obj-$(CONFIG_SPI_SPIDEV) += spidev.o
obj-$(CONFIG_SPI_LOOPBACK_TEST) += spi-loopback-test.o
@@ -20,7 +21,6 @@ obj-$(CONFIG_SPI_AU1550) += spi-au1550.o
obj-$(CONFIG_SPI_AXI_SPI_ENGINE) += spi-axi-spi-engine.o
obj-$(CONFIG_SPI_BCM2835) += spi-bcm2835.o
obj-$(CONFIG_SPI_BCM2835AUX) += spi-bcm2835aux.o
-obj-$(CONFIG_SPI_BCM53XX) += spi-bcm53xx.o
obj-$(CONFIG_SPI_BCM63XX) += spi-bcm63xx.o
obj-$(CONFIG_SPI_BCM63XX_HSSPI) += spi-bcm63xx-hsspi.o
obj-$(CONFIG_SPI_BCM_QSPI) += spi-iproc-qspi.o spi-brcmstb-qspi.o spi-bcm-qspi.o
diff --git a/drivers/spi/internals.h b/drivers/spi/internals.h
new file mode 100644
index 000000000000..4a28a8395552
--- /dev/null
+++ b/drivers/spi/internals.h
@@ -0,0 +1,43 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * Copyright (C) 2018 Exceet Electronics GmbH
+ * Copyright (C) 2018 Bootlin
+ *
+ * Author: Boris Brezillon <boris.brezillon@bootlin.com>
+ *
+ * Helpers needed by the spi or spi-mem logic. Should not be used outside of
+ * spi-mem.c and spi.c.
+ */
+
+#ifndef __LINUX_SPI_INTERNALS_H
+#define __LINUX_SPI_INTERNALS_H
+
+#include <linux/device.h>
+#include <linux/dma-direction.h>
+#include <linux/scatterlist.h>
+#include <linux/spi/spi.h>
+
+void spi_flush_queue(struct spi_controller *ctrl);
+
+#ifdef CONFIG_HAS_DMA
+int spi_map_buf(struct spi_controller *ctlr, struct device *dev,
+ struct sg_table *sgt, void *buf, size_t len,
+ enum dma_data_direction dir);
+void spi_unmap_buf(struct spi_controller *ctlr, struct device *dev,
+ struct sg_table *sgt, enum dma_data_direction dir);
+#else /* !CONFIG_HAS_DMA */
+static inline int spi_map_buf(struct spi_controller *ctlr, struct device *dev,
+ struct sg_table *sgt, void *buf, size_t len,
+ enum dma_data_direction dir)
+{
+ return -EINVAL;
+}
+
+static inline void spi_unmap_buf(struct spi_controller *ctlr,
+ struct device *dev, struct sg_table *sgt,
+ enum dma_data_direction dir)
+{
+}
+#endif /* CONFIG_HAS_DMA */
+
+#endif /* __LINUX_SPI_INTERNALS_H */
diff --git a/drivers/spi/spi-bcm-qspi.c b/drivers/spi/spi-bcm-qspi.c
index 1596d35498c5..8612525fa4e3 100644
--- a/drivers/spi/spi-bcm-qspi.c
+++ b/drivers/spi/spi-bcm-qspi.c
@@ -30,6 +30,7 @@
#include <linux/platform_device.h>
#include <linux/slab.h>
#include <linux/spi/spi.h>
+#include <linux/spi/spi-mem.h>
#include <linux/sysfs.h>
#include <linux/types.h>
#include "spi-bcm-qspi.h"
@@ -215,10 +216,10 @@ struct bcm_qspi {
int bspi_maj_rev;
int bspi_min_rev;
int bspi_enabled;
- struct spi_flash_read_message *bspi_rf_msg;
- u32 bspi_rf_msg_idx;
- u32 bspi_rf_msg_len;
- u32 bspi_rf_msg_status;
+ const struct spi_mem_op *bspi_rf_op;
+ u32 bspi_rf_op_idx;
+ u32 bspi_rf_op_len;
+ u32 bspi_rf_op_status;
struct bcm_xfer_mode xfer_mode;
u32 s3_strap_override_ctrl;
bool bspi_mode;
@@ -313,26 +314,26 @@ static inline void bcm_qspi_bspi_lr_clear(struct bcm_qspi *qspi)
static void bcm_qspi_bspi_lr_data_read(struct bcm_qspi *qspi)
{
- u32 *buf = (u32 *)qspi->bspi_rf_msg->buf;
+ u32 *buf = (u32 *)qspi->bspi_rf_op->data.buf.in;
u32 data = 0;
- dev_dbg(&qspi->pdev->dev, "xfer %p rx %p rxlen %d\n", qspi->bspi_rf_msg,
- qspi->bspi_rf_msg->buf, qspi->bspi_rf_msg_len);
+ dev_dbg(&qspi->pdev->dev, "xfer %p rx %p rxlen %d\n", qspi->bspi_rf_op,
+ qspi->bspi_rf_op->data.buf.in, qspi->bspi_rf_op_len);
while (!bcm_qspi_bspi_lr_is_fifo_empty(qspi)) {
data = bcm_qspi_bspi_lr_read_fifo(qspi);
- if (likely(qspi->bspi_rf_msg_len >= 4) &&
+ if (likely(qspi->bspi_rf_op_len >= 4) &&
IS_ALIGNED((uintptr_t)buf, 4)) {
- buf[qspi->bspi_rf_msg_idx++] = data;
- qspi->bspi_rf_msg_len -= 4;
+ buf[qspi->bspi_rf_op_idx++] = data;
+ qspi->bspi_rf_op_len -= 4;
} else {
/* Read out remaining bytes, making sure we do not overrun the buffer */
- u8 *cbuf = (u8 *)&buf[qspi->bspi_rf_msg_idx];
+ u8 *cbuf = (u8 *)&buf[qspi->bspi_rf_op_idx];
data = cpu_to_le32(data);
- while (qspi->bspi_rf_msg_len) {
+ while (qspi->bspi_rf_op_len) {
*cbuf++ = (u8)data;
data >>= 8;
- qspi->bspi_rf_msg_len--;
+ qspi->bspi_rf_op_len--;
}
}
}
@@ -349,14 +350,12 @@ static void bcm_qspi_bspi_set_xfer_params(struct bcm_qspi *qspi, u8 cmd_byte,
}
static int bcm_qspi_bspi_set_flex_mode(struct bcm_qspi *qspi,
- struct spi_flash_read_message *msg,
- int hp)
+ const struct spi_mem_op *op, int hp)
{
int bpc = 0, bpp = 0;
- u8 command = msg->read_opcode;
- int width = msg->data_nbits ? msg->data_nbits : SPI_NBITS_SINGLE;
- int addrlen = msg->addr_width;
- int addr_nbits = msg->addr_nbits ? msg->addr_nbits : SPI_NBITS_SINGLE;
+ u8 command = op->cmd.opcode;
+ int width = op->cmd.buswidth ? op->cmd.buswidth : SPI_NBITS_SINGLE;
+ int addrlen = op->addr.nbytes * 8;
int flex_mode = 1;
dev_dbg(&qspi->pdev->dev, "set flex mode w %x addrlen %x hp %d\n",
@@ -365,7 +364,7 @@ static int bcm_qspi_bspi_set_flex_mode(struct bcm_qspi *qspi,
if (addrlen == BSPI_ADDRLEN_4BYTES)
bpp = BSPI_BPP_ADDR_SELECT_MASK;
- bpp |= msg->dummy_bytes * (8/addr_nbits);
+ bpp |= (op->dummy.nbytes * 8) / op->dummy.buswidth;
switch (width) {
case SPI_NBITS_SINGLE:
@@ -397,11 +396,10 @@ static int bcm_qspi_bspi_set_flex_mode(struct bcm_qspi *qspi,
}
static int bcm_qspi_bspi_set_override(struct bcm_qspi *qspi,
- struct spi_flash_read_message *msg,
- int hp)
+ const struct spi_mem_op *op, int hp)
{
- int width = msg->data_nbits ? msg->data_nbits : SPI_NBITS_SINGLE;
- int addrlen = msg->addr_width;
+ int width = op->data.buswidth ? op->data.buswidth : SPI_NBITS_SINGLE;
+ int addrlen = op->addr.nbytes;
u32 data = bcm_qspi_read(qspi, BSPI, BSPI_STRAP_OVERRIDE_CTRL);
dev_dbg(&qspi->pdev->dev, "set override mode w %x addrlen %x hp %d\n",
@@ -437,17 +435,17 @@ static int bcm_qspi_bspi_set_override(struct bcm_qspi *qspi,
/* set the override mode */
data |= BSPI_STRAP_OVERRIDE_CTRL_OVERRIDE;
bcm_qspi_write(qspi, BSPI, BSPI_STRAP_OVERRIDE_CTRL, data);
- bcm_qspi_bspi_set_xfer_params(qspi, msg->read_opcode, 0, 0, 0);
+ bcm_qspi_bspi_set_xfer_params(qspi, op->cmd.opcode, 0, 0, 0);
return 0;
}
static int bcm_qspi_bspi_set_mode(struct bcm_qspi *qspi,
- struct spi_flash_read_message *msg, int hp)
+ const struct spi_mem_op *op, int hp)
{
int error = 0;
- int width = msg->data_nbits ? msg->data_nbits : SPI_NBITS_SINGLE;
- int addrlen = msg->addr_width;
+ int width = op->data.buswidth ? op->data.buswidth : SPI_NBITS_SINGLE;
+ int addrlen = op->addr.nbytes;
/* default mode */
qspi->xfer_mode.flex_mode = true;
@@ -460,12 +458,12 @@ static int bcm_qspi_bspi_set_mode(struct bcm_qspi *qspi,
if (val & mask || qspi->s3_strap_override_ctrl & mask) {
qspi->xfer_mode.flex_mode = false;
bcm_qspi_write(qspi, BSPI, BSPI_FLEX_MODE_ENABLE, 0);
- error = bcm_qspi_bspi_set_override(qspi, msg, hp);
+ error = bcm_qspi_bspi_set_override(qspi, op, hp);
}
}
if (qspi->xfer_mode.flex_mode)
- error = bcm_qspi_bspi_set_flex_mode(qspi, msg, hp);
+ error = bcm_qspi_bspi_set_flex_mode(qspi, op, hp);
if (error) {
dev_warn(&qspi->pdev->dev,
@@ -490,7 +488,7 @@ static int bcm_qspi_bspi_set_mode(struct bcm_qspi *qspi,
static void bcm_qspi_enable_bspi(struct bcm_qspi *qspi)
{
- if (!has_bspi(qspi) || (qspi->bspi_enabled))
+ if (!has_bspi(qspi))
return;
qspi->bspi_enabled = 1;
@@ -505,7 +503,7 @@ static void bcm_qspi_enable_bspi(struct bcm_qspi *qspi)
static void bcm_qspi_disable_bspi(struct bcm_qspi *qspi)
{
- if (!has_bspi(qspi) || (!qspi->bspi_enabled))
+ if (!has_bspi(qspi))
return;
qspi->bspi_enabled = 0;
@@ -519,16 +517,19 @@ static void bcm_qspi_disable_bspi(struct bcm_qspi *qspi)
static void bcm_qspi_chip_select(struct bcm_qspi *qspi, int cs)
{
- u32 data = 0;
+ u32 rd = 0;
+ u32 wr = 0;
- if (qspi->curr_cs == cs)
- return;
if (qspi->base[CHIP_SELECT]) {
- data = bcm_qspi_read(qspi, CHIP_SELECT, 0);
- data = (data & ~0xff) | (1 << cs);
- bcm_qspi_write(qspi, CHIP_SELECT, 0, data);
+ rd = bcm_qspi_read(qspi, CHIP_SELECT, 0);
+ wr = (rd & ~0xff) | (1 << cs);
+ if (rd == wr)
+ return;
+ bcm_qspi_write(qspi, CHIP_SELECT, 0, wr);
usleep_range(10, 20);
}
+
+ dev_dbg(&qspi->pdev->dev, "using cs:%d\n", cs);
qspi->curr_cs = cs;
}
@@ -755,8 +756,13 @@ static int write_to_hw(struct bcm_qspi *qspi, struct spi_device *spi)
dev_dbg(&qspi->pdev->dev, "WR %04x\n", val);
}
mspi_cdram = MSPI_CDRAM_CONT_BIT;
- mspi_cdram |= (~(1 << spi->chip_select) &
- MSPI_CDRAM_PCS);
+
+ if (has_bspi(qspi))
+ mspi_cdram &= ~1;
+ else
+ mspi_cdram |= (~(1 << spi->chip_select) &
+ MSPI_CDRAM_PCS);
+
mspi_cdram |= ((tp.trans->bits_per_word <= 8) ? 0 :
MSPI_CDRAM_BITSE_BIT);
@@ -794,19 +800,20 @@ done:
return slot;
}
-static int bcm_qspi_bspi_flash_read(struct spi_device *spi,
- struct spi_flash_read_message *msg)
+static int bcm_qspi_bspi_exec_mem_op(struct spi_device *spi,
+ const struct spi_mem_op *op)
{
struct bcm_qspi *qspi = spi_master_get_devdata(spi->master);
- u32 addr = 0, len, rdlen, len_words;
+ u32 addr = 0, len, rdlen, len_words, from = 0;
int ret = 0;
unsigned long timeo = msecs_to_jiffies(100);
struct bcm_qspi_soc_intc *soc_intc = qspi->soc_intc;
if (bcm_qspi_bspi_ver_three(qspi))
- if (msg->addr_width == BSPI_ADDRLEN_4BYTES)
+ if (op->addr.nbytes == BSPI_ADDRLEN_4BYTES)
return -EIO;
+ from = op->addr.val;
bcm_qspi_chip_select(qspi, spi->chip_select);
bcm_qspi_write(qspi, MSPI, MSPI_WRITE_LOCK, 0);
@@ -815,15 +822,15 @@ static int bcm_qspi_bspi_flash_read(struct spi_device *spi,
* the upper address byte to bspi
*/
if (bcm_qspi_bspi_ver_three(qspi) == false) {
- addr = msg->from & 0xff000000;
+ addr = from & 0xff000000;
bcm_qspi_write(qspi, BSPI,
BSPI_BSPI_FLASH_UPPER_ADDR_BYTE, addr);
}
if (!qspi->xfer_mode.flex_mode)
- addr = msg->from;
+ addr = from;
else
- addr = msg->from & 0x00ffffff;
+ addr = from & 0x00ffffff;
if (bcm_qspi_bspi_ver_three(qspi) == true)
addr = (addr + 0xc00000) & 0xffffff;
@@ -832,8 +839,8 @@ static int bcm_qspi_bspi_flash_read(struct spi_device *spi,
* read into the entire buffer by breaking the reads
* into RAF buffer read lengths
*/
- len = msg->len;
- qspi->bspi_rf_msg_idx = 0;
+ len = op->data.nbytes;
+ qspi->bspi_rf_op_idx = 0;
do {
if (len > BSPI_READ_LENGTH)
@@ -844,9 +851,9 @@ static int bcm_qspi_bspi_flash_read(struct spi_device *spi,
reinit_completion(&qspi->bspi_done);
bcm_qspi_enable_bspi(qspi);
len_words = (rdlen + 3) >> 2;
- qspi->bspi_rf_msg = msg;
- qspi->bspi_rf_msg_status = 0;
- qspi->bspi_rf_msg_len = rdlen;
+ qspi->bspi_rf_op = op;
+ qspi->bspi_rf_op_status = 0;
+ qspi->bspi_rf_op_len = rdlen;
dev_dbg(&qspi->pdev->dev,
"bspi xfr addr 0x%x len 0x%x", addr, rdlen);
bcm_qspi_write(qspi, BSPI, BSPI_RAF_START_ADDR, addr);
@@ -871,7 +878,6 @@ static int bcm_qspi_bspi_flash_read(struct spi_device *spi,
}
- /* set msg return length */
- msg->retlen += rdlen;
addr += rdlen;
len -= rdlen;
} while (len);
@@ -906,61 +912,63 @@ static int bcm_qspi_transfer_one(struct spi_master *master,
return 0;
}
-static int bcm_qspi_mspi_flash_read(struct spi_device *spi,
- struct spi_flash_read_message *msg)
+static int bcm_qspi_mspi_exec_mem_op(struct spi_device *spi,
+ const struct spi_mem_op *op)
{
- struct bcm_qspi *qspi = spi_master_get_devdata(spi->master);
+ struct spi_master *master = spi->master;
+ struct bcm_qspi *qspi = spi_master_get_devdata(master);
struct spi_transfer t[2];
- u8 cmd[6];
- int ret;
+ u8 cmd[6] = { };
+ int ret, i;
memset(cmd, 0, sizeof(cmd));
memset(t, 0, sizeof(t));
/* tx */
/* opcode is in cmd[0] */
- cmd[0] = msg->read_opcode;
- cmd[1] = msg->from >> (msg->addr_width * 8 - 8);
- cmd[2] = msg->from >> (msg->addr_width * 8 - 16);
- cmd[3] = msg->from >> (msg->addr_width * 8 - 24);
- cmd[4] = msg->from >> (msg->addr_width * 8 - 32);
+ cmd[0] = op->cmd.opcode;
+ for (i = 0; i < op->addr.nbytes; i++)
+ cmd[1 + i] = op->addr.val >> (8 * (op->addr.nbytes - i - 1));
+
t[0].tx_buf = cmd;
- t[0].len = msg->addr_width + msg->dummy_bytes + 1;
+ t[0].len = op->addr.nbytes + op->dummy.nbytes + 1;
t[0].bits_per_word = spi->bits_per_word;
- t[0].tx_nbits = msg->opcode_nbits;
+ t[0].tx_nbits = op->cmd.buswidth;
/* let the MSPI know that this is not the last transfer */
qspi->trans_pos.mspi_last_trans = false;
- ret = bcm_qspi_transfer_one(spi->master, spi, &t[0]);
+ ret = bcm_qspi_transfer_one(master, spi, &t[0]);
/* rx */
qspi->trans_pos.mspi_last_trans = true;
if (!ret) {
/* rx */
- t[1].rx_buf = msg->buf;
- t[1].len = msg->len;
- t[1].rx_nbits = msg->data_nbits;
+ t[1].rx_buf = op->data.buf.in;
+ t[1].len = op->data.nbytes;
+ t[1].rx_nbits = op->data.buswidth;
t[1].bits_per_word = spi->bits_per_word;
- ret = bcm_qspi_transfer_one(spi->master, spi, &t[1]);
+ ret = bcm_qspi_transfer_one(master, spi, &t[1]);
}
- if (!ret)
- msg->retlen = msg->len;
-
return ret;
}
-static int bcm_qspi_flash_read(struct spi_device *spi,
- struct spi_flash_read_message *msg)
+static int bcm_qspi_exec_mem_op(struct spi_mem *mem,
+ const struct spi_mem_op *op)
{
+ struct spi_device *spi = mem->spi;
struct bcm_qspi *qspi = spi_master_get_devdata(spi->master);
int ret = 0;
bool mspi_read = false;
- u32 addr, len;
+ u32 addr = 0, len;
u_char *buf;
- buf = msg->buf;
- addr = msg->from;
- len = msg->len;
+ if (!op->data.nbytes || !op->addr.nbytes || op->addr.nbytes > 4 ||
+ op->data.dir != SPI_MEM_DATA_IN)
+ return -ENOTSUPP;
+
+ buf = op->data.buf.in;
+ addr = op->addr.val;
+ len = op->data.nbytes;
if (bcm_qspi_bspi_ver_three(qspi) == true) {
/*
@@ -982,12 +990,12 @@ static int bcm_qspi_flash_read(struct spi_device *spi,
mspi_read = true;
if (mspi_read)
- return bcm_qspi_mspi_flash_read(spi, msg);
+ return bcm_qspi_mspi_exec_mem_op(spi, op);
- ret = bcm_qspi_bspi_set_mode(qspi, msg, -1);
+ ret = bcm_qspi_bspi_set_mode(qspi, op, -1);
if (!ret)
- ret = bcm_qspi_bspi_flash_read(spi, msg);
+ ret = bcm_qspi_bspi_exec_mem_op(spi, op);
return ret;
}
@@ -1026,10 +1034,10 @@ static irqreturn_t bcm_qspi_bspi_lr_l2_isr(int irq, void *dev_id)
struct bcm_qspi_soc_intc *soc_intc = qspi->soc_intc;
u32 status = qspi_dev_id->irqp->mask;
- if (qspi->bspi_enabled && qspi->bspi_rf_msg) {
+ if (qspi->bspi_enabled && qspi->bspi_rf_op) {
bcm_qspi_bspi_lr_data_read(qspi);
- if (qspi->bspi_rf_msg_len == 0) {
- qspi->bspi_rf_msg = NULL;
+ if (qspi->bspi_rf_op_len == 0) {
+ qspi->bspi_rf_op = NULL;
if (qspi->soc_intc) {
/* disable soc BSPI interrupt */
soc_intc->bcm_qspi_int_set(soc_intc, BSPI_DONE,
@@ -1038,7 +1046,7 @@ static irqreturn_t bcm_qspi_bspi_lr_l2_isr(int irq, void *dev_id)
status = INTR_BSPI_LR_SESSION_DONE_MASK;
}
- if (qspi->bspi_rf_msg_status)
+ if (qspi->bspi_rf_op_status)
bcm_qspi_bspi_lr_clear(qspi);
else
bcm_qspi_bspi_flush_prefetch_buffers(qspi);
@@ -1050,7 +1058,7 @@ static irqreturn_t bcm_qspi_bspi_lr_l2_isr(int irq, void *dev_id)
}
status &= INTR_BSPI_LR_SESSION_DONE_MASK;
- if (qspi->bspi_enabled && status && qspi->bspi_rf_msg_len == 0)
+ if (qspi->bspi_enabled && status && qspi->bspi_rf_op_len == 0)
complete(&qspi->bspi_done);
return IRQ_HANDLED;
@@ -1063,7 +1071,7 @@ static irqreturn_t bcm_qspi_bspi_lr_err_l2_isr(int irq, void *dev_id)
struct bcm_qspi_soc_intc *soc_intc = qspi->soc_intc;
dev_err(&qspi->pdev->dev, "BSPI INT error\n");
- qspi->bspi_rf_msg_status = -EIO;
+ qspi->bspi_rf_op_status = -EIO;
if (qspi->soc_intc)
/* clear soc interrupt */
soc_intc->bcm_qspi_int_ack(soc_intc, BSPI_ERR);
@@ -1186,6 +1194,10 @@ static void bcm_qspi_hw_uninit(struct bcm_qspi *qspi)
}
+static const struct spi_controller_mem_ops bcm_qspi_mem_ops = {
+ .exec_op = bcm_qspi_exec_mem_op,
+};
+
static const struct of_device_id bcm_qspi_of_match[] = {
{ .compatible = "brcm,spi-bcm-qspi" },
{},
@@ -1228,7 +1240,7 @@ int bcm_qspi_probe(struct platform_device *pdev,
master->mode_bits = SPI_CPHA | SPI_CPOL | SPI_RX_DUAL | SPI_RX_QUAD;
master->setup = bcm_qspi_setup;
master->transfer_one = bcm_qspi_transfer_one;
- master->spi_flash_read = bcm_qspi_flash_read;
+ master->mem_ops = &bcm_qspi_mem_ops;
master->cleanup = bcm_qspi_cleanup;
master->dev.of_node = dev->of_node;
master->num_chipselect = NUM_CHIPSELECT;
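
The hunks above show the typical shape of a spi_flash_read → spi-mem conversion: the driver validates the incoming spi_mem_op, returns -ENOTSUPP for anything it cannot accelerate (the core then falls back to plain transfers, see spi_mem_exec_op() later in this patch), and registers a spi_controller_mem_ops table instead of ->spi_flash_read. A stripped-down sketch of that skeleton, with foo_* names standing in for a hypothetical driver:

	static int foo_exec_mem_op(struct spi_mem *mem, const struct spi_mem_op *op)
	{
		/* Accelerate only addressed reads; anything else goes through
		 * the regular transfer path via the -ENOTSUPP fallback.
		 */
		if (op->data.dir != SPI_MEM_DATA_IN || !op->addr.nbytes)
			return -ENOTSUPP;

		/* ... program the cmd/addr/dummy/data phases from @op ... */
		return 0;
	}

	static const struct spi_controller_mem_ops foo_mem_ops = {
		.exec_op = foo_exec_mem_op,
	};

	/* in probe(): master->mem_ops = &foo_mem_ops; */
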
diff --git a/drivers/spi/spi-bcm2835aux.c b/drivers/spi/spi-bcm2835aux.c
index 1431cb98fe40..3094d818cf06 100644
--- a/drivers/spi/spi-bcm2835aux.c
+++ b/drivers/spi/spi-bcm2835aux.c
@@ -184,6 +184,11 @@ static irqreturn_t bcm2835aux_spi_interrupt(int irq, void *dev_id)
struct bcm2835aux_spi *bs = spi_master_get_devdata(master);
irqreturn_t ret = IRQ_NONE;
+ /* IRQ may be shared, so return if our interrupts are disabled */
+ if (!(bcm2835aux_rd(bs, BCM2835_AUX_SPI_CNTL1) &
+ (BCM2835_AUX_SPI_CNTL1_TXEMPTY | BCM2835_AUX_SPI_CNTL1_IDLE)))
+ return ret;
+
/* check if we have data to read */
while (bs->rx_len &&
(!(bcm2835aux_rd(bs, BCM2835_AUX_SPI_STAT) &
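
The early return added above is the standard pattern for handlers on a shared interrupt line: check the device's own status first and report IRQ_NONE when it did not raise the interrupt, so the core can try the other handlers sharing the line. A generic sketch of the pattern, with hypothetical register and field names:

	#include <linux/bitops.h>
	#include <linux/interrupt.h>
	#include <linux/io.h>

	#define FOO_STAT		0x04	/* hypothetical status register */
	#define FOO_STAT_PENDING	BIT(0)	/* hypothetical "IRQ pending" bit */

	struct foo_priv { void __iomem *base; };

	static irqreturn_t foo_isr(int irq, void *dev_id)
	{
		struct foo_priv *priv = dev_id;

		/* Not ours: let the other handlers on the shared line run. */
		if (!(readl(priv->base + FOO_STAT) & FOO_STAT_PENDING))
			return IRQ_NONE;

		/* ... service the interrupt ... */
		return IRQ_HANDLED;
	}
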
diff --git a/drivers/spi/spi-bcm53xx.c b/drivers/spi/spi-bcm53xx.c
deleted file mode 100644
index d02ceb7a29d1..000000000000
--- a/drivers/spi/spi-bcm53xx.c
+++ /dev/null
@@ -1,360 +0,0 @@
-/*
- * Copyright (C) 2014-2016 Rafał Miłecki <rafal@milecki.pl>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/delay.h>
-#include <linux/bcma/bcma.h>
-#include <linux/spi/spi.h>
-
-#include "spi-bcm53xx.h"
-
-#define BCM53XXSPI_MAX_SPI_BAUD 13500000 /* 216 MHz? */
-#define BCM53XXSPI_FLASH_WINDOW SZ_32M
-
-/* The longest observed required wait was 19 ms */
-#define BCM53XXSPI_SPE_TIMEOUT_MS 80
-
-struct bcm53xxspi {
- struct bcma_device *core;
- struct spi_master *master;
- void __iomem *mmio_base;
- bool bspi; /* Boot SPI mode with memory mapping */
-};
-
-static inline u32 bcm53xxspi_read(struct bcm53xxspi *b53spi, u16 offset)
-{
- return bcma_read32(b53spi->core, offset);
-}
-
-static inline void bcm53xxspi_write(struct bcm53xxspi *b53spi, u16 offset,
- u32 value)
-{
- bcma_write32(b53spi->core, offset, value);
-}
-
-static void bcm53xxspi_disable_bspi(struct bcm53xxspi *b53spi)
-{
- struct device *dev = &b53spi->core->dev;
- unsigned long deadline;
- u32 tmp;
-
- if (!b53spi->bspi)
- return;
-
- tmp = bcm53xxspi_read(b53spi, B53SPI_BSPI_MAST_N_BOOT_CTRL);
- if (tmp & 0x1)
- return;
-
- deadline = jiffies + usecs_to_jiffies(200);
- do {
- tmp = bcm53xxspi_read(b53spi, B53SPI_BSPI_BUSY_STATUS);
- if (!(tmp & 0x1)) {
- bcm53xxspi_write(b53spi, B53SPI_BSPI_MAST_N_BOOT_CTRL,
- 0x1);
- ndelay(200);
- b53spi->bspi = false;
- return;
- }
- udelay(1);
- } while (!time_after_eq(jiffies, deadline));
-
- dev_warn(dev, "Timeout disabling BSPI\n");
-}
-
-static void bcm53xxspi_enable_bspi(struct bcm53xxspi *b53spi)
-{
- u32 tmp;
-
- if (b53spi->bspi)
- return;
-
- tmp = bcm53xxspi_read(b53spi, B53SPI_BSPI_MAST_N_BOOT_CTRL);
- if (!(tmp & 0x1))
- return;
-
- bcm53xxspi_write(b53spi, B53SPI_BSPI_MAST_N_BOOT_CTRL, 0x0);
- b53spi->bspi = true;
-}
-
-static inline unsigned int bcm53xxspi_calc_timeout(size_t len)
-{
- /* Do some magic calculation based on length and buad. Add 10% and 1. */
- return (len * 9000 / BCM53XXSPI_MAX_SPI_BAUD * 110 / 100) + 1;
-}
-
-static int bcm53xxspi_wait(struct bcm53xxspi *b53spi, unsigned int timeout_ms)
-{
- unsigned long deadline;
- u32 tmp;
-
- /* SPE bit has to be 0 before we read MSPI STATUS */
- deadline = jiffies + msecs_to_jiffies(BCM53XXSPI_SPE_TIMEOUT_MS);
- do {
- tmp = bcm53xxspi_read(b53spi, B53SPI_MSPI_SPCR2);
- if (!(tmp & B53SPI_MSPI_SPCR2_SPE))
- break;
- udelay(5);
- } while (!time_after_eq(jiffies, deadline));
-
- if (tmp & B53SPI_MSPI_SPCR2_SPE)
- goto spi_timeout;
-
- /* Check status */
- deadline = jiffies + msecs_to_jiffies(timeout_ms);
- do {
- tmp = bcm53xxspi_read(b53spi, B53SPI_MSPI_MSPI_STATUS);
- if (tmp & B53SPI_MSPI_MSPI_STATUS_SPIF) {
- bcm53xxspi_write(b53spi, B53SPI_MSPI_MSPI_STATUS, 0);
- return 0;
- }
-
- cpu_relax();
- udelay(100);
- } while (!time_after_eq(jiffies, deadline));
-
-spi_timeout:
- bcm53xxspi_write(b53spi, B53SPI_MSPI_MSPI_STATUS, 0);
-
- pr_err("Timeout waiting for SPI to be ready!\n");
-
- return -EBUSY;
-}
-
-static void bcm53xxspi_buf_write(struct bcm53xxspi *b53spi, u8 *w_buf,
- size_t len, bool cont)
-{
- u32 tmp;
- int i;
-
- for (i = 0; i < len; i++) {
- /* Transmit Register File MSB */
- bcm53xxspi_write(b53spi, B53SPI_MSPI_TXRAM + 4 * (i * 2),
- (unsigned int)w_buf[i]);
- }
-
- for (i = 0; i < len; i++) {
- tmp = B53SPI_CDRAM_CONT | B53SPI_CDRAM_PCS_DISABLE_ALL |
- B53SPI_CDRAM_PCS_DSCK;
- if (!cont && i == len - 1)
- tmp &= ~B53SPI_CDRAM_CONT;
- tmp &= ~0x1;
- /* Command Register File */
- bcm53xxspi_write(b53spi, B53SPI_MSPI_CDRAM + 4 * i, tmp);
- }
-
- /* Set queue pointers */
- bcm53xxspi_write(b53spi, B53SPI_MSPI_NEWQP, 0);
- bcm53xxspi_write(b53spi, B53SPI_MSPI_ENDQP, len - 1);
-
- if (cont)
- bcm53xxspi_write(b53spi, B53SPI_MSPI_WRITE_LOCK, 1);
-
- /* Start SPI transfer */
- tmp = bcm53xxspi_read(b53spi, B53SPI_MSPI_SPCR2);
- tmp |= B53SPI_MSPI_SPCR2_SPE;
- if (cont)
- tmp |= B53SPI_MSPI_SPCR2_CONT_AFTER_CMD;
- bcm53xxspi_write(b53spi, B53SPI_MSPI_SPCR2, tmp);
-
- /* Wait for SPI to finish */
- bcm53xxspi_wait(b53spi, bcm53xxspi_calc_timeout(len));
-
- if (!cont)
- bcm53xxspi_write(b53spi, B53SPI_MSPI_WRITE_LOCK, 0);
-}
-
-static void bcm53xxspi_buf_read(struct bcm53xxspi *b53spi, u8 *r_buf,
- size_t len, bool cont)
-{
- u32 tmp;
- int i;
-
- for (i = 0; i < len; i++) {
- tmp = B53SPI_CDRAM_CONT | B53SPI_CDRAM_PCS_DISABLE_ALL |
- B53SPI_CDRAM_PCS_DSCK;
- if (!cont && i == len - 1)
- tmp &= ~B53SPI_CDRAM_CONT;
- tmp &= ~0x1;
- /* Command Register File */
- bcm53xxspi_write(b53spi, B53SPI_MSPI_CDRAM + 4 * i, tmp);
- }
-
- /* Set queue pointers */
- bcm53xxspi_write(b53spi, B53SPI_MSPI_NEWQP, 0);
- bcm53xxspi_write(b53spi, B53SPI_MSPI_ENDQP, len - 1);
-
- if (cont)
- bcm53xxspi_write(b53spi, B53SPI_MSPI_WRITE_LOCK, 1);
-
- /* Start SPI transfer */
- tmp = bcm53xxspi_read(b53spi, B53SPI_MSPI_SPCR2);
- tmp |= B53SPI_MSPI_SPCR2_SPE;
- if (cont)
- tmp |= B53SPI_MSPI_SPCR2_CONT_AFTER_CMD;
- bcm53xxspi_write(b53spi, B53SPI_MSPI_SPCR2, tmp);
-
- /* Wait for SPI to finish */
- bcm53xxspi_wait(b53spi, bcm53xxspi_calc_timeout(len));
-
- if (!cont)
- bcm53xxspi_write(b53spi, B53SPI_MSPI_WRITE_LOCK, 0);
-
- for (i = 0; i < len; ++i) {
- u16 reg = B53SPI_MSPI_RXRAM + 4 * (1 + i * 2);
-
- /* Data stored in the transmit register file LSB */
- r_buf[i] = (u8)bcm53xxspi_read(b53spi, reg);
- }
-}
-
-static int bcm53xxspi_transfer_one(struct spi_master *master,
- struct spi_device *spi,
- struct spi_transfer *t)
-{
- struct bcm53xxspi *b53spi = spi_master_get_devdata(master);
- u8 *buf;
- size_t left;
-
- bcm53xxspi_disable_bspi(b53spi);
-
- if (t->tx_buf) {
- buf = (u8 *)t->tx_buf;
- left = t->len;
- while (left) {
- size_t to_write = min_t(size_t, 16, left);
- bool cont = !spi_transfer_is_last(master, t) ||
- left - to_write > 0;
-
- bcm53xxspi_buf_write(b53spi, buf, to_write, cont);
- left -= to_write;
- buf += to_write;
- }
- }
-
- if (t->rx_buf) {
- buf = (u8 *)t->rx_buf;
- left = t->len;
- while (left) {
- size_t to_read = min_t(size_t, 16, left);
- bool cont = !spi_transfer_is_last(master, t) ||
- left - to_read > 0;
-
- bcm53xxspi_buf_read(b53spi, buf, to_read, cont);
- left -= to_read;
- buf += to_read;
- }
- }
-
- return 0;
-}
-
-static int bcm53xxspi_flash_read(struct spi_device *spi,
- struct spi_flash_read_message *msg)
-{
- struct bcm53xxspi *b53spi = spi_master_get_devdata(spi->master);
- int ret = 0;
-
- if (msg->from + msg->len > BCM53XXSPI_FLASH_WINDOW)
- return -EINVAL;
-
- bcm53xxspi_enable_bspi(b53spi);
- memcpy_fromio(msg->buf, b53spi->mmio_base + msg->from, msg->len);
- msg->retlen = msg->len;
-
- return ret;
-}
-
-/**************************************************
- * BCMA
- **************************************************/
-
-static const struct bcma_device_id bcm53xxspi_bcma_tbl[] = {
- BCMA_CORE(BCMA_MANUF_BCM, BCMA_CORE_NS_QSPI, BCMA_ANY_REV, BCMA_ANY_CLASS),
- {},
-};
-MODULE_DEVICE_TABLE(bcma, bcm53xxspi_bcma_tbl);
-
-static int bcm53xxspi_bcma_probe(struct bcma_device *core)
-{
- struct device *dev = &core->dev;
- struct bcm53xxspi *b53spi;
- struct spi_master *master;
- int err;
-
- if (core->bus->drv_cc.core->id.rev != 42) {
- pr_err("SPI on SoC with unsupported ChipCommon rev\n");
- return -ENOTSUPP;
- }
-
- master = spi_alloc_master(dev, sizeof(*b53spi));
- if (!master)
- return -ENOMEM;
-
- b53spi = spi_master_get_devdata(master);
- b53spi->master = master;
- b53spi->core = core;
-
- if (core->addr_s[0])
- b53spi->mmio_base = devm_ioremap(dev, core->addr_s[0],
- BCM53XXSPI_FLASH_WINDOW);
- b53spi->bspi = true;
- bcm53xxspi_disable_bspi(b53spi);
-
- master->dev.of_node = dev->of_node;
- master->transfer_one = bcm53xxspi_transfer_one;
- if (b53spi->mmio_base)
- master->spi_flash_read = bcm53xxspi_flash_read;
-
- bcma_set_drvdata(core, b53spi);
-
- err = devm_spi_register_master(dev, master);
- if (err) {
- spi_master_put(master);
- bcma_set_drvdata(core, NULL);
- return err;
- }
-
- return 0;
-}
-
-static struct bcma_driver bcm53xxspi_bcma_driver = {
- .name = KBUILD_MODNAME,
- .id_table = bcm53xxspi_bcma_tbl,
- .probe = bcm53xxspi_bcma_probe,
-};
-
-/**************************************************
- * Init & exit
- **************************************************/
-
-static int __init bcm53xxspi_module_init(void)
-{
- int err = 0;
-
- err = bcma_driver_register(&bcm53xxspi_bcma_driver);
- if (err)
- pr_err("Failed to register bcma driver: %d\n", err);
-
- return err;
-}
-
-static void __exit bcm53xxspi_module_exit(void)
-{
- bcma_driver_unregister(&bcm53xxspi_bcma_driver);
-}
-
-module_init(bcm53xxspi_module_init);
-module_exit(bcm53xxspi_module_exit);
-
-MODULE_DESCRIPTION("Broadcom BCM53xx SPI Controller driver");
-MODULE_AUTHOR("Rafał Miłecki <zajec5@gmail.com>");
-MODULE_LICENSE("GPL v2");
diff --git a/drivers/spi/spi-bcm53xx.h b/drivers/spi/spi-bcm53xx.h
deleted file mode 100644
index 03e3442086ec..000000000000
--- a/drivers/spi/spi-bcm53xx.h
+++ /dev/null
@@ -1,73 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef SPI_BCM53XX_H
-#define SPI_BCM53XX_H
-
-#define B53SPI_BSPI_REVISION_ID 0x000
-#define B53SPI_BSPI_SCRATCH 0x004
-#define B53SPI_BSPI_MAST_N_BOOT_CTRL 0x008
-#define B53SPI_BSPI_BUSY_STATUS 0x00c
-#define B53SPI_BSPI_INTR_STATUS 0x010
-#define B53SPI_BSPI_B0_STATUS 0x014
-#define B53SPI_BSPI_B0_CTRL 0x018
-#define B53SPI_BSPI_B1_STATUS 0x01c
-#define B53SPI_BSPI_B1_CTRL 0x020
-#define B53SPI_BSPI_STRAP_OVERRIDE_CTRL 0x024
-#define B53SPI_BSPI_FLEX_MODE_ENABLE 0x028
-#define B53SPI_BSPI_BITS_PER_CYCLE 0x02c
-#define B53SPI_BSPI_BITS_PER_PHASE 0x030
-#define B53SPI_BSPI_CMD_AND_MODE_BYTE 0x034
-#define B53SPI_BSPI_BSPI_FLASH_UPPER_ADDR_BYTE 0x038
-#define B53SPI_BSPI_BSPI_XOR_VALUE 0x03c
-#define B53SPI_BSPI_BSPI_XOR_ENABLE 0x040
-#define B53SPI_BSPI_BSPI_PIO_MODE_ENABLE 0x044
-#define B53SPI_BSPI_BSPI_PIO_IODIR 0x048
-#define B53SPI_BSPI_BSPI_PIO_DATA 0x04c
-
-/* RAF */
-#define B53SPI_RAF_START_ADDR 0x100
-#define B53SPI_RAF_NUM_WORDS 0x104
-#define B53SPI_RAF_CTRL 0x108
-#define B53SPI_RAF_FULLNESS 0x10c
-#define B53SPI_RAF_WATERMARK 0x110
-#define B53SPI_RAF_STATUS 0x114
-#define B53SPI_RAF_READ_DATA 0x118
-#define B53SPI_RAF_WORD_CNT 0x11c
-#define B53SPI_RAF_CURR_ADDR 0x120
-
-/* MSPI */
-#define B53SPI_MSPI_SPCR0_LSB 0x200
-#define B53SPI_MSPI_SPCR0_MSB 0x204
-#define B53SPI_MSPI_SPCR1_LSB 0x208
-#define B53SPI_MSPI_SPCR1_MSB 0x20c
-#define B53SPI_MSPI_NEWQP 0x210
-#define B53SPI_MSPI_ENDQP 0x214
-#define B53SPI_MSPI_SPCR2 0x218
-#define B53SPI_MSPI_SPCR2_SPE 0x00000040
-#define B53SPI_MSPI_SPCR2_CONT_AFTER_CMD 0x00000080
-#define B53SPI_MSPI_MSPI_STATUS 0x220
-#define B53SPI_MSPI_MSPI_STATUS_SPIF 0x00000001
-#define B53SPI_MSPI_CPTQP 0x224
-#define B53SPI_MSPI_TXRAM 0x240 /* 32 registers, up to 0x2b8 */
-#define B53SPI_MSPI_RXRAM 0x2c0 /* 32 registers, up to 0x33c */
-#define B53SPI_MSPI_CDRAM 0x340 /* 16 registers, up to 0x37c */
-#define B53SPI_CDRAM_PCS_PCS0 0x00000001
-#define B53SPI_CDRAM_PCS_PCS1 0x00000002
-#define B53SPI_CDRAM_PCS_PCS2 0x00000004
-#define B53SPI_CDRAM_PCS_PCS3 0x00000008
-#define B53SPI_CDRAM_PCS_DISABLE_ALL 0x0000000f
-#define B53SPI_CDRAM_PCS_DSCK 0x00000010
-#define B53SPI_CDRAM_BITSE 0x00000040
-#define B53SPI_CDRAM_CONT 0x00000080
-#define B53SPI_MSPI_WRITE_LOCK 0x380
-#define B53SPI_MSPI_DISABLE_FLUSH_GEN 0x384
-
-/* Interrupt */
-#define B53SPI_INTR_RAF_LR_FULLNESS_REACHED 0x3a0
-#define B53SPI_INTR_RAF_LR_TRUNCATED 0x3a4
-#define B53SPI_INTR_RAF_LR_IMPATIENT 0x3a8
-#define B53SPI_INTR_RAF_LR_SESSION_DONE 0x3ac
-#define B53SPI_INTR_RAF_LR_OVERREAD 0x3b0
-#define B53SPI_INTR_MSPI_DONE 0x3b4
-#define B53SPI_INTR_MSPI_HALT_SET_TRANSACTION_DONE 0x3b8
-
-#endif /* SPI_BCM53XX_H */
diff --git a/drivers/spi/spi-bcm63xx-hsspi.c b/drivers/spi/spi-bcm63xx-hsspi.c
index cbcba614b253..c23849f7aa7b 100644
--- a/drivers/spi/spi-bcm63xx-hsspi.c
+++ b/drivers/spi/spi-bcm63xx-hsspi.c
@@ -352,22 +352,31 @@ static int bcm63xx_hsspi_probe(struct platform_device *pdev)
if (IS_ERR(clk))
return PTR_ERR(clk);
+ ret = clk_prepare_enable(clk);
+ if (ret)
+ return ret;
+
rate = clk_get_rate(clk);
if (!rate) {
struct clk *pll_clk = devm_clk_get(dev, "pll");
- if (IS_ERR(pll_clk))
- return PTR_ERR(pll_clk);
+ if (IS_ERR(pll_clk)) {
+ ret = PTR_ERR(pll_clk);
+ goto out_disable_clk;
+ }
+
+ ret = clk_prepare_enable(pll_clk);
+ if (ret)
+ goto out_disable_clk;
rate = clk_get_rate(pll_clk);
- if (!rate)
- return -EINVAL;
+ clk_disable_unprepare(pll_clk);
+ if (!rate) {
+ ret = -EINVAL;
+ goto out_disable_clk;
+ }
}
- ret = clk_prepare_enable(clk);
- if (ret)
- return ret;
-
master = spi_alloc_master(&pdev->dev, sizeof(*bs));
if (!master) {
ret = -ENOMEM;
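
The reordering above ensures the clock is prepared and enabled before clk_get_rate() is called — per the clk API, the reported rate is only guaranteed meaningful once the clock has been enabled — and gives every later failure path a single out_disable_clk unwind label. The idiom in isolation, as a hedged sketch with illustrative names:

	#include <linux/clk.h>
	#include <linux/platform_device.h>

	static int foo_probe(struct platform_device *pdev)
	{
		struct clk *clk;
		unsigned long rate;
		int ret;

		clk = devm_clk_get(&pdev->dev, "hsspi");	/* name is illustrative */
		if (IS_ERR(clk))
			return PTR_ERR(clk);

		/* Enable before querying the rate. */
		ret = clk_prepare_enable(clk);
		if (ret)
			return ret;

		rate = clk_get_rate(clk);
		if (!rate) {
			ret = -EINVAL;
			goto out_disable_clk;	/* all later failures unwind here */
		}

		/* ... allocate and register the controller ... */
		return 0;

	out_disable_clk:
		clk_disable_unprepare(clk);
		return ret;
	}
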
diff --git a/drivers/spi/spi-cadence.c b/drivers/spi/spi-cadence.c
index 5c9516ae4942..f3dad6fcdc35 100644
--- a/drivers/spi/spi-cadence.c
+++ b/drivers/spi/spi-cadence.c
@@ -313,6 +313,14 @@ static void cdns_spi_fill_tx_fifo(struct cdns_spi *xspi)
while ((trans_cnt < CDNS_SPI_FIFO_DEPTH) &&
(xspi->tx_bytes > 0)) {
+
+ /* When the xspi is busy, bytes may fail to send and the SPI
+ * controller may then not work reliably, so add a one-byte delay
+ */
+ if (cdns_spi_read(xspi, CDNS_SPI_ISR) &
+ CDNS_SPI_IXR_TXFULL)
+ usleep_range(10, 20);
+
if (xspi->txbuf)
cdns_spi_write(xspi, CDNS_SPI_TXD, *xspi->txbuf++);
else
@@ -686,8 +694,7 @@ static int cdns_spi_remove(struct platform_device *pdev)
*/
static int __maybe_unused cdns_spi_suspend(struct device *dev)
{
- struct platform_device *pdev = to_platform_device(dev);
- struct spi_master *master = platform_get_drvdata(pdev);
+ struct spi_master *master = dev_get_drvdata(dev);
return spi_master_suspend(master);
}
@@ -702,8 +709,7 @@ static int __maybe_unused cdns_spi_suspend(struct device *dev)
*/
static int __maybe_unused cdns_spi_resume(struct device *dev)
{
- struct platform_device *pdev = to_platform_device(dev);
- struct spi_master *master = platform_get_drvdata(pdev);
+ struct spi_master *master = dev_get_drvdata(dev);
struct cdns_spi *xspi = spi_master_get_devdata(master);
cdns_spi_init_hw(xspi);
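
The fixed one-shot delay added above works, but the same guard can also be written as a bounded poll. A sketch using readl_poll_timeout() from <linux/iopoll.h>; the regs base and the use of CDNS_SPI_ISR/CDNS_SPI_IXR_TXFULL as a raw MMIO offset and mask reuse names from the hunk above and are assumptions about the driver's internals:

	#include <linux/iopoll.h>

	/* Wait up to 1 ms for TX FIFO space, polling every 10 us. */
	static int foo_wait_tx_space(void __iomem *regs)
	{
		u32 isr;

		return readl_poll_timeout(regs + CDNS_SPI_ISR, isr,
					  !(isr & CDNS_SPI_IXR_TXFULL), 10, 1000);
	}
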
diff --git a/drivers/spi/spi-fsl-lpspi.c b/drivers/spi/spi-fsl-lpspi.c
index cb3c73007ca1..e6d5cc6ab108 100644
--- a/drivers/spi/spi-fsl-lpspi.c
+++ b/drivers/spi/spi-fsl-lpspi.c
@@ -1,19 +1,8 @@
-/*
- * Freescale i.MX7ULP LPSPI driver
- *
- * Copyright 2016 Freescale Semiconductor, Inc.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- */
+// SPDX-License-Identifier: GPL-2.0+
+//
+// Freescale i.MX7ULP LPSPI driver
+//
+// Copyright 2016 Freescale Semiconductor, Inc.
#include <linux/clk.h>
#include <linux/completion.h>
diff --git a/drivers/spi/spi-imx.c b/drivers/spi/spi-imx.c
index 6f57592a7f95..866246f21041 100644
--- a/drivers/spi/spi-imx.c
+++ b/drivers/spi/spi-imx.c
@@ -1,22 +1,6 @@
-/*
- * Copyright 2004-2007 Freescale Semiconductor, Inc. All Rights Reserved.
- * Copyright (C) 2008 Juergen Beisert
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the
- * Free Software Foundation
- * 51 Franklin Street, Fifth Floor
- * Boston, MA 02110-1301, USA.
- */
+// SPDX-License-Identifier: GPL-2.0+
+// Copyright 2004-2007 Freescale Semiconductor, Inc. All Rights Reserved.
+// Copyright (C) 2008 Juergen Beisert
#include <linux/clk.h>
#include <linux/completion.h>
@@ -1701,7 +1685,7 @@ static struct platform_driver spi_imx_driver = {
};
module_platform_driver(spi_imx_driver);
-MODULE_DESCRIPTION("SPI Master Controller driver");
+MODULE_DESCRIPTION("SPI Controller driver");
MODULE_AUTHOR("Sascha Hauer, Pengutronix");
MODULE_LICENSE("GPL");
MODULE_ALIAS("platform:" DRIVER_NAME);
diff --git a/drivers/spi/spi-mem.c b/drivers/spi/spi-mem.c
new file mode 100644
index 000000000000..990770dfa5cf
--- /dev/null
+++ b/drivers/spi/spi-mem.c
@@ -0,0 +1,410 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright (C) 2018 Exceet Electronics GmbH
+ * Copyright (C) 2018 Bootlin
+ *
+ * Author: Boris Brezillon <boris.brezillon@bootlin.com>
+ */
+#include <linux/dmaengine.h>
+#include <linux/pm_runtime.h>
+#include <linux/spi/spi.h>
+#include <linux/spi/spi-mem.h>
+
+#include "internals.h"
+
+/**
+ * spi_controller_dma_map_mem_op_data() - DMA-map the buffer attached to a
+ * memory operation
+ * @ctlr: the SPI controller requesting this dma_map()
+ * @op: the memory operation containing the buffer to map
+ * @sgt: a pointer to a non-initialized sg_table that will be filled by this
+ * function
+ *
+ * Some controllers might want to do DMA on the data buffer embedded in @op.
+ * This helper prepares everything for you and provides a ready-to-use
+ * sg_table. This function is not intended to be called from SPI drivers;
+ * only SPI controller drivers should use it.
+ * Note that the caller must ensure the memory region pointed to by
+ * op->data.buf.{in,out} is DMA-able before calling this function.
+ *
+ * Return: 0 in case of success, a negative error code otherwise.
+ */
+int spi_controller_dma_map_mem_op_data(struct spi_controller *ctlr,
+ const struct spi_mem_op *op,
+ struct sg_table *sgt)
+{
+ struct device *dmadev;
+
+ if (!op->data.nbytes)
+ return -EINVAL;
+
+ if (op->data.dir == SPI_MEM_DATA_OUT && ctlr->dma_tx)
+ dmadev = ctlr->dma_tx->device->dev;
+ else if (op->data.dir == SPI_MEM_DATA_IN && ctlr->dma_rx)
+ dmadev = ctlr->dma_rx->device->dev;
+ else
+ dmadev = ctlr->dev.parent;
+
+ if (!dmadev)
+ return -EINVAL;
+
+ return spi_map_buf(ctlr, dmadev, sgt, op->data.buf.in, op->data.nbytes,
+ op->data.dir == SPI_MEM_DATA_IN ?
+ DMA_FROM_DEVICE : DMA_TO_DEVICE);
+}
+EXPORT_SYMBOL_GPL(spi_controller_dma_map_mem_op_data);
+
+/**
+ * spi_controller_dma_unmap_mem_op_data() - DMA-unmap the buffer attached to a
+ * memory operation
+ * @ctlr: the SPI controller requesting this dma_unmap()
+ * @op: the memory operation containing the buffer to unmap
+ * @sgt: a pointer to an sg_table previously initialized by
+ * spi_controller_dma_map_mem_op_data()
+ *
+ * Some controllers might want to do DMA on the data buffer embedded in @op.
+ * This helper prepares things so that the CPU can access the
+ * op->data.buf.{in,out} buffer again.
+ *
+ * This function is not intended to be called from SPI drivers. Only SPI
+ * controller drivers should use it.
+ *
+ * This function should be called after the DMA operation has finished and is
+ * only valid if the previous spi_controller_dma_map_mem_op_data() call
+ * returned 0.
+ */
+void spi_controller_dma_unmap_mem_op_data(struct spi_controller *ctlr,
+ const struct spi_mem_op *op,
+ struct sg_table *sgt)
+{
+ struct device *dmadev;
+
+ if (!op->data.nbytes)
+ return;
+
+ if (op->data.dir == SPI_MEM_DATA_OUT && ctlr->dma_tx)
+ dmadev = ctlr->dma_tx->device->dev;
+ else if (op->data.dir == SPI_MEM_DATA_IN && ctlr->dma_rx)
+ dmadev = ctlr->dma_rx->device->dev;
+ else
+ dmadev = ctlr->dev.parent;
+
+ spi_unmap_buf(ctlr, dmadev, sgt,
+ op->data.dir == SPI_MEM_DATA_IN ?
+ DMA_FROM_DEVICE : DMA_TO_DEVICE);
+}
+EXPORT_SYMBOL_GPL(spi_controller_dma_unmap_mem_op_data);
+
+static int spi_check_buswidth_req(struct spi_mem *mem, u8 buswidth, bool tx)
+{
+ u32 mode = mem->spi->mode;
+
+ switch (buswidth) {
+ case 1:
+ return 0;
+
+ case 2:
+ if ((tx && (mode & (SPI_TX_DUAL | SPI_TX_QUAD))) ||
+ (!tx && (mode & (SPI_RX_DUAL | SPI_RX_QUAD))))
+ return 0;
+
+ break;
+
+ case 4:
+ if ((tx && (mode & SPI_TX_QUAD)) ||
+ (!tx && (mode & SPI_RX_QUAD)))
+ return 0;
+
+ break;
+
+ default:
+ break;
+ }
+
+ return -ENOTSUPP;
+}
+
+static bool spi_mem_default_supports_op(struct spi_mem *mem,
+ const struct spi_mem_op *op)
+{
+ if (spi_check_buswidth_req(mem, op->cmd.buswidth, true))
+ return false;
+
+ if (op->addr.nbytes &&
+ spi_check_buswidth_req(mem, op->addr.buswidth, true))
+ return false;
+
+ if (op->dummy.nbytes &&
+ spi_check_buswidth_req(mem, op->dummy.buswidth, true))
+ return false;
+
+ if (op->data.nbytes &&
+ spi_check_buswidth_req(mem, op->data.buswidth,
+ op->data.dir == SPI_MEM_DATA_OUT))
+ return false;
+
+ return true;
+}
+EXPORT_SYMBOL_GPL(spi_mem_default_supports_op);
+
+/**
+ * spi_mem_supports_op() - Check if a memory device and the controller it is
+ * connected to support a specific memory operation
+ * @mem: the SPI memory
+ * @op: the memory operation to check
+ *
+ * Some controllers only support Single or Dual IOs; others might only
+ * support specific opcodes, or it can even be that the controller and device
+ * both support Quad IOs but the hardware prevents you from using it because
+ * only 2 IO lines are connected.
+ *
+ * This function checks whether a specific operation is supported.
+ *
+ * Return: true if @op is supported, false otherwise.
+ */
+bool spi_mem_supports_op(struct spi_mem *mem, const struct spi_mem_op *op)
+{
+ struct spi_controller *ctlr = mem->spi->controller;
+
+ if (ctlr->mem_ops && ctlr->mem_ops->supports_op)
+ return ctlr->mem_ops->supports_op(mem, op);
+
+ return spi_mem_default_supports_op(mem, op);
+}
+EXPORT_SYMBOL_GPL(spi_mem_supports_op);
+
+/**
+ * spi_mem_exec_op() - Execute a memory operation
+ * @mem: the SPI memory
+ * @op: the memory operation to execute
+ *
+ * Executes a memory operation.
+ *
+ * This function first checks that @op is supported and then tries to execute
+ * it.
+ *
+ * Return: 0 in case of success, a negative error code otherwise.
+ */
+int spi_mem_exec_op(struct spi_mem *mem, const struct spi_mem_op *op)
+{
+ unsigned int tmpbufsize, xferpos = 0, totalxferlen = 0;
+ struct spi_controller *ctlr = mem->spi->controller;
+ struct spi_transfer xfers[4] = { };
+ struct spi_message msg;
+ u8 *tmpbuf;
+ int ret;
+
+ if (!spi_mem_supports_op(mem, op))
+ return -ENOTSUPP;
+
+ if (ctlr->mem_ops) {
+ /*
+ * Flush the message queue before executing our SPI memory
+ * operation to prevent preemption of regular SPI transfers.
+ */
+ spi_flush_queue(ctlr);
+
+ if (ctlr->auto_runtime_pm) {
+ ret = pm_runtime_get_sync(ctlr->dev.parent);
+ if (ret < 0) {
+ dev_err(&ctlr->dev,
+ "Failed to power device: %d\n",
+ ret);
+ return ret;
+ }
+ }
+
+ mutex_lock(&ctlr->bus_lock_mutex);
+ mutex_lock(&ctlr->io_mutex);
+ ret = ctlr->mem_ops->exec_op(mem, op);
+ mutex_unlock(&ctlr->io_mutex);
+ mutex_unlock(&ctlr->bus_lock_mutex);
+
+ if (ctlr->auto_runtime_pm)
+ pm_runtime_put(ctlr->dev.parent);
+
+ /*
+ * Some controllers only optimize specific paths (typically the
+ * read path) and expect the core to use the regular SPI
+ * interface in other cases.
+ */
+ if (!ret || ret != -ENOTSUPP)
+ return ret;
+ }
+
+ tmpbufsize = sizeof(op->cmd.opcode) + op->addr.nbytes +
+ op->dummy.nbytes;
+
+ /*
+ * Allocate a buffer to transmit the CMD, ADDR cycles with kmalloc() so
+ * we're guaranteed that this buffer is DMA-able, as required by the
+ * SPI layer.
+ */
+ tmpbuf = kzalloc(tmpbufsize, GFP_KERNEL | GFP_DMA);
+ if (!tmpbuf)
+ return -ENOMEM;
+
+ spi_message_init(&msg);
+
+ tmpbuf[0] = op->cmd.opcode;
+ xfers[xferpos].tx_buf = tmpbuf;
+ xfers[xferpos].len = sizeof(op->cmd.opcode);
+ xfers[xferpos].tx_nbits = op->cmd.buswidth;
+ spi_message_add_tail(&xfers[xferpos], &msg);
+ xferpos++;
+ totalxferlen++;
+
+ if (op->addr.nbytes) {
+ int i;
+
+ for (i = 0; i < op->addr.nbytes; i++)
+ tmpbuf[i + 1] = op->addr.val >>
+ (8 * (op->addr.nbytes - i - 1));
+
+ xfers[xferpos].tx_buf = tmpbuf + 1;
+ xfers[xferpos].len = op->addr.nbytes;
+ xfers[xferpos].tx_nbits = op->addr.buswidth;
+ spi_message_add_tail(&xfers[xferpos], &msg);
+ xferpos++;
+ totalxferlen += op->addr.nbytes;
+ }
+
+ if (op->dummy.nbytes) {
+ memset(tmpbuf + op->addr.nbytes + 1, 0xff, op->dummy.nbytes);
+ xfers[xferpos].tx_buf = tmpbuf + op->addr.nbytes + 1;
+ xfers[xferpos].len = op->dummy.nbytes;
+ xfers[xferpos].tx_nbits = op->dummy.buswidth;
+ spi_message_add_tail(&xfers[xferpos], &msg);
+ xferpos++;
+ totalxferlen += op->dummy.nbytes;
+ }
+
+ if (op->data.nbytes) {
+ if (op->data.dir == SPI_MEM_DATA_IN) {
+ xfers[xferpos].rx_buf = op->data.buf.in;
+ xfers[xferpos].rx_nbits = op->data.buswidth;
+ } else {
+ xfers[xferpos].tx_buf = op->data.buf.out;
+ xfers[xferpos].tx_nbits = op->data.buswidth;
+ }
+
+ xfers[xferpos].len = op->data.nbytes;
+ spi_message_add_tail(&xfers[xferpos], &msg);
+ xferpos++;
+ totalxferlen += op->data.nbytes;
+ }
+
+ ret = spi_sync(mem->spi, &msg);
+
+ kfree(tmpbuf);
+
+ if (ret)
+ return ret;
+
+ if (msg.actual_length != totalxferlen)
+ return -EIO;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(spi_mem_exec_op);
+
+/**
+ * spi_mem_adjust_op_size() - Adjust the data size of a SPI mem operation to
+ * match controller limitations
+ * @mem: the SPI memory
+ * @op: the operation to adjust
+ *
+ * Some controllers have FIFO limitations and must split a data transfer
+ * operation into multiple ones, others require a specific alignment for
+ * optimized accesses. This function allows SPI mem drivers to split a single
+ * operation into multiple sub-operations when required.
+ *
+ * Return: a negative error code if the controller can't properly adjust @op,
+ * 0 otherwise. Note that @op->data.nbytes will be updated if @op
+ * can't be handled in a single step.
+ */
+int spi_mem_adjust_op_size(struct spi_mem *mem, struct spi_mem_op *op)
+{
+ struct spi_controller *ctlr = mem->spi->controller;
+
+ if (ctlr->mem_ops && ctlr->mem_ops->adjust_op_size)
+ return ctlr->mem_ops->adjust_op_size(mem, op);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(spi_mem_adjust_op_size);
+
+static inline struct spi_mem_driver *to_spi_mem_drv(struct device_driver *drv)
+{
+ return container_of(drv, struct spi_mem_driver, spidrv.driver);
+}
+
+static int spi_mem_probe(struct spi_device *spi)
+{
+ struct spi_mem_driver *memdrv = to_spi_mem_drv(spi->dev.driver);
+ struct spi_mem *mem;
+
+ mem = devm_kzalloc(&spi->dev, sizeof(*mem), GFP_KERNEL);
+ if (!mem)
+ return -ENOMEM;
+
+ mem->spi = spi;
+ spi_set_drvdata(spi, mem);
+
+ return memdrv->probe(mem);
+}
+
+static int spi_mem_remove(struct spi_device *spi)
+{
+ struct spi_mem_driver *memdrv = to_spi_mem_drv(spi->dev.driver);
+ struct spi_mem *mem = spi_get_drvdata(spi);
+
+ if (memdrv->remove)
+ return memdrv->remove(mem);
+
+ return 0;
+}
+
+static void spi_mem_shutdown(struct spi_device *spi)
+{
+ struct spi_mem_driver *memdrv = to_spi_mem_drv(spi->dev.driver);
+ struct spi_mem *mem = spi_get_drvdata(spi);
+
+ if (memdrv->shutdown)
+ memdrv->shutdown(mem);
+}
+
+/**
+ * spi_mem_driver_register_with_owner() - Register a SPI memory driver
+ * @memdrv: the SPI memory driver to register
+ * @owner: the owner of this driver
+ *
+ * Registers a SPI memory driver.
+ *
+ * Return: 0 in case of success, a negative error code otherwise.
+ */
+int spi_mem_driver_register_with_owner(struct spi_mem_driver *memdrv,
+ struct module *owner)
+{
+ memdrv->spidrv.probe = spi_mem_probe;
+ memdrv->spidrv.remove = spi_mem_remove;
+ memdrv->spidrv.shutdown = spi_mem_shutdown;
+
+ return __spi_register_driver(owner, &memdrv->spidrv);
+}
+EXPORT_SYMBOL_GPL(spi_mem_driver_register_with_owner);
+
+/**
+ * spi_mem_driver_unregister() - Unregister a SPI memory driver
+ * @memdrv: the SPI memory driver to unregister
+ *
+ * Unregisters a SPI memory driver.
+ */
+void spi_mem_driver_unregister(struct spi_mem_driver *memdrv)
+{
+ spi_unregister_driver(&memdrv->spidrv);
+}
+EXPORT_SYMBOL_GPL(spi_mem_driver_unregister);
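
From a consumer's point of view, the whole API above boils down to describing an operation as cmd/addr/dummy/data phases and handing it to spi_mem_exec_op(). A minimal sketch of a hypothetical spi-mem driver reading a JEDEC ID, assuming the SPI_MEM_OP() initializer helpers that <linux/spi/spi-mem.h> provides alongside this file (error handling trimmed for brevity):

	#include <linux/spi/spi-mem.h>

	/* Read a 3-byte JEDEC ID: opcode 0x9f, no address, no dummy cycles. */
	static int foo_read_id(struct spi_mem *mem, u8 *id)
	{
		struct spi_mem_op op =
			SPI_MEM_OP(SPI_MEM_OP_CMD(0x9f, 1),
				   SPI_MEM_OP_NO_ADDR,
				   SPI_MEM_OP_NO_DUMMY,
				   SPI_MEM_OP_DATA_IN(3, id, 1));
		int ret;

		/* Let the controller shrink op.data.nbytes if its FIFO requires. */
		ret = spi_mem_adjust_op_size(mem, &op);
		if (ret)
			return ret;

		return spi_mem_exec_op(mem, &op);
	}

A real driver would loop until all requested bytes have been transferred, since spi_mem_adjust_op_size() is allowed to shrink the data phase.
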
diff --git a/drivers/spi/spi-meson-spicc.c b/drivers/spi/spi-meson-spicc.c
index 5c82910e3480..7fe4488ace57 100644
--- a/drivers/spi/spi-meson-spicc.c
+++ b/drivers/spi/spi-meson-spicc.c
@@ -574,10 +574,15 @@ static int meson_spicc_probe(struct platform_device *pdev)
master->max_speed_hz = rate >> 2;
ret = devm_spi_register_master(&pdev->dev, master);
- if (!ret)
- return 0;
+ if (ret) {
+ dev_err(&pdev->dev, "spi master registration failed\n");
+ goto out_clk;
+ }
- dev_err(&pdev->dev, "spi master registration failed\n");
+ return 0;
+
+out_clk:
+ clk_disable_unprepare(spicc->core);
out_master:
spi_master_put(master);
diff --git a/drivers/spi/spi-mpc52xx.c b/drivers/spi/spi-mpc52xx.c
index e8b59ce4dc3a..0e55784a3ad9 100644
--- a/drivers/spi/spi-mpc52xx.c
+++ b/drivers/spi/spi-mpc52xx.c
@@ -447,7 +447,7 @@ static int mpc52xx_spi_probe(struct platform_device *op)
for (i = 0; i < ms->gpio_cs_count; i++) {
gpio_cs = of_get_gpio(op->dev.of_node, i);
- if (gpio_cs < 0) {
+ if (!gpio_is_valid(gpio_cs)) {
dev_err(&op->dev,
"could not parse the gpio field in oftree\n");
rc = -ENODEV;
diff --git a/drivers/spi/spi-mxs.c b/drivers/spi/spi-mxs.c
index 3d216b950b41..6ac95a2a21ce 100644
--- a/drivers/spi/spi-mxs.c
+++ b/drivers/spi/spi-mxs.c
@@ -1,32 +1,22 @@
-/*
- * Freescale MXS SPI master driver
- *
- * Copyright 2012 DENX Software Engineering, GmbH.
- * Copyright 2012 Freescale Semiconductor, Inc.
- * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved.
- *
- * Rework and transition to new API by:
- * Marek Vasut <marex@denx.de>
- *
- * Based on previous attempt by:
- * Fabio Estevam <fabio.estevam@freescale.com>
- *
- * Based on code from U-Boot bootloader by:
- * Marek Vasut <marex@denx.de>
- *
- * Based on spi-stmp.c, which is:
- * Author: Dmitry Pervushin <dimka@embeddedalley.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- */
+// SPDX-License-Identifier: GPL-2.0+
+//
+// Freescale MXS SPI master driver
+//
+// Copyright 2012 DENX Software Engineering, GmbH.
+// Copyright 2012 Freescale Semiconductor, Inc.
+// Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved.
+//
+// Rework and transition to new API by:
+// Marek Vasut <marex@denx.de>
+//
+// Based on previous attempt by:
+// Fabio Estevam <fabio.estevam@freescale.com>
+//
+// Based on code from U-Boot bootloader by:
+// Marek Vasut <marex@denx.de>
+//
+// Based on spi-stmp.c, which is:
+// Author: Dmitry Pervushin <dimka@embeddedalley.com>
#include <linux/kernel.h>
#include <linux/ioport.h>
diff --git a/drivers/spi/spi-omap2-mcspi.c b/drivers/spi/spi-omap2-mcspi.c
index 9bf64e6eca9b..6c628a54e946 100644
--- a/drivers/spi/spi-omap2-mcspi.c
+++ b/drivers/spi/spi-omap2-mcspi.c
@@ -255,6 +255,7 @@ static void omap2_mcspi_set_cs(struct spi_device *spi, bool enable)
if (spi->controller_state) {
int err = pm_runtime_get_sync(mcspi->dev);
if (err < 0) {
+ pm_runtime_put_noidle(mcspi->dev);
dev_err(mcspi->dev, "failed to get sync: %d\n", err);
return;
}
@@ -350,20 +351,6 @@ disable_fifo:
mcspi->fifo_depth = 0;
}
-static void omap2_mcspi_restore_ctx(struct omap2_mcspi *mcspi)
-{
- struct spi_master *spi_cntrl = mcspi->master;
- struct omap2_mcspi_regs *ctx = &mcspi->ctx;
- struct omap2_mcspi_cs *cs;
-
- /* McSPI: context restore */
- mcspi_write_reg(spi_cntrl, OMAP2_MCSPI_MODULCTRL, ctx->modulctrl);
- mcspi_write_reg(spi_cntrl, OMAP2_MCSPI_WAKEUPENABLE, ctx->wakeupenable);
-
- list_for_each_entry(cs, &ctx->cs, node)
- writel_relaxed(cs->chconf0, cs->base + OMAP2_MCSPI_CHCONF0);
-}
-
static int mcspi_wait_for_reg_bit(void __iomem *reg, unsigned long bit)
{
unsigned long timeout;
@@ -1065,8 +1052,11 @@ static int omap2_mcspi_setup(struct spi_device *spi)
}
ret = pm_runtime_get_sync(mcspi->dev);
- if (ret < 0)
+ if (ret < 0) {
+ pm_runtime_put_noidle(mcspi->dev);
+
return ret;
+ }
ret = omap2_mcspi_setup_transfer(spi, NULL);
pm_runtime_mark_last_busy(mcspi->dev);
@@ -1284,8 +1274,11 @@ static int omap2_mcspi_master_setup(struct omap2_mcspi *mcspi)
int ret = 0;
ret = pm_runtime_get_sync(mcspi->dev);
- if (ret < 0)
+ if (ret < 0) {
+ pm_runtime_put_noidle(mcspi->dev);
+
return ret;
+ }
mcspi_write_reg(master, OMAP2_MCSPI_WAKEUPENABLE,
OMAP2_MCSPI_WAKEUPENABLE_WKEN);
@@ -1297,14 +1290,39 @@ static int omap2_mcspi_master_setup(struct omap2_mcspi *mcspi)
return 0;
}
+/*
+ * When the SPI wakes up from off-mode, CS is in the active state. If it was
+ * inactive when the driver was suspended, force it back to the inactive state
+ * at wake-up.
+ */
static int omap_mcspi_runtime_resume(struct device *dev)
{
- struct omap2_mcspi *mcspi;
- struct spi_master *master;
+ struct spi_master *master = dev_get_drvdata(dev);
+ struct omap2_mcspi *mcspi = spi_master_get_devdata(master);
+ struct omap2_mcspi_regs *ctx = &mcspi->ctx;
+ struct omap2_mcspi_cs *cs;
- master = dev_get_drvdata(dev);
- mcspi = spi_master_get_devdata(master);
- omap2_mcspi_restore_ctx(mcspi);
+ /* McSPI: context restore */
+ mcspi_write_reg(master, OMAP2_MCSPI_MODULCTRL, ctx->modulctrl);
+ mcspi_write_reg(master, OMAP2_MCSPI_WAKEUPENABLE, ctx->wakeupenable);
+
+ list_for_each_entry(cs, &ctx->cs, node) {
+ /*
+ * We need to toggle the CS state for the OMAP to take
+ * this change into account.
+ */
+ if ((cs->chconf0 & OMAP2_MCSPI_CHCONF_FORCE) == 0) {
+ cs->chconf0 |= OMAP2_MCSPI_CHCONF_FORCE;
+ writel_relaxed(cs->chconf0,
+ cs->base + OMAP2_MCSPI_CHCONF0);
+ cs->chconf0 &= ~OMAP2_MCSPI_CHCONF_FORCE;
+ writel_relaxed(cs->chconf0,
+ cs->base + OMAP2_MCSPI_CHCONF0);
+ } else {
+ writel_relaxed(cs->chconf0,
+ cs->base + OMAP2_MCSPI_CHCONF0);
+ }
+ }
return 0;
}
@@ -1447,50 +1465,33 @@ static int omap2_mcspi_remove(struct platform_device *pdev)
MODULE_ALIAS("platform:omap2_mcspi");
#ifdef CONFIG_SUSPEND
-/*
- * When SPI wake up from off-mode, CS is in activate state. If it was in
- * unactive state when driver was suspend, then force it to unactive state at
- * wake up.
- */
-static int omap2_mcspi_resume(struct device *dev)
+static int omap2_mcspi_suspend_noirq(struct device *dev)
{
- struct spi_master *master = dev_get_drvdata(dev);
- struct omap2_mcspi *mcspi = spi_master_get_devdata(master);
- struct omap2_mcspi_regs *ctx = &mcspi->ctx;
- struct omap2_mcspi_cs *cs;
-
- pm_runtime_get_sync(mcspi->dev);
- list_for_each_entry(cs, &ctx->cs, node) {
- if ((cs->chconf0 & OMAP2_MCSPI_CHCONF_FORCE) == 0) {
- /*
- * We need to toggle CS state for OMAP take this
- * change in account.
- */
- cs->chconf0 |= OMAP2_MCSPI_CHCONF_FORCE;
- writel_relaxed(cs->chconf0, cs->base + OMAP2_MCSPI_CHCONF0);
- cs->chconf0 &= ~OMAP2_MCSPI_CHCONF_FORCE;
- writel_relaxed(cs->chconf0, cs->base + OMAP2_MCSPI_CHCONF0);
- }
- }
- pm_runtime_mark_last_busy(mcspi->dev);
- pm_runtime_put_autosuspend(mcspi->dev);
-
- return pinctrl_pm_select_default_state(dev);
+ return pinctrl_pm_select_sleep_state(dev);
}
-static int omap2_mcspi_suspend(struct device *dev)
+static int omap2_mcspi_resume_noirq(struct device *dev)
{
- return pinctrl_pm_select_sleep_state(dev);
+ struct spi_master *master = dev_get_drvdata(dev);
+ struct omap2_mcspi *mcspi = spi_master_get_devdata(master);
+ int error;
+
+ error = pinctrl_pm_select_default_state(dev);
+ if (error)
+ dev_warn(mcspi->dev, "%s: failed to set pins: %i\n",
+ __func__, error);
+
+ return 0;
}
#else
-#define omap2_mcspi_suspend NULL
-#define omap2_mcspi_resume NULL
+#define omap2_mcspi_suspend_noirq NULL
+#define omap2_mcspi_resume_noirq NULL
#endif
static const struct dev_pm_ops omap2_mcspi_pm_ops = {
- .resume = omap2_mcspi_resume,
- .suspend = omap2_mcspi_suspend,
+ .suspend_noirq = omap2_mcspi_suspend_noirq,
+ .resume_noirq = omap2_mcspi_resume_noirq,
.runtime_resume = omap_mcspi_runtime_resume,
};
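
Both hunks adding pm_runtime_put_noidle() above follow the same rule: pm_runtime_get_sync() increments the device's usage count even when it fails, so the error path must drop that reference before returning. The idiom in isolation:

	#include <linux/pm_runtime.h>

	static int foo_runtime_get(struct device *dev)
	{
		int ret = pm_runtime_get_sync(dev);

		if (ret < 0) {
			/* The usage count was bumped despite the failure. */
			pm_runtime_put_noidle(dev);
			return ret;
		}

		return 0;
	}
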
diff --git a/drivers/spi/spi-pxa2xx-dma.c b/drivers/spi/spi-pxa2xx-dma.c
index 3d7f66080c57..2fa7f4b43492 100644
--- a/drivers/spi/spi-pxa2xx-dma.c
+++ b/drivers/spi/spi-pxa2xx-dma.c
@@ -51,19 +51,15 @@ static void pxa2xx_spi_dma_transfer_complete(struct driver_data *drv_data,
if (!pxa25x_ssp_comp(drv_data))
pxa2xx_spi_write(drv_data, SSTO, 0);
- if (!error) {
- msg->actual_length += drv_data->len;
- msg->state = pxa2xx_spi_next_transfer(drv_data);
- } else {
+ if (error) {
/* In case we got an error we disable the SSP now */
pxa2xx_spi_write(drv_data, SSCR0,
pxa2xx_spi_read(drv_data, SSCR0)
& ~SSCR0_SSE);
-
- msg->state = ERROR_STATE;
+ msg->status = -EIO;
}
- tasklet_schedule(&drv_data->pump_transfers);
+ spi_finalize_current_transfer(drv_data->master);
}
}
@@ -74,11 +70,11 @@ static void pxa2xx_spi_dma_callback(void *data)
static struct dma_async_tx_descriptor *
pxa2xx_spi_dma_prepare_one(struct driver_data *drv_data,
- enum dma_transfer_direction dir)
+ enum dma_transfer_direction dir,
+ struct spi_transfer *xfer)
{
struct chip_data *chip =
spi_get_ctldata(drv_data->master->cur_msg->spi);
- struct spi_transfer *xfer = drv_data->cur_transfer;
enum dma_slave_buswidth width;
struct dma_slave_config cfg;
struct dma_chan *chan;
@@ -144,12 +140,13 @@ irqreturn_t pxa2xx_spi_dma_transfer(struct driver_data *drv_data)
return IRQ_NONE;
}
-int pxa2xx_spi_dma_prepare(struct driver_data *drv_data, u32 dma_burst)
+int pxa2xx_spi_dma_prepare(struct driver_data *drv_data,
+ struct spi_transfer *xfer)
{
struct dma_async_tx_descriptor *tx_desc, *rx_desc;
int err;
- tx_desc = pxa2xx_spi_dma_prepare_one(drv_data, DMA_MEM_TO_DEV);
+ tx_desc = pxa2xx_spi_dma_prepare_one(drv_data, DMA_MEM_TO_DEV, xfer);
if (!tx_desc) {
dev_err(&drv_data->pdev->dev,
"failed to get DMA TX descriptor\n");
@@ -157,7 +154,7 @@ int pxa2xx_spi_dma_prepare(struct driver_data *drv_data, u32 dma_burst)
goto err_tx;
}
- rx_desc = pxa2xx_spi_dma_prepare_one(drv_data, DMA_DEV_TO_MEM);
+ rx_desc = pxa2xx_spi_dma_prepare_one(drv_data, DMA_DEV_TO_MEM, xfer);
if (!rx_desc) {
dev_err(&drv_data->pdev->dev,
"failed to get DMA RX descriptor\n");
@@ -187,6 +184,13 @@ void pxa2xx_spi_dma_start(struct driver_data *drv_data)
atomic_set(&drv_data->dma_running, 1);
}
+void pxa2xx_spi_dma_stop(struct driver_data *drv_data)
+{
+ atomic_set(&drv_data->dma_running, 0);
+ dmaengine_terminate_sync(drv_data->master->dma_rx);
+ dmaengine_terminate_sync(drv_data->master->dma_tx);
+}
+
int pxa2xx_spi_dma_setup(struct driver_data *drv_data)
{
struct pxa2xx_spi_master *pdata = drv_data->master_info;
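
The deletions above (and the spi-pxa2xx.c hunks that follow) replace the driver's private pump_transfers() tasklet and message state machine with the SPI core's own sequencing: the driver now only implements set_cs() and transfer_one(), and signals completion from its IRQ/DMA callbacks. The shape of that model, as a hedged sketch with foo_* placeholders:

	#include <linux/interrupt.h>
	#include <linux/spi/spi.h>

	static void foo_set_cs(struct spi_device *spi, bool level)
	{
		/* level == true means chip select deasserted (inactive). */
	}

	static int foo_transfer_one(struct spi_controller *ctlr,
				    struct spi_device *spi,
				    struct spi_transfer *xfer)
	{
		/* Start the transfer; returning 1 tells the core it will
		 * complete asynchronously.
		 */
		return 1;
	}

	static irqreturn_t foo_isr(int irq, void *data)
	{
		struct spi_controller *ctlr = data;

		/* ... drain FIFOs, check for errors ... then hand the
		 * finished transfer back to the core:
		 */
		spi_finalize_current_transfer(ctlr);
		return IRQ_HANDLED;
	}
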
diff --git a/drivers/spi/spi-pxa2xx.c b/drivers/spi/spi-pxa2xx.c
index 82dcb88fcfba..0b2d60d30f69 100644
--- a/drivers/spi/spi-pxa2xx.c
+++ b/drivers/spi/spi-pxa2xx.c
@@ -340,9 +340,11 @@ static void lpss_ssp_setup(struct driver_data *drv_data)
}
}
-static void lpss_ssp_select_cs(struct driver_data *drv_data,
+static void lpss_ssp_select_cs(struct spi_device *spi,
const struct lpss_config *config)
{
+ struct driver_data *drv_data =
+ spi_controller_get_devdata(spi->controller);
u32 value, cs;
if (!config->cs_sel_mask)
@@ -350,7 +352,7 @@ static void lpss_ssp_select_cs(struct driver_data *drv_data,
value = __lpss_ssp_read_priv(drv_data, config->reg_cs_ctrl);
- cs = drv_data->master->cur_msg->spi->chip_select;
+ cs = spi->chip_select;
cs <<= config->cs_sel_shift;
if (cs != (value & config->cs_sel_mask)) {
/*
@@ -369,15 +371,17 @@ static void lpss_ssp_select_cs(struct driver_data *drv_data,
}
}
-static void lpss_ssp_cs_control(struct driver_data *drv_data, bool enable)
+static void lpss_ssp_cs_control(struct spi_device *spi, bool enable)
{
+ struct driver_data *drv_data =
+ spi_controller_get_devdata(spi->controller);
const struct lpss_config *config;
u32 value;
config = lpss_get_config(drv_data);
if (enable)
- lpss_ssp_select_cs(drv_data, config);
+ lpss_ssp_select_cs(spi, config);
value = __lpss_ssp_read_priv(drv_data, config->reg_cs_ctrl);
if (enable)
@@ -387,10 +391,11 @@ static void lpss_ssp_cs_control(struct driver_data *drv_data, bool enable)
__lpss_ssp_write_priv(drv_data, config->reg_cs_ctrl, value);
}
-static void cs_assert(struct driver_data *drv_data)
+static void cs_assert(struct spi_device *spi)
{
- struct chip_data *chip =
- spi_get_ctldata(drv_data->master->cur_msg->spi);
+ struct chip_data *chip = spi_get_ctldata(spi);
+ struct driver_data *drv_data =
+ spi_controller_get_devdata(spi->controller);
if (drv_data->ssp_type == CE4100_SSP) {
pxa2xx_spi_write(drv_data, SSSR, chip->frm);
@@ -408,13 +413,14 @@ static void cs_assert(struct driver_data *drv_data)
}
if (is_lpss_ssp(drv_data))
- lpss_ssp_cs_control(drv_data, true);
+ lpss_ssp_cs_control(spi, true);
}
-static void cs_deassert(struct driver_data *drv_data)
+static void cs_deassert(struct spi_device *spi)
{
- struct chip_data *chip =
- spi_get_ctldata(drv_data->master->cur_msg->spi);
+ struct chip_data *chip = spi_get_ctldata(spi);
+ struct driver_data *drv_data =
+ spi_controller_get_devdata(spi->controller);
unsigned long timeout;
if (drv_data->ssp_type == CE4100_SSP)
@@ -437,7 +443,15 @@ static void cs_deassert(struct driver_data *drv_data)
}
if (is_lpss_ssp(drv_data))
- lpss_ssp_cs_control(drv_data, false);
+ lpss_ssp_cs_control(spi, false);
+}
+
+static void pxa2xx_spi_set_cs(struct spi_device *spi, bool level)
+{
+ if (level)
+ cs_deassert(spi);
+ else
+ cs_assert(spi);
}
int pxa2xx_spi_flush(struct driver_data *drv_data)
@@ -549,70 +563,6 @@ static int u32_reader(struct driver_data *drv_data)
return drv_data->rx == drv_data->rx_end;
}
-void *pxa2xx_spi_next_transfer(struct driver_data *drv_data)
-{
- struct spi_message *msg = drv_data->master->cur_msg;
- struct spi_transfer *trans = drv_data->cur_transfer;
-
- /* Move to next transfer */
- if (trans->transfer_list.next != &msg->transfers) {
- drv_data->cur_transfer =
- list_entry(trans->transfer_list.next,
- struct spi_transfer,
- transfer_list);
- return RUNNING_STATE;
- } else
- return DONE_STATE;
-}
-
-/* caller already set message->status; dma and pio irqs are blocked */
-static void giveback(struct driver_data *drv_data)
-{
- struct spi_transfer* last_transfer;
- struct spi_message *msg;
-
- msg = drv_data->master->cur_msg;
- drv_data->cur_transfer = NULL;
-
- last_transfer = list_last_entry(&msg->transfers, struct spi_transfer,
- transfer_list);
-
- /* Delay if requested before any change in chip select */
- if (last_transfer->delay_usecs)
- udelay(last_transfer->delay_usecs);
-
- /* Drop chip select UNLESS cs_change is true or we are returning
- * a message with an error, or next message is for another chip
- */
- if (!last_transfer->cs_change)
- cs_deassert(drv_data);
- else {
- struct spi_message *next_msg;
-
- /* Holding of cs was hinted, but we need to make sure
- * the next message is for the same chip. Don't waste
- * time with the following tests unless this was hinted.
- *
- * We cannot postpone this until pump_messages, because
- * after calling msg->complete (below) the driver that
- * sent the current message could be unloaded, which
- * could invalidate the cs_control() callback...
- */
-
- /* get a pointer to the next message, if any */
- next_msg = spi_get_next_queued_message(drv_data->master);
-
- /* see if the next and current messages point
- * to the same chip
- */
- if ((next_msg && next_msg->spi != msg->spi) ||
- msg->state == ERROR_STATE)
- cs_deassert(drv_data);
- }
-
- spi_finalize_current_message(drv_data->master);
-}
-
static void reset_sccr1(struct driver_data *drv_data)
{
struct chip_data *chip =
@@ -648,8 +598,8 @@ static void int_error_stop(struct driver_data *drv_data, const char* msg)
dev_err(&drv_data->pdev->dev, "%s\n", msg);
- drv_data->master->cur_msg->state = ERROR_STATE;
- tasklet_schedule(&drv_data->pump_transfers);
+ drv_data->master->cur_msg->status = -EIO;
+ spi_finalize_current_transfer(drv_data->master);
}
static void int_transfer_complete(struct driver_data *drv_data)
@@ -660,19 +610,7 @@ static void int_transfer_complete(struct driver_data *drv_data)
if (!pxa25x_ssp_comp(drv_data))
pxa2xx_spi_write(drv_data, SSTO, 0);
- /* Update total byte transferred return count actual bytes read */
- drv_data->master->cur_msg->actual_length += drv_data->len -
- (drv_data->rx_end - drv_data->rx);
-
- /* Transfer delays and chip select release are
- * handled in pump_transfers or giveback
- */
-
- /* Move to next transfer */
- drv_data->master->cur_msg->state = pxa2xx_spi_next_transfer(drv_data);
-
- /* Schedule transfer tasklet */
- tasklet_schedule(&drv_data->pump_transfers);
+ spi_finalize_current_transfer(drv_data->master);
}
static irqreturn_t interrupt_transfer(struct driver_data *drv_data)
@@ -973,17 +911,16 @@ static bool pxa2xx_spi_can_dma(struct spi_controller *master,
xfer->len >= chip->dma_burst_size;
}
-static void pump_transfers(unsigned long data)
+static int pxa2xx_spi_transfer_one(struct spi_controller *master,
+ struct spi_device *spi,
+ struct spi_transfer *transfer)
{
- struct driver_data *drv_data = (struct driver_data *)data;
- struct spi_controller *master = drv_data->master;
+ struct driver_data *drv_data = spi_controller_get_devdata(master);
struct spi_message *message = master->cur_msg;
struct chip_data *chip = spi_get_ctldata(message->spi);
u32 dma_thresh = chip->dma_threshold;
u32 dma_burst = chip->dma_burst_size;
u32 change_mask = pxa2xx_spi_get_ssrc1_change_mask(drv_data);
- struct spi_transfer *transfer;
- struct spi_transfer *previous;
u32 clk_div;
u8 bits;
u32 speed;
@@ -992,36 +929,6 @@ static void pump_transfers(unsigned long data)
int err;
int dma_mapped;
- /* Get current state information */
- transfer = drv_data->cur_transfer;
-
- /* Handle for abort */
- if (message->state == ERROR_STATE) {
- message->status = -EIO;
- giveback(drv_data);
- return;
- }
-
- /* Handle end of message */
- if (message->state == DONE_STATE) {
- message->status = 0;
- giveback(drv_data);
- return;
- }
-
- /* Delay if requested at end of transfer before CS change */
- if (message->state == RUNNING_STATE) {
- previous = list_entry(transfer->transfer_list.prev,
- struct spi_transfer,
- transfer_list);
- if (previous->delay_usecs)
- udelay(previous->delay_usecs);
-
- /* Drop chip select only if cs_change is requested */
- if (previous->cs_change)
- cs_deassert(drv_data);
- }
-
/* Check if we can DMA this transfer */
if (transfer->len > MAX_DMA_LEN && chip->enable_dma) {
@@ -1029,34 +936,27 @@ static void pump_transfers(unsigned long data)
if (message->is_dma_mapped
|| transfer->rx_dma || transfer->tx_dma) {
dev_err(&drv_data->pdev->dev,
- "pump_transfers: mapped transfer length of "
- "%u is greater than %d\n",
+ "Mapped transfer length of %u is greater than %d\n",
transfer->len, MAX_DMA_LEN);
- message->status = -EINVAL;
- giveback(drv_data);
- return;
+ return -EINVAL;
}
/* warn ... we force this to PIO mode */
dev_warn_ratelimited(&message->spi->dev,
- "pump_transfers: DMA disabled for transfer length %ld "
- "greater than %d\n",
- (long)drv_data->len, MAX_DMA_LEN);
+ "DMA disabled for transfer length %ld greater than %d\n",
+ (long)transfer->len, MAX_DMA_LEN);
}
/* Setup the transfer state based on the type of transfer */
if (pxa2xx_spi_flush(drv_data) == 0) {
- dev_err(&drv_data->pdev->dev, "pump_transfers: flush failed\n");
- message->status = -EIO;
- giveback(drv_data);
- return;
+ dev_err(&drv_data->pdev->dev, "Flush failed\n");
+ return -EIO;
}
drv_data->n_bytes = chip->n_bytes;
drv_data->tx = (void *)transfer->tx_buf;
drv_data->tx_end = drv_data->tx + transfer->len;
drv_data->rx = transfer->rx_buf;
drv_data->rx_end = drv_data->rx + transfer->len;
- drv_data->len = transfer->len;
drv_data->write = drv_data->tx ? chip->write : null_writer;
drv_data->read = drv_data->rx ? chip->read : null_reader;
@@ -1095,11 +995,9 @@ static void pump_transfers(unsigned long data)
bits, &dma_burst,
&dma_thresh))
dev_warn_ratelimited(&message->spi->dev,
- "pump_transfers: DMA burst size reduced to match bits_per_word\n");
+ "DMA burst size reduced to match bits_per_word\n");
}
- message->state = RUNNING_STATE;
-
dma_mapped = master->can_dma &&
master->can_dma(master, message->spi, transfer) &&
master->cur_msg_mapped;
@@ -1108,12 +1006,9 @@ static void pump_transfers(unsigned long data)
/* Ensure we have the correct interrupt handler */
drv_data->transfer_handler = pxa2xx_spi_dma_transfer;
- err = pxa2xx_spi_dma_prepare(drv_data, dma_burst);
- if (err) {
- message->status = err;
- giveback(drv_data);
- return;
- }
+ err = pxa2xx_spi_dma_prepare(drv_data, transfer);
+ if (err)
+ return err;
/* Clear status and start DMA engine */
cr1 = chip->cr1 | dma_thresh | drv_data->dma_cr1;
@@ -1175,27 +1070,40 @@ static void pump_transfers(unsigned long data)
pxa2xx_spi_write(drv_data, SSTO, chip->timeout);
}
- cs_assert(drv_data);
-
- /* after chip select, release the data by enabling service
- * requests and interrupts, without changing any mode bits */
+ /*
+ * Release the data by enabling service requests and interrupts,
+ * without changing any mode bits
+ */
pxa2xx_spi_write(drv_data, SSCR1, cr1);
+
+ return 1;
}
-static int pxa2xx_spi_transfer_one_message(struct spi_controller *master,
- struct spi_message *msg)
+static void pxa2xx_spi_handle_err(struct spi_controller *master,
+ struct spi_message *msg)
{
struct driver_data *drv_data = spi_controller_get_devdata(master);
- /* Initial message state*/
- msg->state = START_STATE;
- drv_data->cur_transfer = list_entry(msg->transfers.next,
- struct spi_transfer,
- transfer_list);
+ /* Disable the SSP */
+ pxa2xx_spi_write(drv_data, SSCR0,
+ pxa2xx_spi_read(drv_data, SSCR0) & ~SSCR0_SSE);
+ /* Clear and disable interrupts and service requests */
+ write_SSSR_CS(drv_data, drv_data->clear_sr);
+ pxa2xx_spi_write(drv_data, SSCR1,
+ pxa2xx_spi_read(drv_data, SSCR1)
+ & ~(drv_data->int_cr1 | drv_data->dma_cr1));
+ if (!pxa25x_ssp_comp(drv_data))
+ pxa2xx_spi_write(drv_data, SSTO, 0);
- /* Mark as busy and launch transfers */
- tasklet_schedule(&drv_data->pump_transfers);
- return 0;
+ /*
+ * Stop the DMA if running. Note that the DMA callback handler may
+ * have unset dma_running already, which is fine as stopping is not
+ * needed then, but we shouldn't rely on this flag for anything other
+ * than stopping (for instance, to differentiate between PIO and DMA
+ * transfers).
+ */
+ if (atomic_read(&drv_data->dma_running))
+ pxa2xx_spi_dma_stop(drv_data);
}
static int pxa2xx_spi_unprepare_transfer(struct spi_controller *master)
@@ -1651,7 +1559,9 @@ static int pxa2xx_spi_probe(struct platform_device *pdev)
master->dma_alignment = DMA_ALIGNMENT;
master->cleanup = cleanup;
master->setup = setup;
- master->transfer_one_message = pxa2xx_spi_transfer_one_message;
+ master->set_cs = pxa2xx_spi_set_cs;
+ master->transfer_one = pxa2xx_spi_transfer_one;
+ master->handle_err = pxa2xx_spi_handle_err;
master->unprepare_transfer_hardware = pxa2xx_spi_unprepare_transfer;
master->fw_translate_cs = pxa2xx_spi_fw_translate_cs;
master->auto_runtime_pm = true;
@@ -1702,7 +1612,9 @@ static int pxa2xx_spi_probe(struct platform_device *pdev)
}
/* Enable SOC clock */
- clk_prepare_enable(ssp->clk);
+ status = clk_prepare_enable(ssp->clk);
+ if (status)
+ goto out_error_dma_irq_alloc;
master->max_speed_hz = clk_get_rate(ssp->clk);
@@ -1787,9 +1699,6 @@ static int pxa2xx_spi_probe(struct platform_device *pdev)
}
}
- tasklet_init(&drv_data->pump_transfers, pump_transfers,
- (unsigned long)drv_data);
-
pm_runtime_set_autosuspend_delay(&pdev->dev, 50);
pm_runtime_use_autosuspend(&pdev->dev);
pm_runtime_set_active(&pdev->dev);
@@ -1809,6 +1718,8 @@ out_error_clock_enabled:
pm_runtime_put_noidle(&pdev->dev);
pm_runtime_disable(&pdev->dev);
clk_disable_unprepare(ssp->clk);
+
+out_error_dma_irq_alloc:
pxa2xx_spi_dma_release(drv_data);
free_irq(ssp->irq, drv_data);
@@ -1882,8 +1793,11 @@ static int pxa2xx_spi_resume(struct device *dev)
int status;
/* Enable the SSP clock */
- if (!pm_runtime_suspended(dev))
- clk_prepare_enable(ssp->clk);
+ if (!pm_runtime_suspended(dev)) {
+ status = clk_prepare_enable(ssp->clk);
+ if (status)
+ return status;
+ }
/* Restore LPSS private register bits */
if (is_lpss_ssp(drv_data))
@@ -1912,9 +1826,10 @@ static int pxa2xx_spi_runtime_suspend(struct device *dev)
static int pxa2xx_spi_runtime_resume(struct device *dev)
{
struct driver_data *drv_data = dev_get_drvdata(dev);
+ int status;
- clk_prepare_enable(drv_data->ssp->clk);
- return 0;
+ status = clk_prepare_enable(drv_data->ssp->clk);
+ return status;
}
#endif
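
The conversion above retires the driver-private message pump (the tasklet, giveback(), and the START/RUNNING/DONE/ERROR states) in favor of the SPI core's transfer_one model. A minimal sketch of the resulting wiring, using the hook names from the patch (the surrounding comments are illustrative, not part of the patch):

    /* The core now walks the message queue, applies per-transfer delays
     * and drives the chip select; the driver only handles one transfer. */
    master->set_cs = pxa2xx_spi_set_cs;             /* core toggles CS */
    master->transfer_one = pxa2xx_spi_transfer_one; /* returns 1 = still in flight */
    master->handle_err = pxa2xx_spi_handle_err;     /* quiesce HW on timeout/error */
    /* the IRQ and DMA completion paths then end in spi_finalize_current_transfer() */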
diff --git a/drivers/spi/spi-pxa2xx.h b/drivers/spi/spi-pxa2xx.h
index 513ec6c6e25b..513c53aaeab2 100644
--- a/drivers/spi/spi-pxa2xx.h
+++ b/drivers/spi/spi-pxa2xx.h
@@ -38,7 +38,7 @@ struct driver_data {
/* SSP register addresses */
void __iomem *ioaddr;
- u32 ssdr_physical;
+ phys_addr_t ssdr_physical;
/* SSP masks*/
u32 dma_cr1;
@@ -46,15 +46,10 @@ struct driver_data {
u32 clear_sr;
u32 mask_sr;
- /* Message Transfer pump */
- struct tasklet_struct pump_transfers;
-
/* DMA engine support */
atomic_t dma_running;
- /* Current message transfer state info */
- struct spi_transfer *cur_transfer;
- size_t len;
+ /* Current transfer state info */
void *tx;
void *tx_end;
void *rx;
@@ -104,11 +99,6 @@ static inline void pxa2xx_spi_write(const struct driver_data *drv_data,
__raw_writel(val, drv_data->ioaddr + reg);
}
-#define START_STATE ((void *)0)
-#define RUNNING_STATE ((void *)1)
-#define DONE_STATE ((void *)2)
-#define ERROR_STATE ((void *)-1)
-
#define DMA_ALIGNMENT 8
static inline int pxa25x_ssp_comp(struct driver_data *drv_data)
@@ -133,14 +123,15 @@ static inline void write_SSSR_CS(struct driver_data *drv_data, u32 val)
}
extern int pxa2xx_spi_flush(struct driver_data *drv_data);
-extern void *pxa2xx_spi_next_transfer(struct driver_data *drv_data);
#define MAX_DMA_LEN SZ_64K
#define DEFAULT_DMA_CR1 (SSCR1_TSRE | SSCR1_RSRE | SSCR1_TRAIL)
extern irqreturn_t pxa2xx_spi_dma_transfer(struct driver_data *drv_data);
-extern int pxa2xx_spi_dma_prepare(struct driver_data *drv_data, u32 dma_burst);
+extern int pxa2xx_spi_dma_prepare(struct driver_data *drv_data,
+ struct spi_transfer *xfer);
extern void pxa2xx_spi_dma_start(struct driver_data *drv_data);
+extern void pxa2xx_spi_dma_stop(struct driver_data *drv_data);
extern int pxa2xx_spi_dma_setup(struct driver_data *drv_data);
extern void pxa2xx_spi_dma_release(struct driver_data *drv_data);
extern int pxa2xx_spi_set_dma_burst_and_threshold(struct chip_data *chip,
diff --git a/drivers/spi/spi-s3c64xx.c b/drivers/spi/spi-s3c64xx.c
index baa3a9fa2638..7b7151ec14c8 100644
--- a/drivers/spi/spi-s3c64xx.c
+++ b/drivers/spi/spi-s3c64xx.c
@@ -28,15 +28,15 @@
#define S3C64XX_SPI_CH_CFG 0x00
#define S3C64XX_SPI_CLK_CFG 0x04
-#define S3C64XX_SPI_MODE_CFG 0x08
-#define S3C64XX_SPI_SLAVE_SEL 0x0C
+#define S3C64XX_SPI_MODE_CFG 0x08
+#define S3C64XX_SPI_SLAVE_SEL 0x0C
#define S3C64XX_SPI_INT_EN 0x10
#define S3C64XX_SPI_STATUS 0x14
#define S3C64XX_SPI_TX_DATA 0x18
#define S3C64XX_SPI_RX_DATA 0x1C
-#define S3C64XX_SPI_PACKET_CNT 0x20
-#define S3C64XX_SPI_PENDING_CLR 0x24
-#define S3C64XX_SPI_SWAP_CFG 0x28
+#define S3C64XX_SPI_PACKET_CNT 0x20
+#define S3C64XX_SPI_PENDING_CLR 0x24
+#define S3C64XX_SPI_SWAP_CFG 0x28
#define S3C64XX_SPI_FB_CLK 0x2C
#define S3C64XX_SPI_CH_HS_EN (1<<6) /* High Speed Enable */
@@ -77,9 +77,9 @@
#define S3C64XX_SPI_INT_TX_FIFORDY_EN (1<<0)
#define S3C64XX_SPI_ST_RX_OVERRUN_ERR (1<<5)
-#define S3C64XX_SPI_ST_RX_UNDERRUN_ERR (1<<4)
+#define S3C64XX_SPI_ST_RX_UNDERRUN_ERR (1<<4)
#define S3C64XX_SPI_ST_TX_OVERRUN_ERR (1<<3)
-#define S3C64XX_SPI_ST_TX_UNDERRUN_ERR (1<<2)
+#define S3C64XX_SPI_ST_TX_UNDERRUN_ERR (1<<2)
#define S3C64XX_SPI_ST_RX_FIFORDY (1<<1)
#define S3C64XX_SPI_ST_TX_FIFORDY (1<<0)
@@ -100,7 +100,7 @@
#define S3C64XX_SPI_SWAP_TX_BIT (1<<1)
#define S3C64XX_SPI_SWAP_TX_EN (1<<0)
-#define S3C64XX_SPI_FBCLK_MSK (3<<0)
+#define S3C64XX_SPI_FBCLK_MSK (3<<0)
#define FIFO_LVL_MASK(i) ((i)->port_conf->fifo_lvl_mask[i->port_id])
#define S3C64XX_SPI_ST_TX_DONE(v, i) (((v) & \
@@ -156,7 +156,6 @@ struct s3c64xx_spi_port_config {
* @ioclk: Pointer to the i/o clock between master and slave
* @master: Pointer to the SPI Protocol master.
* @cntrlr_info: Platform specific data for the controller this driver manages.
- * @tgl_spi: Pointer to the last CS left untoggled by the cs_change hint.
* @lock: Controller specific lock.
* @state: Set of FLAGS to indicate status.
* @rx_dmach: Controller's DMA channel for Rx.
@@ -177,7 +176,6 @@ struct s3c64xx_spi_driver_data {
struct platform_device *pdev;
struct spi_master *master;
struct s3c64xx_spi_info *cntrlr_info;
- struct spi_device *tgl_spi;
spinlock_t lock;
unsigned long sfr_start;
struct completion xfer_completion;
@@ -190,7 +188,7 @@ struct s3c64xx_spi_driver_data {
unsigned int port_id;
};
-static void flush_fifo(struct s3c64xx_spi_driver_data *sdd)
+static void s3c64xx_flush_fifo(struct s3c64xx_spi_driver_data *sdd)
{
void __iomem *regs = sdd->regs;
unsigned long loops;
@@ -350,9 +348,8 @@ static bool s3c64xx_spi_can_dma(struct spi_master *master,
return xfer->len > (FIFO_LVL_MASK(sdd) >> 1) + 1;
}
-static void enable_datapath(struct s3c64xx_spi_driver_data *sdd,
- struct spi_device *spi,
- struct spi_transfer *xfer, int dma_mode)
+static void s3c64xx_enable_datapath(struct s3c64xx_spi_driver_data *sdd,
+ struct spi_transfer *xfer, int dma_mode)
{
void __iomem *regs = sdd->regs;
u32 modecfg, chcfg;
@@ -442,8 +439,8 @@ static u32 s3c64xx_spi_wait_for_timeout(struct s3c64xx_spi_driver_data *sdd,
return RX_FIFO_LVL(status, sdd);
}
-static int wait_for_dma(struct s3c64xx_spi_driver_data *sdd,
- struct spi_transfer *xfer)
+static int s3c64xx_wait_for_dma(struct s3c64xx_spi_driver_data *sdd,
+ struct spi_transfer *xfer)
{
void __iomem *regs = sdd->regs;
unsigned long val;
@@ -485,8 +482,8 @@ static int wait_for_dma(struct s3c64xx_spi_driver_data *sdd,
return 0;
}
-static int wait_for_pio(struct s3c64xx_spi_driver_data *sdd,
- struct spi_transfer *xfer)
+static int s3c64xx_wait_for_pio(struct s3c64xx_spi_driver_data *sdd,
+ struct spi_transfer *xfer)
{
void __iomem *regs = sdd->regs;
unsigned long val;
@@ -505,6 +502,8 @@ static int wait_for_pio(struct s3c64xx_spi_driver_data *sdd,
status = readl(regs + S3C64XX_SPI_STATUS);
} while (RX_FIFO_LVL(status, sdd) < xfer->len && --val);
+ if (!val)
+ return -EIO;
/* If it was only Tx */
if (!xfer->rx_buf) {
@@ -635,11 +634,15 @@ static int s3c64xx_spi_transfer_one(struct spi_master *master,
struct spi_transfer *xfer)
{
struct s3c64xx_spi_driver_data *sdd = spi_master_get_devdata(master);
+ const unsigned int fifo_len = (FIFO_LVL_MASK(sdd) >> 1) + 1;
+ const void *tx_buf = NULL;
+ void *rx_buf = NULL;
+ int target_len = 0, origin_len = 0;
+ int use_dma = 0;
int status;
u32 speed;
u8 bpw;
unsigned long flags;
- int use_dma;
reinit_completion(&sdd->xfer_completion);
@@ -654,48 +657,77 @@ static int s3c64xx_spi_transfer_one(struct spi_master *master,
s3c64xx_spi_config(sdd);
}
- /* Polling method for xfers not bigger than FIFO capacity */
- use_dma = 0;
- if (!is_polling(sdd) &&
- (sdd->rx_dma.ch && sdd->tx_dma.ch &&
- (xfer->len > ((FIFO_LVL_MASK(sdd) >> 1) + 1))))
+ if (!is_polling(sdd) && (xfer->len > fifo_len) &&
+ sdd->rx_dma.ch && sdd->tx_dma.ch) {
use_dma = 1;
- spin_lock_irqsave(&sdd->lock, flags);
+ } else if (is_polling(sdd) && xfer->len > fifo_len) {
+ tx_buf = xfer->tx_buf;
+ rx_buf = xfer->rx_buf;
+ origin_len = xfer->len;
- /* Pending only which is to be done */
- sdd->state &= ~RXBUSY;
- sdd->state &= ~TXBUSY;
+ target_len = xfer->len;
+ if (xfer->len > fifo_len)
+ xfer->len = fifo_len;
+ }
- enable_datapath(sdd, spi, xfer, use_dma);
+ do {
+ spin_lock_irqsave(&sdd->lock, flags);
- /* Start the signals */
- s3c64xx_spi_set_cs(spi, true);
+ /* Mark as pending only what remains to be done */
+ sdd->state &= ~RXBUSY;
+ sdd->state &= ~TXBUSY;
- spin_unlock_irqrestore(&sdd->lock, flags);
+ s3c64xx_enable_datapath(sdd, xfer, use_dma);
- if (use_dma)
- status = wait_for_dma(sdd, xfer);
- else
- status = wait_for_pio(sdd, xfer);
-
- if (status) {
- dev_err(&spi->dev, "I/O Error: rx-%d tx-%d res:rx-%c tx-%c len-%d\n",
- xfer->rx_buf ? 1 : 0, xfer->tx_buf ? 1 : 0,
- (sdd->state & RXBUSY) ? 'f' : 'p',
- (sdd->state & TXBUSY) ? 'f' : 'p',
- xfer->len);
-
- if (use_dma) {
- if (xfer->tx_buf != NULL
- && (sdd->state & TXBUSY))
- dmaengine_terminate_all(sdd->tx_dma.ch);
- if (xfer->rx_buf != NULL
- && (sdd->state & RXBUSY))
- dmaengine_terminate_all(sdd->rx_dma.ch);
+ /* Start the signals */
+ s3c64xx_spi_set_cs(spi, true);
+
+ spin_unlock_irqrestore(&sdd->lock, flags);
+
+ if (use_dma)
+ status = s3c64xx_wait_for_dma(sdd, xfer);
+ else
+ status = s3c64xx_wait_for_pio(sdd, xfer);
+
+ if (status) {
+ dev_err(&spi->dev,
+ "I/O Error: rx-%d tx-%d res:rx-%c tx-%c len-%d\n",
+ xfer->rx_buf ? 1 : 0, xfer->tx_buf ? 1 : 0,
+ (sdd->state & RXBUSY) ? 'f' : 'p',
+ (sdd->state & TXBUSY) ? 'f' : 'p',
+ xfer->len);
+
+ if (use_dma) {
+ if (xfer->tx_buf && (sdd->state & TXBUSY))
+ dmaengine_terminate_all(sdd->tx_dma.ch);
+ if (xfer->rx_buf && (sdd->state & RXBUSY))
+ dmaengine_terminate_all(sdd->rx_dma.ch);
+ }
+ } else {
+ s3c64xx_flush_fifo(sdd);
}
- } else {
- flush_fifo(sdd);
+ if (target_len > 0) {
+ target_len -= xfer->len;
+
+ if (xfer->tx_buf)
+ xfer->tx_buf += xfer->len;
+
+ if (xfer->rx_buf)
+ xfer->rx_buf += xfer->len;
+
+ if (target_len > fifo_len)
+ xfer->len = fifo_len;
+ else
+ xfer->len = target_len;
+ }
+ } while (target_len > 0);
+
+ if (origin_len) {
+ /* Restore original xfer buffers and length */
+ xfer->tx_buf = tx_buf;
+ xfer->rx_buf = rx_buf;
+ xfer->len = origin_len;
}
return status;
@@ -891,7 +923,7 @@ static irqreturn_t s3c64xx_spi_irq(int irq, void *data)
return IRQ_HANDLED;
}
-static void s3c64xx_spi_hwinit(struct s3c64xx_spi_driver_data *sdd, int channel)
+static void s3c64xx_spi_hwinit(struct s3c64xx_spi_driver_data *sdd)
{
struct s3c64xx_spi_info *sci = sdd->cntrlr_info;
void __iomem *regs = sdd->regs;
@@ -929,7 +961,7 @@ static void s3c64xx_spi_hwinit(struct s3c64xx_spi_driver_data *sdd, int channel)
val |= (S3C64XX_SPI_TRAILCNT << S3C64XX_SPI_TRAILCNT_OFF);
writel(val, regs + S3C64XX_SPI_MODE_CFG);
- flush_fifo(sdd);
+ s3c64xx_flush_fifo(sdd);
}
#ifdef CONFIG_OF
@@ -1145,7 +1177,7 @@ static int s3c64xx_spi_probe(struct platform_device *pdev)
pm_runtime_get_sync(&pdev->dev);
/* Setup Default Mode */
- s3c64xx_spi_hwinit(sdd, sdd->port_id);
+ s3c64xx_spi_hwinit(sdd);
spin_lock_init(&sdd->lock);
init_completion(&sdd->xfer_completion);
@@ -1260,8 +1292,6 @@ static int s3c64xx_spi_resume(struct device *dev)
if (ret < 0)
return ret;
- s3c64xx_spi_hwinit(sdd, sdd->port_id);
-
return spi_master_resume(master);
}
#endif /* CONFIG_PM_SLEEP */
@@ -1299,6 +1329,8 @@ static int s3c64xx_spi_runtime_resume(struct device *dev)
if (ret != 0)
goto err_disable_src_clk;
+ s3c64xx_spi_hwinit(sdd);
+
return 0;
err_disable_src_clk:
@@ -1344,15 +1376,6 @@ static struct s3c64xx_spi_port_config exynos4_spi_port_config = {
.clk_from_cmu = true,
};
-static struct s3c64xx_spi_port_config exynos5440_spi_port_config = {
- .fifo_lvl_mask = { 0x1ff },
- .rx_lvl_offset = 15,
- .tx_st_done = 25,
- .high_speed = true,
- .clk_from_cmu = true,
- .quirks = S3C64XX_SPI_QUIRK_POLL,
-};
-
static struct s3c64xx_spi_port_config exynos7_spi_port_config = {
.fifo_lvl_mask = { 0x1ff, 0x7F, 0x7F, 0x7F, 0x7F, 0x1ff},
.rx_lvl_offset = 15,
@@ -1396,9 +1419,6 @@ static const struct of_device_id s3c64xx_spi_dt_match[] = {
{ .compatible = "samsung,exynos4210-spi",
.data = (void *)&exynos4_spi_port_config,
},
- { .compatible = "samsung,exynos5440-spi",
- .data = (void *)&exynos5440_spi_port_config,
- },
{ .compatible = "samsung,exynos7-spi",
.data = (void *)&exynos7_spi_port_config,
},
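
The polling-path rework above no longer mishandles transfers larger than the FIFO; it walks the transfer in FIFO-sized chunks and restores the original buffers and length afterwards. A condensed sketch of that loop (example_do_pio is a hypothetical stand-in for the enable-datapath/wait-for-pio sequence):

    int remaining = xfer->len;

    while (remaining > 0) {
        unsigned int chunk = min_t(unsigned int, remaining, fifo_len);
        int ret = example_do_pio(sdd, xfer, chunk);   /* hypothetical helper */

        if (ret)
            return ret;
        remaining -= chunk;
        if (xfer->tx_buf)
            xfer->tx_buf += chunk;                    /* advance both buffers */
        if (xfer->rx_buf)
            xfer->rx_buf += chunk;
    }
    /* the real code saves xfer->{tx_buf,rx_buf,len} first and restores them here */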
diff --git a/drivers/spi/spi-sh-msiof.c b/drivers/spi/spi-sh-msiof.c
index ae086aab57d5..0e74cbf9929d 100644
--- a/drivers/spi/spi-sh-msiof.c
+++ b/drivers/spi/spi-sh-msiof.c
@@ -39,7 +39,7 @@ struct sh_msiof_chipdata {
u16 tx_fifo_size;
u16 rx_fifo_size;
u16 master_flags;
- u16 min_div;
+ u16 min_div_pow;
};
struct sh_msiof_spi_priv {
@@ -51,7 +51,7 @@ struct sh_msiof_spi_priv {
struct completion done;
unsigned int tx_fifo_size;
unsigned int rx_fifo_size;
- unsigned int min_div;
+ unsigned int min_div_pow;
void *tx_dma_page;
void *rx_dma_page;
dma_addr_t tx_dma_addr;
@@ -249,42 +249,46 @@ static irqreturn_t sh_msiof_spi_irq(int irq, void *data)
return IRQ_HANDLED;
}
-static struct {
- unsigned short div;
- unsigned short brdv;
-} const sh_msiof_spi_div_table[] = {
- { 1, SCR_BRDV_DIV_1 },
- { 2, SCR_BRDV_DIV_2 },
- { 4, SCR_BRDV_DIV_4 },
- { 8, SCR_BRDV_DIV_8 },
- { 16, SCR_BRDV_DIV_16 },
- { 32, SCR_BRDV_DIV_32 },
+static const u32 sh_msiof_spi_div_array[] = {
+ SCR_BRDV_DIV_1, SCR_BRDV_DIV_2, SCR_BRDV_DIV_4,
+ SCR_BRDV_DIV_8, SCR_BRDV_DIV_16, SCR_BRDV_DIV_32,
};
static void sh_msiof_spi_set_clk_regs(struct sh_msiof_spi_priv *p,
unsigned long parent_rate, u32 spi_hz)
{
- unsigned long div = 1024;
+ unsigned long div;
u32 brps, scr;
- size_t k;
+ unsigned int div_pow = p->min_div_pow;
- if (!WARN_ON(!spi_hz || !parent_rate))
- div = DIV_ROUND_UP(parent_rate, spi_hz);
-
- div = max_t(unsigned long, div, p->min_div);
+ if (!spi_hz || !parent_rate) {
+ WARN(1, "Invalid clock rate parameters %lu and %u\n",
+ parent_rate, spi_hz);
+ return;
+ }
- for (k = 0; k < ARRAY_SIZE(sh_msiof_spi_div_table); k++) {
- brps = DIV_ROUND_UP(div, sh_msiof_spi_div_table[k].div);
+ div = DIV_ROUND_UP(parent_rate, spi_hz);
+ if (div <= 1024) {
/* SCR_BRDV_DIV_1 is valid only if BRPS is x 1/1 or x 1/2 */
- if (sh_msiof_spi_div_table[k].div == 1 && brps > 2)
- continue;
- if (brps <= 32) /* max of brdv is 32 */
- break;
- }
+ if (!div_pow && div <= 32 && div > 2)
+ div_pow = 1;
+
+ if (div_pow)
+ brps = (div + 1) >> div_pow;
+ else
+ brps = div;
- k = min_t(int, k, ARRAY_SIZE(sh_msiof_spi_div_table) - 1);
+ for (; brps > 32; div_pow++)
+ brps = (brps + 1) >> 1;
+ } else {
+ /* Set transfer rate composite divisor to 2^5 * 32 = 1024 */
+ dev_err(&p->pdev->dev,
+ "Requested SPI transfer rate %d is too low\n", spi_hz);
+ div_pow = 5;
+ brps = 32;
+ }
- scr = sh_msiof_spi_div_table[k].brdv | SCR_BRPS(brps);
+ scr = sh_msiof_spi_div_array[div_pow] | SCR_BRPS(brps);
sh_msiof_write(p, TSCR, scr);
if (!(p->master->flags & SPI_MASTER_MUST_TX))
sh_msiof_write(p, RSCR, scr);
@@ -563,14 +567,16 @@ static int sh_msiof_spi_setup(struct spi_device *spi)
/* Configure native chip select mode/polarity early */
clr = MDR1_SYNCMD_MASK;
- set = MDR1_TRMD | TMDR1_PCON | MDR1_SYNCMD_SPI;
+ set = MDR1_SYNCMD_SPI;
if (spi->mode & SPI_CS_HIGH)
clr |= BIT(MDR1_SYNCAC_SHIFT);
else
set |= BIT(MDR1_SYNCAC_SHIFT);
pm_runtime_get_sync(&p->pdev->dev);
tmp = sh_msiof_read(p, TMDR1) & ~clr;
- sh_msiof_write(p, TMDR1, tmp | set);
+ sh_msiof_write(p, TMDR1, tmp | set | MDR1_TRMD | TMDR1_PCON);
+ tmp = sh_msiof_read(p, RMDR1) & ~clr;
+ sh_msiof_write(p, RMDR1, tmp | set);
pm_runtime_put(&p->pdev->dev);
p->native_cs_high = spi->mode & SPI_CS_HIGH;
p->native_cs_inited = true;
@@ -1040,21 +1046,21 @@ static const struct sh_msiof_chipdata sh_data = {
.tx_fifo_size = 64,
.rx_fifo_size = 64,
.master_flags = 0,
- .min_div = 1,
+ .min_div_pow = 0,
};
static const struct sh_msiof_chipdata rcar_gen2_data = {
.tx_fifo_size = 64,
.rx_fifo_size = 64,
.master_flags = SPI_MASTER_MUST_TX,
- .min_div = 1,
+ .min_div_pow = 0,
};
static const struct sh_msiof_chipdata rcar_gen3_data = {
.tx_fifo_size = 64,
.rx_fifo_size = 64,
.master_flags = SPI_MASTER_MUST_TX,
- .min_div = 2,
+ .min_div_pow = 1,
};
static const struct of_device_id sh_msiof_match[] = {
@@ -1318,7 +1324,7 @@ static int sh_msiof_spi_probe(struct platform_device *pdev)
platform_set_drvdata(pdev, p);
p->master = master;
p->info = info;
- p->min_div = chipdata->min_div;
+ p->min_div_pow = chipdata->min_div_pow;
init_completion(&p->done);
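
The sh-msiof change drops the divider lookup table and computes the clock setting directly: a power-of-two prescaler (BRDV = 1 << div_pow) times a 1-32 scaler (BRPS) that together must cover DIV_ROUND_UP(parent_rate, spi_hz). A stand-alone sketch of the search, mirroring the constraints in the patch:

    unsigned long div = DIV_ROUND_UP(parent_rate, spi_hz);
    unsigned int div_pow = min_div_pow, brps;

    if (div > 1024) {                       /* slowest setting is 2^5 * 32 */
        div_pow = 5;
        brps = 32;
    } else {
        if (!div_pow && div > 2 && div <= 32)
            div_pow = 1;                    /* BRDV_DIV_1 allows BRPS 1 or 2 only */
        brps = div_pow ? (div + 1) >> div_pow : div;
        for (; brps > 32; div_pow++)        /* push the excess into the prescaler */
            brps = (brps + 1) >> 1;
    }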
diff --git a/drivers/spi/spi-stm32.c b/drivers/spi/spi-stm32.c
index ba9743fa2326..ad1e55d3d5d5 100644
--- a/drivers/spi/spi-stm32.c
+++ b/drivers/spi/spi-stm32.c
@@ -1129,7 +1129,7 @@ static int stm32_spi_probe(struct platform_device *pdev)
if (!spi->clk_rate) {
dev_err(&pdev->dev, "clk rate = 0\n");
ret = -EINVAL;
- goto err_master_put;
+ goto err_clk_disable;
}
spi->rst = devm_reset_control_get_exclusive(&pdev->dev, NULL);
diff --git a/drivers/spi/spi-ti-qspi.c b/drivers/spi/spi-ti-qspi.c
index c24d9b45a27c..5f19016bbf10 100644
--- a/drivers/spi/spi-ti-qspi.c
+++ b/drivers/spi/spi-ti-qspi.c
@@ -36,6 +36,7 @@
#include <linux/sizes.h>
#include <linux/spi/spi.h>
+#include <linux/spi/spi-mem.h>
struct ti_qspi_regs {
u32 clkctrl;
@@ -50,6 +51,7 @@ struct ti_qspi {
struct spi_master *master;
void __iomem *base;
void __iomem *mmap_base;
+ size_t mmap_size;
struct regmap *ctrl_base;
unsigned int ctrl_reg;
struct clk *fclk;
@@ -434,12 +436,10 @@ static int ti_qspi_dma_xfer(struct ti_qspi *qspi, dma_addr_t dma_dst,
return 0;
}
-static int ti_qspi_dma_bounce_buffer(struct ti_qspi *qspi,
- struct spi_flash_read_message *msg)
+static int ti_qspi_dma_bounce_buffer(struct ti_qspi *qspi, loff_t offs,
+ void *to, size_t readsize)
{
- size_t readsize = msg->len;
- void *to = msg->buf;
- dma_addr_t dma_src = qspi->mmap_phys_base + msg->from;
+ dma_addr_t dma_src = qspi->mmap_phys_base + offs;
int ret = 0;
/*
@@ -507,13 +507,14 @@ static void ti_qspi_disable_memory_map(struct spi_device *spi)
qspi->mmap_enabled = false;
}
-static void ti_qspi_setup_mmap_read(struct spi_device *spi,
- struct spi_flash_read_message *msg)
+static void ti_qspi_setup_mmap_read(struct spi_device *spi, u8 opcode,
+ u8 data_nbits, u8 addr_width,
+ u8 dummy_bytes)
{
struct ti_qspi *qspi = spi_master_get_devdata(spi->master);
- u32 memval = msg->read_opcode;
+ u32 memval = opcode;
- switch (msg->data_nbits) {
+ switch (data_nbits) {
case SPI_NBITS_QUAD:
memval |= QSPI_SETUP_RD_QUAD;
break;
@@ -524,48 +525,64 @@ static void ti_qspi_setup_mmap_read(struct spi_device *spi,
memval |= QSPI_SETUP_RD_NORMAL;
break;
}
- memval |= ((msg->addr_width - 1) << QSPI_SETUP_ADDR_SHIFT |
- msg->dummy_bytes << QSPI_SETUP_DUMMY_SHIFT);
+ memval |= ((addr_width - 1) << QSPI_SETUP_ADDR_SHIFT |
+ dummy_bytes << QSPI_SETUP_DUMMY_SHIFT);
ti_qspi_write(qspi, memval,
QSPI_SPI_SETUP_REG(spi->chip_select));
}
-static bool ti_qspi_spi_flash_can_dma(struct spi_device *spi,
- struct spi_flash_read_message *msg)
+static int ti_qspi_exec_mem_op(struct spi_mem *mem,
+ const struct spi_mem_op *op)
{
- return virt_addr_valid(msg->buf);
-}
-
-static int ti_qspi_spi_flash_read(struct spi_device *spi,
- struct spi_flash_read_message *msg)
-{
- struct ti_qspi *qspi = spi_master_get_devdata(spi->master);
+ struct ti_qspi *qspi = spi_master_get_devdata(mem->spi->master);
+ u32 from = 0;
int ret = 0;
+ /* Only optimize read path. */
+ if (!op->data.nbytes || op->data.dir != SPI_MEM_DATA_IN ||
+ !op->addr.nbytes || op->addr.nbytes > 4)
+ return -ENOTSUPP;
+
+ /* If the access exceeds the MMIO window size, fall back to regular mode. */
+ from = op->addr.val;
+ if (from + op->data.nbytes > qspi->mmap_size)
+ return -ENOTSUPP;
+
mutex_lock(&qspi->list_lock);
if (!qspi->mmap_enabled)
- ti_qspi_enable_memory_map(spi);
- ti_qspi_setup_mmap_read(spi, msg);
+ ti_qspi_enable_memory_map(mem->spi);
+ ti_qspi_setup_mmap_read(mem->spi, op->cmd.opcode, op->data.buswidth,
+ op->addr.nbytes, op->dummy.nbytes);
if (qspi->rx_chan) {
- if (msg->cur_msg_mapped)
- ret = ti_qspi_dma_xfer_sg(qspi, msg->rx_sg, msg->from);
- else
- ret = ti_qspi_dma_bounce_buffer(qspi, msg);
- if (ret)
- goto err_unlock;
+ struct sg_table sgt;
+
+ if (virt_addr_valid(op->data.buf.in) &&
+ !spi_controller_dma_map_mem_op_data(mem->spi->master, op,
+ &sgt)) {
+ ret = ti_qspi_dma_xfer_sg(qspi, sgt, from);
+ spi_controller_dma_unmap_mem_op_data(mem->spi->master,
+ op, &sgt);
+ } else {
+ ret = ti_qspi_dma_bounce_buffer(qspi, from,
+ op->data.buf.in,
+ op->data.nbytes);
+ }
} else {
- memcpy_fromio(msg->buf, qspi->mmap_base + msg->from, msg->len);
+ memcpy_fromio(op->data.buf.in, qspi->mmap_base + from,
+ op->data.nbytes);
}
- msg->retlen = msg->len;
-err_unlock:
mutex_unlock(&qspi->list_lock);
return ret;
}
+static const struct spi_controller_mem_ops ti_qspi_mem_ops = {
+ .exec_op = ti_qspi_exec_mem_op,
+};
+
static int ti_qspi_start_transfer_one(struct spi_master *master,
struct spi_message *m)
{
@@ -672,7 +689,7 @@ static int ti_qspi_probe(struct platform_device *pdev)
master->dev.of_node = pdev->dev.of_node;
master->bits_per_word_mask = SPI_BPW_MASK(32) | SPI_BPW_MASK(16) |
SPI_BPW_MASK(8);
- master->spi_flash_read = ti_qspi_spi_flash_read;
+ master->mem_ops = &ti_qspi_mem_ops;
if (!of_property_read_u32(np, "num-cs", &num_cs))
master->num_chipselect = num_cs;
@@ -702,6 +719,9 @@ static int ti_qspi_probe(struct platform_device *pdev)
}
}
+ if (res_mmap)
+ qspi->mmap_size = resource_size(res_mmap);
+
irq = platform_get_irq(pdev, 0);
if (irq < 0) {
dev_err(&pdev->dev, "no irq resource?\n");
@@ -770,7 +790,6 @@ static int ti_qspi_probe(struct platform_device *pdev)
dma_release_channel(qspi->rx_chan);
goto no_dma;
}
- master->spi_flash_can_dma = ti_qspi_spi_flash_can_dma;
master->dma_rx = qspi->rx_chan;
init_completion(&qspi->transfer_complete);
if (res_mmap)
@@ -784,7 +803,7 @@ no_dma:
"mmap failed with error %ld using PIO mode\n",
PTR_ERR(qspi->mmap_base));
qspi->mmap_base = NULL;
- master->spi_flash_read = NULL;
+ master->mem_ops = NULL;
}
}
qspi->mmap_enabled = false;
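
With the switch from spi_flash_read() to the generic spi-mem interface, the driver accelerates only what its memory-mapped window can serve and returns -ENOTSUPP for everything else, letting the spi-mem core fall back to regular transfers. A hypothetical helper mirroring the gating checks in ti_qspi_exec_mem_op():

    static bool example_op_is_mmap_read(const struct spi_mem_op *op,
                                        size_t mmap_size)
    {
        if (!op->data.nbytes || op->data.dir != SPI_MEM_DATA_IN)
            return false;                   /* only reads are memory-mapped */
        if (!op->addr.nbytes || op->addr.nbytes > 4)
            return false;                   /* need a 1-4 byte address */
        /* the whole access must fit inside the MMIO window */
        return op->addr.val + op->data.nbytes <= mmap_size;
    }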
diff --git a/drivers/spi/spi-zynqmp-gqspi.c b/drivers/spi/spi-zynqmp-gqspi.c
index 18aeaceee286..cc4d31033494 100644
--- a/drivers/spi/spi-zynqmp-gqspi.c
+++ b/drivers/spi/spi-zynqmp-gqspi.c
@@ -20,6 +20,7 @@
#include <linux/of_irq.h>
#include <linux/of_address.h>
#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
#include <linux/spi/spi.h>
#include <linux/spinlock.h>
#include <linux/workqueue.h>
@@ -135,6 +136,7 @@
#define GQSPI_DMA_UNALIGN 0x3
#define GQSPI_DEFAULT_NUM_CS 1 /* Default number of chip selects */
+#define SPI_AUTOSUSPEND_TIMEOUT 3000
enum mode_type {GQSPI_MODE_IO, GQSPI_MODE_DMA};
/**
@@ -356,21 +358,9 @@ static void zynqmp_qspi_copy_read_data(struct zynqmp_qspi *xqspi,
static int zynqmp_prepare_transfer_hardware(struct spi_master *master)
{
struct zynqmp_qspi *xqspi = spi_master_get_devdata(master);
- int ret;
-
- ret = clk_enable(xqspi->refclk);
- if (ret)
- return ret;
-
- ret = clk_enable(xqspi->pclk);
- if (ret)
- goto clk_err;
zynqmp_gqspi_write(xqspi, GQSPI_EN_OFST, GQSPI_EN_MASK);
return 0;
-clk_err:
- clk_disable(xqspi->refclk);
- return ret;
}
/**
@@ -387,8 +377,6 @@ static int zynqmp_unprepare_transfer_hardware(struct spi_master *master)
struct zynqmp_qspi *xqspi = spi_master_get_devdata(master);
zynqmp_gqspi_write(xqspi, GQSPI_EN_OFST, 0x0);
- clk_disable(xqspi->refclk);
- clk_disable(xqspi->pclk);
return 0;
}
@@ -918,8 +906,7 @@ static int zynqmp_qspi_start_transfer(struct spi_master *master,
*/
static int __maybe_unused zynqmp_qspi_suspend(struct device *dev)
{
- struct platform_device *pdev = to_platform_device(dev);
- struct spi_master *master = platform_get_drvdata(pdev);
+ struct spi_master *master = dev_get_drvdata(dev);
spi_master_suspend(master);
@@ -939,8 +926,7 @@ static int __maybe_unused zynqmp_qspi_suspend(struct device *dev)
*/
static int __maybe_unused zynqmp_qspi_resume(struct device *dev)
{
- struct platform_device *pdev = to_platform_device(dev);
- struct spi_master *master = platform_get_drvdata(pdev);
+ struct spi_master *master = dev_get_drvdata(dev);
struct zynqmp_qspi *xqspi = spi_master_get_devdata(master);
int ret = 0;
@@ -959,11 +945,67 @@ static int __maybe_unused zynqmp_qspi_resume(struct device *dev)
spi_master_resume(master);
+ clk_disable(xqspi->refclk);
+ clk_disable(xqspi->pclk);
return 0;
}
-static SIMPLE_DEV_PM_OPS(zynqmp_qspi_dev_pm_ops, zynqmp_qspi_suspend,
- zynqmp_qspi_resume);
+/**
+ * zynqmp_runtime_suspend - Runtime suspend method for the SPI driver
+ * @dev: Address of the device structure
+ *
+ * This function disables the clocks.
+ *
+ * Return: Always 0
+ */
+static int __maybe_unused zynqmp_runtime_suspend(struct device *dev)
+{
+ struct platform_device *pdev = to_platform_device(dev);
+ struct spi_master *master = platform_get_drvdata(pdev);
+ struct zynqmp_qspi *xqspi = spi_master_get_devdata(master);
+
+ clk_disable(xqspi->refclk);
+ clk_disable(xqspi->pclk);
+
+ return 0;
+}
+
+/**
+ * zynqmp_runtime_resume - Runtime resume method for the SPI driver
+ * @dev: Address of the device structure
+ *
+ * This function enables the clocks.
+ *
+ * Return: 0 on success, or a negative error code on failure
+ */
+static int __maybe_unused zynqmp_runtime_resume(struct device *dev)
+{
+ struct platform_device *pdev = to_platform_device(dev);
+ struct spi_master *master = platform_get_drvdata(pdev);
+ struct zynqmp_qspi *xqspi = spi_master_get_devdata(master);
+ int ret;
+
+ ret = clk_enable(xqspi->pclk);
+ if (ret) {
+ dev_err(dev, "Cannot enable APB clock.\n");
+ return ret;
+ }
+
+ ret = clk_enable(xqspi->refclk);
+ if (ret) {
+ dev_err(dev, "Cannot enable device clock.\n");
+ clk_disable(xqspi->pclk);
+ return ret;
+ }
+
+ return 0;
+}
+
+static const struct dev_pm_ops zynqmp_qspi_dev_pm_ops = {
+ SET_RUNTIME_PM_OPS(zynqmp_runtime_suspend,
+ zynqmp_runtime_resume, NULL)
+ SET_SYSTEM_SLEEP_PM_OPS(zynqmp_qspi_suspend, zynqmp_qspi_resume)
+};
/**
* zynqmp_qspi_probe: Probe method for the QSPI driver
@@ -1023,9 +1065,15 @@ static int zynqmp_qspi_probe(struct platform_device *pdev)
goto clk_dis_pclk;
}
+ pm_runtime_use_autosuspend(&pdev->dev);
+ pm_runtime_set_autosuspend_delay(&pdev->dev, SPI_AUTOSUSPEND_TIMEOUT);
+ pm_runtime_set_active(&pdev->dev);
+ pm_runtime_enable(&pdev->dev);
/* QSPI controller initializations */
zynqmp_qspi_init_hw(xqspi);
+ pm_runtime_mark_last_busy(&pdev->dev);
+ pm_runtime_put_autosuspend(&pdev->dev);
xqspi->irq = platform_get_irq(pdev, 0);
if (xqspi->irq <= 0) {
ret = -ENXIO;
@@ -1063,6 +1111,8 @@ static int zynqmp_qspi_probe(struct platform_device *pdev)
return 0;
clk_dis_all:
+ pm_runtime_set_suspended(&pdev->dev);
+ pm_runtime_disable(&pdev->dev);
clk_disable_unprepare(xqspi->refclk);
clk_dis_pclk:
clk_disable_unprepare(xqspi->pclk);
@@ -1090,6 +1140,8 @@ static int zynqmp_qspi_remove(struct platform_device *pdev)
zynqmp_gqspi_write(xqspi, GQSPI_EN_OFST, 0x0);
clk_disable_unprepare(xqspi->refclk);
clk_disable_unprepare(xqspi->pclk);
+ pm_runtime_set_suspended(&pdev->dev);
+ pm_runtime_disable(&pdev->dev);
spi_unregister_master(master);
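
The zynqmp-gqspi clock handling moves out of the prepare/unprepare-transfer hooks and into runtime PM with a 3-second autosuspend, so the clocks stay up across bursts of transfers instead of toggling per message. The probe-side pattern, condensed from the hunks above:

    pm_runtime_use_autosuspend(&pdev->dev);
    pm_runtime_set_autosuspend_delay(&pdev->dev, SPI_AUTOSUSPEND_TIMEOUT);
    pm_runtime_set_active(&pdev->dev);      /* clocks are already prepared and enabled */
    pm_runtime_enable(&pdev->dev);

    /* ... hardware init while the device is known to be powered ... */

    pm_runtime_mark_last_busy(&pdev->dev);
    pm_runtime_put_autosuspend(&pdev->dev); /* idles after SPI_AUTOSUSPEND_TIMEOUT ms */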
diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index 7b213faa0a2b..20b5b2754830 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -28,6 +28,7 @@
#include <linux/slab.h>
#include <linux/mod_devicetable.h>
#include <linux/spi/spi.h>
+#include <linux/spi/spi-mem.h>
#include <linux/of_gpio.h>
#include <linux/pm_runtime.h>
#include <linux/pm_domain.h>
@@ -46,6 +47,8 @@
#define CREATE_TRACE_POINTS
#include <trace/events/spi.h>
+#include "internals.h"
+
static DEFINE_IDR(spi_master_idr);
static void spidev_release(struct device *dev)
@@ -740,9 +743,9 @@ static void spi_set_cs(struct spi_device *spi, bool enable)
}
#ifdef CONFIG_HAS_DMA
-static int spi_map_buf(struct spi_controller *ctlr, struct device *dev,
- struct sg_table *sgt, void *buf, size_t len,
- enum dma_data_direction dir)
+int spi_map_buf(struct spi_controller *ctlr, struct device *dev,
+ struct sg_table *sgt, void *buf, size_t len,
+ enum dma_data_direction dir)
{
const bool vmalloced_buf = is_vmalloc_addr(buf);
unsigned int max_seg_size = dma_get_max_seg_size(dev);
@@ -821,8 +824,8 @@ static int spi_map_buf(struct spi_controller *ctlr, struct device *dev,
return 0;
}
-static void spi_unmap_buf(struct spi_controller *ctlr, struct device *dev,
- struct sg_table *sgt, enum dma_data_direction dir)
+void spi_unmap_buf(struct spi_controller *ctlr, struct device *dev,
+ struct sg_table *sgt, enum dma_data_direction dir)
{
if (sgt->orig_nents) {
dma_unmap_sg(dev, sgt->sgl, sgt->orig_nents, dir);
@@ -907,19 +910,6 @@ static int __spi_unmap_msg(struct spi_controller *ctlr, struct spi_message *msg)
return 0;
}
#else /* !CONFIG_HAS_DMA */
-static inline int spi_map_buf(struct spi_controller *ctlr, struct device *dev,
- struct sg_table *sgt, void *buf, size_t len,
- enum dma_data_direction dir)
-{
- return -EINVAL;
-}
-
-static inline void spi_unmap_buf(struct spi_controller *ctlr,
- struct device *dev, struct sg_table *sgt,
- enum dma_data_direction dir)
-{
-}
-
static inline int __spi_map_msg(struct spi_controller *ctlr,
struct spi_message *msg)
{
@@ -1222,6 +1212,7 @@ static void __spi_pump_messages(struct spi_controller *ctlr, bool in_kthread)
if (!was_busy && ctlr->auto_runtime_pm) {
ret = pm_runtime_get_sync(ctlr->dev.parent);
if (ret < 0) {
+ pm_runtime_put_noidle(ctlr->dev.parent);
dev_err(&ctlr->dev, "Failed to power device: %d\n",
ret);
mutex_unlock(&ctlr->io_mutex);
@@ -1533,6 +1524,22 @@ err_init_queue:
return ret;
}
+/**
+ * spi_flush_queue - Send all pending messages in the queue from the caller's
+ * context
+ * @ctlr: controller to process queue for
+ *
+ * This should be used when one wants to ensure all pending messages have been
+ * sent before doing something. It is used by the spi-mem code to make sure SPI
+ * memory operations do not preempt regular SPI transfers that have been queued
+ * before the spi-mem operation.
+ */
+void spi_flush_queue(struct spi_controller *ctlr)
+{
+ if (ctlr->transfer == spi_queued_transfer)
+ __spi_pump_messages(ctlr, false);
+}
+
/*-------------------------------------------------------------------------*/
#if defined(CONFIG_OF)
@@ -2063,6 +2070,26 @@ static int of_spi_register_master(struct spi_controller *ctlr)
}
#endif
+static int spi_controller_check_ops(struct spi_controller *ctlr)
+{
+ /*
+ * The controller may implement only the high-level SPI-memory-like
+ * operations if it does not support regular SPI transfers, and this is
+ * a valid use case.
+ * If ->mem_ops is NULL, we request that at least one of the
+ * ->transfer_xxx() methods be implemented.
+ */
+ if (ctlr->mem_ops) {
+ if (!ctlr->mem_ops->exec_op)
+ return -EINVAL;
+ } else if (!ctlr->transfer && !ctlr->transfer_one &&
+ !ctlr->transfer_one_message) {
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
/**
* spi_register_controller - register SPI master or slave controller
* @ctlr: initialized master, originally from spi_alloc_master() or
@@ -2096,6 +2123,14 @@ int spi_register_controller(struct spi_controller *ctlr)
if (!dev)
return -ENODEV;
+ /*
+ * Make sure all necessary hooks are implemented before registering
+ * the SPI controller.
+ */
+ status = spi_controller_check_ops(ctlr);
+ if (status)
+ return status;
+
if (!spi_controller_is_slave(ctlr)) {
status = of_spi_register_master(ctlr);
if (status)
@@ -2161,10 +2196,14 @@ int spi_register_controller(struct spi_controller *ctlr)
spi_controller_is_slave(ctlr) ? "slave" : "master",
dev_name(&ctlr->dev));
- /* If we're using a queued driver, start the queue */
- if (ctlr->transfer)
+ /*
+ * If we're using a queued driver, start the queue. Note that we don't
+ * need the queueing logic if the driver only supports high-level
+ * memory operations.
+ */
+ if (ctlr->transfer) {
dev_info(dev, "controller is unqueued, this is deprecated\n");
- else {
+ } else if (ctlr->transfer_one || ctlr->transfer_one_message) {
status = spi_controller_initialize_queue(ctlr);
if (status) {
device_del(&ctlr->dev);
@@ -2894,6 +2933,13 @@ static int __spi_async(struct spi_device *spi, struct spi_message *message)
{
struct spi_controller *ctlr = spi->controller;
+ /*
+ * Some controllers do not support regular SPI transfers. Return
+ * -ENOTSUPP when this is the case.
+ */
+ if (!ctlr->transfer)
+ return -ENOTSUPP;
+
message->spi = spi;
SPI_STATISTICS_INCREMENT_FIELD(&ctlr->statistics, spi_async);
@@ -3010,63 +3056,6 @@ int spi_async_locked(struct spi_device *spi, struct spi_message *message)
}
EXPORT_SYMBOL_GPL(spi_async_locked);
-
-int spi_flash_read(struct spi_device *spi,
- struct spi_flash_read_message *msg)
-
-{
- struct spi_controller *master = spi->controller;
- struct device *rx_dev = NULL;
- int ret;
-
- if ((msg->opcode_nbits == SPI_NBITS_DUAL ||
- msg->addr_nbits == SPI_NBITS_DUAL) &&
- !(spi->mode & (SPI_TX_DUAL | SPI_TX_QUAD)))
- return -EINVAL;
- if ((msg->opcode_nbits == SPI_NBITS_QUAD ||
- msg->addr_nbits == SPI_NBITS_QUAD) &&
- !(spi->mode & SPI_TX_QUAD))
- return -EINVAL;
- if (msg->data_nbits == SPI_NBITS_DUAL &&
- !(spi->mode & (SPI_RX_DUAL | SPI_RX_QUAD)))
- return -EINVAL;
- if (msg->data_nbits == SPI_NBITS_QUAD &&
- !(spi->mode & SPI_RX_QUAD))
- return -EINVAL;
-
- if (master->auto_runtime_pm) {
- ret = pm_runtime_get_sync(master->dev.parent);
- if (ret < 0) {
- dev_err(&master->dev, "Failed to power device: %d\n",
- ret);
- return ret;
- }
- }
-
- mutex_lock(&master->bus_lock_mutex);
- mutex_lock(&master->io_mutex);
- if (master->dma_rx && master->spi_flash_can_dma(spi, msg)) {
- rx_dev = master->dma_rx->device->dev;
- ret = spi_map_buf(master, rx_dev, &msg->rx_sg,
- msg->buf, msg->len,
- DMA_FROM_DEVICE);
- if (!ret)
- msg->cur_msg_mapped = true;
- }
- ret = master->spi_flash_read(spi, msg);
- if (msg->cur_msg_mapped)
- spi_unmap_buf(master, rx_dev, &msg->rx_sg,
- DMA_FROM_DEVICE);
- mutex_unlock(&master->io_mutex);
- mutex_unlock(&master->bus_lock_mutex);
-
- if (master->auto_runtime_pm)
- pm_runtime_put(master->dev.parent);
-
- return ret;
-}
-EXPORT_SYMBOL_GPL(spi_flash_read);
-
/*-------------------------------------------------------------------------*/
/* Utility methods for SPI protocol drivers, layered on
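
Taken together, the core changes allow a controller to be mem-ops-only: such a controller skips queue initialization, and __spi_async() rejects regular transfers for it with -ENOTSUPP. An equivalent form of the new registration-time validation in spi_controller_check_ops():

    if (ctlr->mem_ops && !ctlr->mem_ops->exec_op)
        return -EINVAL;     /* mem_ops set but no way to execute an op */
    if (!ctlr->mem_ops && !ctlr->transfer &&
        !ctlr->transfer_one && !ctlr->transfer_one_message)
        return -EINVAL;     /* no way to move data at all */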
diff --git a/drivers/ssb/Kconfig b/drivers/ssb/Kconfig
index 9371651d8017..c574dd210500 100644
--- a/drivers/ssb/Kconfig
+++ b/drivers/ssb/Kconfig
@@ -117,7 +117,7 @@ config SSB_SERIAL
config SSB_DRIVER_PCICORE_POSSIBLE
bool
- depends on SSB_PCIHOST && SSB = y
+ depends on SSB_PCIHOST
default y
config SSB_DRIVER_PCICORE
@@ -131,7 +131,7 @@ config SSB_DRIVER_PCICORE
config SSB_PCICORE_HOSTMODE
bool "Hostmode support for SSB PCI core"
- depends on SSB_DRIVER_PCICORE && SSB_DRIVER_MIPS
+ depends on SSB_DRIVER_PCICORE && SSB_DRIVER_MIPS && SSB = y
help
PCIcore hostmode operation (external PCI bus).
diff --git a/drivers/staging/comedi/drivers/serial2002.c b/drivers/staging/comedi/drivers/serial2002.c
index b3f3b4a201af..5471b2212a62 100644
--- a/drivers/staging/comedi/drivers/serial2002.c
+++ b/drivers/staging/comedi/drivers/serial2002.c
@@ -113,7 +113,7 @@ static void serial2002_tty_read_poll_wait(struct file *f, int timeout)
long elapsed;
__poll_t mask;
- mask = f->f_op->poll(f, &table.pt);
+ mask = vfs_poll(f, &table.pt);
if (mask & (EPOLLRDNORM | EPOLLRDBAND | EPOLLIN |
EPOLLHUP | EPOLLERR)) {
break;
@@ -136,7 +136,7 @@ static int serial2002_tty_read(struct file *f, int timeout)
result = -1;
if (!IS_ERR(f)) {
- if (f->f_op->poll) {
+ if (file_can_poll(f)) {
serial2002_tty_read_poll_wait(f, timeout);
if (kernel_read(f, &ch, 1, &pos) == 1)
diff --git a/drivers/staging/comedi/proc.c b/drivers/staging/comedi/proc.c
index 50d38938ac6f..8bc8e42beb90 100644
--- a/drivers/staging/comedi/proc.c
+++ b/drivers/staging/comedi/proc.c
@@ -62,25 +62,9 @@ static int comedi_read(struct seq_file *m, void *v)
return 0;
}
-/*
- * seq_file wrappers for procfile show routines.
- */
-static int comedi_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, comedi_read, NULL);
-}
-
-static const struct file_operations comedi_proc_fops = {
- .owner = THIS_MODULE,
- .open = comedi_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
void __init comedi_proc_init(void)
{
- if (!proc_create("comedi", 0444, NULL, &comedi_proc_fops))
+ if (!proc_create_single("comedi", 0444, NULL, comedi_read))
pr_warn("comedi: unable to create proc entry\n");
}
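
proc_create_single() has the proc core generate the single_open()/seq_file plumbing from just a show callback, which is what makes the open wrapper and file_operations table above deletable. The whole registration reduces to:

    /* one call replaces the open wrapper plus the file_operations table */
    proc_create_single("comedi", 0444, NULL, comedi_read);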
diff --git a/drivers/staging/fwserial/fwserial.c b/drivers/staging/fwserial/fwserial.c
index e8bfe5520bc7..fa0dd425b454 100644
--- a/drivers/staging/fwserial/fwserial.c
+++ b/drivers/staging/fwserial/fwserial.c
@@ -1506,11 +1506,6 @@ static int fwtty_debugfs_peers_show(struct seq_file *m, void *v)
return 0;
}
-static int fwtty_proc_open(struct inode *inode, struct file *fp)
-{
- return single_open(fp, fwtty_proc_show, NULL);
-}
-
static int fwtty_stats_open(struct inode *inode, struct file *fp)
{
return single_open(fp, fwtty_debugfs_stats_show, inode->i_private);
@@ -1537,14 +1532,6 @@ static const struct file_operations fwtty_peers_fops = {
.release = single_release,
};
-static const struct file_operations fwtty_proc_fops = {
- .owner = THIS_MODULE,
- .open = fwtty_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
static const struct tty_port_operations fwtty_port_ops = {
.dtr_rts = fwtty_port_dtr_rts,
.carrier_raised = fwtty_port_carrier_raised,
@@ -1570,7 +1557,7 @@ static const struct tty_operations fwtty_ops = {
.tiocmget = fwtty_tiocmget,
.tiocmset = fwtty_tiocmset,
.get_icount = fwtty_get_icount,
- .proc_fops = &fwtty_proc_fops,
+ .proc_show = fwtty_proc_show,
};
static const struct tty_operations fwloop_ops = {
diff --git a/drivers/staging/ipx/af_ipx.c b/drivers/staging/ipx/af_ipx.c
index 5703dd176787..208b5c161631 100644
--- a/drivers/staging/ipx/af_ipx.c
+++ b/drivers/staging/ipx/af_ipx.c
@@ -1965,7 +1965,7 @@ static const struct proto_ops ipx_dgram_ops = {
.socketpair = sock_no_socketpair,
.accept = sock_no_accept,
.getname = ipx_getname,
- .poll = datagram_poll,
+ .poll_mask = datagram_poll_mask,
.ioctl = ipx_ioctl,
#ifdef CONFIG_COMPAT
.compat_ioctl = ipx_compat_ioctl,
diff --git a/drivers/staging/ipx/ipx_proc.c b/drivers/staging/ipx/ipx_proc.c
index b9232e4e2ed4..360f0ad970de 100644
--- a/drivers/staging/ipx/ipx_proc.c
+++ b/drivers/staging/ipx/ipx_proc.c
@@ -244,42 +244,6 @@ static const struct seq_operations ipx_seq_socket_ops = {
.show = ipx_seq_socket_show,
};
-static int ipx_seq_route_open(struct inode *inode, struct file *file)
-{
- return seq_open(file, &ipx_seq_route_ops);
-}
-
-static int ipx_seq_interface_open(struct inode *inode, struct file *file)
-{
- return seq_open(file, &ipx_seq_interface_ops);
-}
-
-static int ipx_seq_socket_open(struct inode *inode, struct file *file)
-{
- return seq_open(file, &ipx_seq_socket_ops);
-}
-
-static const struct file_operations ipx_seq_interface_fops = {
- .open = ipx_seq_interface_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
-static const struct file_operations ipx_seq_route_fops = {
- .open = ipx_seq_route_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
-static const struct file_operations ipx_seq_socket_fops = {
- .open = ipx_seq_socket_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
static struct proc_dir_entry *ipx_proc_dir;
int __init ipx_proc_init(void)
@@ -291,16 +255,17 @@ int __init ipx_proc_init(void)
if (!ipx_proc_dir)
goto out;
- p = proc_create("interface", S_IRUGO,
- ipx_proc_dir, &ipx_seq_interface_fops);
+ p = proc_create_seq("interface", S_IRUGO, ipx_proc_dir,
+ &ipx_seq_interface_ops);
if (!p)
goto out_interface;
- p = proc_create("route", S_IRUGO, ipx_proc_dir, &ipx_seq_route_fops);
+ p = proc_create_seq("route", S_IRUGO, ipx_proc_dir, &ipx_seq_route_ops);
if (!p)
goto out_route;
- p = proc_create("socket", S_IRUGO, ipx_proc_dir, &ipx_seq_socket_fops);
+ p = proc_create_seq("socket", S_IRUGO, ipx_proc_dir,
+ &ipx_seq_socket_ops);
if (!p)
goto out_socket;
diff --git a/drivers/staging/media/imx/imx-media-csi.c b/drivers/staging/media/imx/imx-media-csi.c
index 16cab40156ca..aeab05f682d9 100644
--- a/drivers/staging/media/imx/imx-media-csi.c
+++ b/drivers/staging/media/imx/imx-media-csi.c
@@ -1799,7 +1799,7 @@ static int imx_csi_probe(struct platform_device *pdev)
priv->dev->of_node = pdata->of_node;
pinctrl = devm_pinctrl_get_select_default(priv->dev);
if (IS_ERR(pinctrl)) {
- ret = PTR_ERR(priv->vdev);
+ ret = PTR_ERR(pinctrl);
dev_dbg(priv->dev,
"devm_pinctrl_get_select_default() failed: %d\n", ret);
if (ret != -ENODEV)
diff --git a/drivers/staging/ncpfs/dir.c b/drivers/staging/ncpfs/dir.c
index 0c57c5c5d40a..072bcb12898f 100644
--- a/drivers/staging/ncpfs/dir.c
+++ b/drivers/staging/ncpfs/dir.c
@@ -823,12 +823,11 @@ static struct dentry *ncp_lookup(struct inode *dir, struct dentry *dentry, unsig
struct ncp_server *server = NCP_SERVER(dir);
struct inode *inode = NULL;
struct ncp_entry_info finfo;
- int error, res, len;
+ int res, len;
__u8 __name[NCP_MAXPATHLEN + 1];
- error = -EIO;
if (!ncp_conn_valid(server))
- goto finished;
+ return ERR_PTR(-EIO);
ncp_vdbg("server lookup for %pd2\n", dentry);
@@ -847,31 +846,20 @@ static struct dentry *ncp_lookup(struct inode *dir, struct dentry *dentry, unsig
res = ncp_obtain_info(server, dir, __name, &(finfo.i));
}
ncp_vdbg("looked for %pd2, res=%d\n", dentry, res);
- /*
- * If we didn't find an entry, make a negative dentry.
- */
- if (res)
- goto add_entry;
-
- /*
- * Create an inode for the entry.
- */
- finfo.opened = 0;
- finfo.ino = iunique(dir->i_sb, 2);
- finfo.volume = finfo.i.volNumber;
- error = -EACCES;
- inode = ncp_iget(dir->i_sb, &finfo);
-
- if (inode) {
- ncp_new_dentry(dentry);
-add_entry:
- d_add(dentry, inode);
- error = 0;
+ if (!res) {
+ /*
+ * Entry found; create an inode for it.
+ */
+ finfo.opened = 0;
+ finfo.ino = iunique(dir->i_sb, 2);
+ finfo.volume = finfo.i.volNumber;
+ inode = ncp_iget(dir->i_sb, &finfo);
+ if (unlikely(!inode))
+ inode = ERR_PTR(-EACCES);
+ else
+ ncp_new_dentry(dentry);
}
-
-finished:
- ncp_vdbg("result=%d\n", error);
- return ERR_PTR(error);
+ return d_splice_alias(inode, dentry);
}
/*
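
The rewritten lookup funnels every outcome through d_splice_alias(), whose contract covers all three cases the old goto-based code handled by hand (a sketch of the mapping, based on the function's documented behavior):

    /*
     * d_splice_alias(inode, dentry):
     *   inode == NULL  -> attach a negative dentry, return NULL
     *   IS_ERR(inode)  -> return the error as an ERR_PTR dentry
     *   valid inode    -> attach it, or return an existing alias
     */
    return d_splice_alias(inode, dentry);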
diff --git a/drivers/staging/rtl8192u/r8192U_core.c b/drivers/staging/rtl8192u/r8192U_core.c
index d607c59761cf..7a0dbc0fa18e 100644
--- a/drivers/staging/rtl8192u/r8192U_core.c
+++ b/drivers/staging/rtl8192u/r8192U_core.c
@@ -646,64 +646,25 @@ static void rtl8192_proc_module_init(void)
rtl8192_proc = proc_mkdir(RTL819xU_MODULE_NAME, init_net.proc_net);
}
-/*
- * seq_file wrappers for procfile show routines.
- */
-static int rtl8192_proc_open(struct inode *inode, struct file *file)
-{
- struct net_device *dev = proc_get_parent_data(inode);
- int (*show)(struct seq_file *, void *) = PDE_DATA(inode);
-
- return single_open(file, show, dev);
-}
-
-static const struct file_operations rtl8192_proc_fops = {
- .open = rtl8192_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
-/*
- * Table of proc files we need to create.
- */
-struct rtl8192_proc_file {
- char name[12];
- int (*show)(struct seq_file *, void *);
-};
-
-static const struct rtl8192_proc_file rtl8192_proc_files[] = {
- { "stats-rx", &proc_get_stats_rx },
- { "stats-tx", &proc_get_stats_tx },
- { "stats-ap", &proc_get_stats_ap },
- { "registers", &proc_get_registers },
- { "" }
-};
-
static void rtl8192_proc_init_one(struct net_device *dev)
{
- const struct rtl8192_proc_file *f;
struct proc_dir_entry *dir;
- if (rtl8192_proc) {
- dir = proc_mkdir_data(dev->name, 0, rtl8192_proc, dev);
- if (!dir) {
- RT_TRACE(COMP_ERR,
- "Unable to initialize /proc/net/rtl8192/%s\n",
- dev->name);
- return;
- }
+ if (!rtl8192_proc)
+ return;
- for (f = rtl8192_proc_files; f->name[0]; f++) {
- if (!proc_create_data(f->name, S_IFREG | S_IRUGO, dir,
- &rtl8192_proc_fops, f->show)) {
- RT_TRACE(COMP_ERR,
- "Unable to initialize /proc/net/rtl8192/%s/%s\n",
- dev->name, f->name);
- return;
- }
- }
- }
+ dir = proc_mkdir_data(dev->name, 0, rtl8192_proc, dev);
+ if (!dir)
+ return;
+
+ proc_create_single("stats-rx", S_IFREG | S_IRUGO, dir,
+ proc_get_stats_rx);
+ proc_create_single("stats-tx", S_IFREG | S_IRUGO, dir,
+ proc_get_stats_tx);
+ proc_create_single("stats-ap", S_IFREG | S_IRUGO, dir,
+ proc_get_stats_ap);
+ proc_create_single("registers", S_IFREG | S_IRUGO, dir,
+ proc_get_registers);
}
static void rtl8192_proc_remove_one(struct net_device *dev)
diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c
index 07c814c42648..ce1321a5cb7b 100644
--- a/drivers/target/target_core_iblock.c
+++ b/drivers/target/target_core_iblock.c
@@ -94,8 +94,8 @@ static int iblock_configure_device(struct se_device *dev)
return -EINVAL;
}
- ib_dev->ibd_bio_set = bioset_create(IBLOCK_BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
- if (!ib_dev->ibd_bio_set) {
+ ret = bioset_init(&ib_dev->ibd_bio_set, IBLOCK_BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
+ if (ret) {
pr_err("IBLOCK: Unable to create bioset\n");
goto out;
}
@@ -141,7 +141,7 @@ static int iblock_configure_device(struct se_device *dev)
bi = bdev_get_integrity(bd);
if (bi) {
- struct bio_set *bs = ib_dev->ibd_bio_set;
+ struct bio_set *bs = &ib_dev->ibd_bio_set;
if (!strcmp(bi->profile->name, "T10-DIF-TYPE3-IP") ||
!strcmp(bi->profile->name, "T10-DIF-TYPE1-IP")) {
@@ -164,7 +164,7 @@ static int iblock_configure_device(struct se_device *dev)
goto out_blkdev_put;
}
pr_debug("IBLOCK setup BIP bs->bio_integrity_pool: %p\n",
- bs->bio_integrity_pool);
+ &bs->bio_integrity_pool);
}
dev->dev_attrib.hw_pi_prot_type = dev->dev_attrib.pi_prot_type;
}
@@ -174,8 +174,7 @@ static int iblock_configure_device(struct se_device *dev)
out_blkdev_put:
blkdev_put(ib_dev->ibd_bd, FMODE_WRITE|FMODE_READ|FMODE_EXCL);
out_free_bioset:
- bioset_free(ib_dev->ibd_bio_set);
- ib_dev->ibd_bio_set = NULL;
+ bioset_exit(&ib_dev->ibd_bio_set);
out:
return ret;
}
@@ -199,8 +198,7 @@ static void iblock_destroy_device(struct se_device *dev)
if (ib_dev->ibd_bd != NULL)
blkdev_put(ib_dev->ibd_bd, FMODE_WRITE|FMODE_READ|FMODE_EXCL);
- if (ib_dev->ibd_bio_set != NULL)
- bioset_free(ib_dev->ibd_bio_set);
+ bioset_exit(&ib_dev->ibd_bio_set);
}
static unsigned long long iblock_emulate_read_cap_with_block_size(
@@ -332,7 +330,7 @@ iblock_get_bio(struct se_cmd *cmd, sector_t lba, u32 sg_num, int op,
if (sg_num > BIO_MAX_PAGES)
sg_num = BIO_MAX_PAGES;
- bio = bio_alloc_bioset(GFP_NOIO, sg_num, ib_dev->ibd_bio_set);
+ bio = bio_alloc_bioset(GFP_NOIO, sg_num, &ib_dev->ibd_bio_set);
if (!bio) {
pr_err("Unable to allocate memory for bio\n");
return NULL;
@@ -427,8 +425,8 @@ iblock_execute_zero_out(struct block_device *bdev, struct se_cmd *cmd)
{
struct se_device *dev = cmd->se_dev;
struct scatterlist *sg = &cmd->t_data_sg[0];
- unsigned char *buf, zero = 0x00, *p = &zero;
- int rc, ret;
+ unsigned char *buf, *not_zero;
+ int ret;
buf = kmap(sg_page(sg)) + sg->offset;
if (!buf)
@@ -437,10 +435,10 @@ iblock_execute_zero_out(struct block_device *bdev, struct se_cmd *cmd)
* Fall back to block_execute_write_same() slow-path if
* incoming WRITE_SAME payload does not contain zeros.
*/
- rc = memcmp(buf, p, cmd->data_length);
+ not_zero = memchr_inv(buf, 0x00, cmd->data_length);
kunmap(sg_page(sg));
- if (rc)
+ if (not_zero)
return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
ret = blkdev_issue_zeroout(bdev,
diff --git a/drivers/target/target_core_iblock.h b/drivers/target/target_core_iblock.h
index b4aeb2584ad4..9cc3843404d4 100644
--- a/drivers/target/target_core_iblock.h
+++ b/drivers/target/target_core_iblock.h
@@ -22,7 +22,7 @@ struct iblock_dev {
struct se_device dev;
unsigned char ibd_udev_path[SE_UDEV_PATH_LEN];
u32 ibd_flags;
- struct bio_set *ibd_bio_set;
+ struct bio_set ibd_bio_set;
struct block_device *ibd_bd;
bool ibd_readonly;
} ____cacheline_aligned;
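
The iblock conversion is part of the tree-wide move from dynamically allocated bio_sets (bioset_create()/bioset_free()) to bio_sets embedded in the owning structure (bioset_init()/bioset_exit()), which removes one allocation and the NULL bookkeeping. Lifecycle sketch under the new API:

    ret = bioset_init(&ib_dev->ibd_bio_set, IBLOCK_BIO_POOL_SIZE, 0,
                      BIOSET_NEED_BVECS);   /* 0 on success, -ENOMEM on failure */
    if (ret)
        goto out;
    bio = bio_alloc_bioset(GFP_NOIO, sg_num, &ib_dev->ibd_bio_set);
    /* ... */
    bioset_exit(&ib_dev->ibd_bio_set);      /* safe on a zeroed set, no NULL check */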
diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c
index 6cb933ecc084..668934ea74cb 100644
--- a/drivers/target/target_core_pscsi.c
+++ b/drivers/target/target_core_pscsi.c
@@ -986,8 +986,7 @@ pscsi_execute_cmd(struct se_cmd *cmd)
req = blk_get_request(pdv->pdv_sd->request_queue,
cmd->data_direction == DMA_TO_DEVICE ?
- REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN,
- GFP_KERNEL);
+ REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN, 0);
if (IS_ERR(req)) {
pr_err("PSCSI: blk_get_request() failed\n");
ret = TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c
index 4ad89ea71a70..4f26bdc3d1dc 100644
--- a/drivers/target/target_core_user.c
+++ b/drivers/target/target_core_user.c
@@ -2121,6 +2121,8 @@ static ssize_t tcmu_qfull_time_out_store(struct config_item *item,
if (val >= 0) {
udev->qfull_time_out = val * MSEC_PER_SEC;
+ } else if (val == -1) {
+ udev->qfull_time_out = val;
} else {
printk(KERN_ERR "Invalid qfull timeout value %d\n", val);
return -EINVAL;
diff --git a/drivers/tee/tee_core.c b/drivers/tee/tee_core.c
index 0124a91c8d71..dd46b758852a 100644
--- a/drivers/tee/tee_core.c
+++ b/drivers/tee/tee_core.c
@@ -238,6 +238,17 @@ static int params_from_user(struct tee_context *ctx, struct tee_param *params,
if (IS_ERR(shm))
return PTR_ERR(shm);
+ /*
+ * Ensure offset + size does not overflow offset
+ * and does not exceed the size of the referred
+ * shared memory object.
+ */
+ if ((ip.a + ip.b) < ip.a ||
+ (ip.a + ip.b) > shm->size) {
+ tee_shm_put(shm);
+ return -EINVAL;
+ }
+
params[n].u.memref.shm_offs = ip.a;
params[n].u.memref.size = ip.b;
params[n].u.memref.shm = shm;
diff --git a/drivers/tee/tee_shm.c b/drivers/tee/tee_shm.c
index 556960a1bab3..07d3be6f0780 100644
--- a/drivers/tee/tee_shm.c
+++ b/drivers/tee/tee_shm.c
@@ -360,9 +360,10 @@ int tee_shm_get_fd(struct tee_shm *shm)
if (!(shm->flags & TEE_SHM_DMA_BUF))
return -EINVAL;
+ get_dma_buf(shm->dmabuf);
fd = dma_buf_fd(shm->dmabuf, O_CLOEXEC);
- if (fd >= 0)
- get_dma_buf(shm->dmabuf);
+ if (fd < 0)
+ dma_buf_put(shm->dmabuf);
return fd;
}
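
The reordering matters because dma_buf_fd() publishes the fd: as soon as it succeeds, another thread in the same process can close() that fd and drop a reference the old code had not taken yet. The reference therefore has to exist before the fd does, and is only dropped again on failure. A hedged sketch of the shape, with an assumed struct name:

#include <linux/dma-buf.h>
#include <linux/fcntl.h>

struct my_shm { struct dma_buf *dmabuf; };

static int my_shm_get_fd(struct my_shm *shm)
{
        int fd;

        get_dma_buf(shm->dmabuf);               /* +1 for the fd we publish */
        fd = dma_buf_fd(shm->dmabuf, O_CLOEXEC);
        if (fd < 0)
                dma_buf_put(shm->dmabuf);       /* fd never existed, undo */
        return fd;
}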
diff --git a/drivers/thermal/int340x_thermal/int3403_thermal.c b/drivers/thermal/int340x_thermal/int3403_thermal.c
index 8a7f24dd9315..0c19fcd56a0d 100644
--- a/drivers/thermal/int340x_thermal/int3403_thermal.c
+++ b/drivers/thermal/int340x_thermal/int3403_thermal.c
@@ -194,6 +194,7 @@ static int int3403_cdev_add(struct int3403_priv *priv)
return -EFAULT;
}
+ priv->priv = obj;
obj->max_state = p->package.count - 1;
obj->cdev =
thermal_cooling_device_register(acpi_device_bid(priv->adev),
@@ -201,8 +202,6 @@ static int int3403_cdev_add(struct int3403_priv *priv)
if (IS_ERR(obj->cdev))
result = PTR_ERR(obj->cdev);
- priv->priv = obj;
-
kfree(buf.pointer);
/* TODO: add ACPI notification support */
diff --git a/drivers/thermal/samsung/exynos_tmu.c b/drivers/thermal/samsung/exynos_tmu.c
index ed805c7c5ace..ac83f721db24 100644
--- a/drivers/thermal/samsung/exynos_tmu.c
+++ b/drivers/thermal/samsung/exynos_tmu.c
@@ -185,6 +185,7 @@
* @regulator: pointer to the TMU regulator structure.
* @reg_conf: pointer to structure to register with core thermal.
* @ntrip: number of supported trip points.
+ * @enabled: current status of TMU device
* @tmu_initialize: SoC specific TMU initialization method
* @tmu_control: SoC specific TMU control method
* @tmu_read: SoC specific TMU temperature read method
@@ -205,6 +206,7 @@ struct exynos_tmu_data {
struct regulator *regulator;
struct thermal_zone_device *tzd;
unsigned int ntrip;
+ bool enabled;
int (*tmu_initialize)(struct platform_device *pdev);
void (*tmu_control)(struct platform_device *pdev, bool on);
@@ -398,6 +400,7 @@ static void exynos_tmu_control(struct platform_device *pdev, bool on)
mutex_lock(&data->lock);
clk_enable(data->clk);
data->tmu_control(pdev, on);
+ data->enabled = on;
clk_disable(data->clk);
mutex_unlock(&data->lock);
}
@@ -889,19 +892,24 @@ static void exynos7_tmu_control(struct platform_device *pdev, bool on)
static int exynos_get_temp(void *p, int *temp)
{
struct exynos_tmu_data *data = p;
+ int value, ret = 0;
- if (!data || !data->tmu_read)
+ if (!data || !data->tmu_read || !data->enabled)
return -EINVAL;
mutex_lock(&data->lock);
clk_enable(data->clk);
- *temp = code_to_temp(data, data->tmu_read(data)) * MCELSIUS;
+ value = data->tmu_read(data);
+ if (value < 0)
+ ret = value;
+ else
+ *temp = code_to_temp(data, value) * MCELSIUS;
clk_disable(data->clk);
mutex_unlock(&data->lock);
- return 0;
+ return ret;
}
#ifdef CONFIG_THERMAL_EMULATION
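
Two behavioral fixes land above: temperature reads are refused while the TMU is disabled (they would return garbage across suspend), and a negative value from the SoC-specific ->tmu_read() is propagated instead of being scaled into a bogus reading. The same read-under-lock shape as a sketch; struct sensor and its members are placeholders, not the exynos names:

#include <linux/errno.h>
#include <linux/mutex.h>
#include <linux/types.h>

struct sensor {
        struct mutex lock;
        bool enabled;
        int base_c;                     /* placeholder calibration */
        int (*read_raw)(struct sensor *s);
};

static int sensor_get_mcelsius(struct sensor *s, int *temp)
{
        int raw, ret = 0;

        if (!s || !s->read_raw || !s->enabled)
                return -EINVAL;

        mutex_lock(&s->lock);
        raw = s->read_raw(s);
        if (raw < 0)
                ret = raw;              /* pass the errno up unchanged */
        else
                *temp = (s->base_c + raw) * 1000;       /* millidegrees */
        mutex_unlock(&s->lock);

        return ret;
}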
diff --git a/drivers/thunderbolt/icm.c b/drivers/thunderbolt/icm.c
index 2d2ceda9aa26..500911f16498 100644
--- a/drivers/thunderbolt/icm.c
+++ b/drivers/thunderbolt/icm.c
@@ -1255,7 +1255,7 @@ static int icm_ar_get_boot_acl(struct tb *tb, uuid_t *uuids, size_t nuuids)
/* Map empty entries to null UUID */
uuid[0] = 0;
uuid[1] = 0;
- } else {
+ } else if (uuid[0] != 0 || uuid[1] != 0) {
/* Upper two DWs are always one's */
uuid[2] = 0xffffffff;
uuid[3] = 0xffffffff;
diff --git a/drivers/tty/amiserial.c b/drivers/tty/amiserial.c
index 32d7ce430b02..34dead614149 100644
--- a/drivers/tty/amiserial.c
+++ b/drivers/tty/amiserial.c
@@ -1566,19 +1566,6 @@ static int rs_proc_show(struct seq_file *m, void *v)
return 0;
}
-static int rs_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, rs_proc_show, NULL);
-}
-
-static const struct file_operations rs_proc_fops = {
- .owner = THIS_MODULE,
- .open = rs_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
/*
* ---------------------------------------------------------------------
* rs_init() and friends
@@ -1620,7 +1607,7 @@ static const struct tty_operations serial_ops = {
.tiocmget = rs_tiocmget,
.tiocmset = rs_tiocmset,
.get_icount = rs_get_icount,
- .proc_fops = &rs_proc_fops,
+ .proc_show = rs_proc_show,
};
static int amiga_carrier_raised(struct tty_port *port)
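
This and the following tty hunks are one repeated conversion: the open/read/llseek/release plumbing around single_open() is deleted and the driver hands the core just its show() routine, here through the new .proc_show member of tty_operations, so the tty core builds the seq_file itself. Drivers that create the proc entry directly use the new proc_create_single() helper instead. A sketch with illustrative names:

#include <linux/init.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>

static int demo_proc_show(struct seq_file *m, void *v)
{
        seq_puts(m, "hello from a single-shot procfs file\n");
        return 0;
}

static int __init demo_init(void)
{
        /* one call replaces the whole file_operations boilerplate */
        if (!proc_create_single("demo", 0, NULL, demo_proc_show))
                return -ENOMEM;
        return 0;
}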
diff --git a/drivers/tty/cyclades.c b/drivers/tty/cyclades.c
index cf0bde3bb927..6d3c58051ce3 100644
--- a/drivers/tty/cyclades.c
+++ b/drivers/tty/cyclades.c
@@ -3972,19 +3972,6 @@ static int cyclades_proc_show(struct seq_file *m, void *v)
return 0;
}
-static int cyclades_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, cyclades_proc_show, NULL);
-}
-
-static const struct file_operations cyclades_proc_fops = {
- .owner = THIS_MODULE,
- .open = cyclades_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
/* The serial driver boot-time initialization code!
Hardware I/O ports are mapped to character special devices on a
first found, first allocated manner. That is, this code searches
@@ -4024,7 +4011,7 @@ static const struct tty_operations cy_ops = {
.tiocmget = cy_tiocmget,
.tiocmset = cy_tiocmset,
.get_icount = cy_get_icount,
- .proc_fops = &cyclades_proc_fops,
+ .proc_show = cyclades_proc_show,
};
static int __init cy_init(void)
diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c
index 0466f9f08a91..6ff9405954a6 100644
--- a/drivers/tty/serial/serial_core.c
+++ b/drivers/tty/serial/serial_core.c
@@ -1829,19 +1829,6 @@ static int uart_proc_show(struct seq_file *m, void *v)
uart_line_info(m, drv, i);
return 0;
}
-
-static int uart_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, uart_proc_show, PDE_DATA(inode));
-}
-
-static const struct file_operations uart_proc_fops = {
- .owner = THIS_MODULE,
- .open = uart_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
#endif
#if defined(CONFIG_SERIAL_CORE_CONSOLE) || defined(CONFIG_CONSOLE_POLL)
@@ -2415,7 +2402,7 @@ static const struct tty_operations uart_ops = {
.break_ctl = uart_break_ctl,
.wait_until_sent= uart_wait_until_sent,
#ifdef CONFIG_PROC_FS
- .proc_fops = &uart_proc_fops,
+ .proc_show = uart_proc_show,
#endif
.tiocmget = uart_tiocmget,
.tiocmset = uart_tiocmset,
diff --git a/drivers/tty/synclink.c b/drivers/tty/synclink.c
index 3c4ad71f261d..fbdf4d01c6a9 100644
--- a/drivers/tty/synclink.c
+++ b/drivers/tty/synclink.c
@@ -3534,19 +3534,6 @@ static int mgsl_proc_show(struct seq_file *m, void *v)
return 0;
}
-static int mgsl_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, mgsl_proc_show, NULL);
-}
-
-static const struct file_operations mgsl_proc_fops = {
- .owner = THIS_MODULE,
- .open = mgsl_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
/* mgsl_allocate_dma_buffers()
*
* Allocate and format DMA buffers (ISA adapter)
@@ -4298,7 +4285,7 @@ static const struct tty_operations mgsl_ops = {
.tiocmget = tiocmget,
.tiocmset = tiocmset,
.get_icount = msgl_get_icount,
- .proc_fops = &mgsl_proc_fops,
+ .proc_show = mgsl_proc_show,
};
/*
diff --git a/drivers/tty/synclink_gt.c b/drivers/tty/synclink_gt.c
index 255c49687877..a94086597ebd 100644
--- a/drivers/tty/synclink_gt.c
+++ b/drivers/tty/synclink_gt.c
@@ -1316,19 +1316,6 @@ static int synclink_gt_proc_show(struct seq_file *m, void *v)
return 0;
}
-static int synclink_gt_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, synclink_gt_proc_show, NULL);
-}
-
-static const struct file_operations synclink_gt_proc_fops = {
- .owner = THIS_MODULE,
- .open = synclink_gt_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
/*
* return count of bytes in transmit buffer
*/
@@ -3721,7 +3708,7 @@ static const struct tty_operations ops = {
.tiocmget = tiocmget,
.tiocmset = tiocmset,
.get_icount = get_icount,
- .proc_fops = &synclink_gt_proc_fops,
+ .proc_show = synclink_gt_proc_show,
};
static void slgt_cleanup(void)
diff --git a/drivers/tty/synclinkmp.c b/drivers/tty/synclinkmp.c
index 75f11ce1f0a1..1e4d5b9c981a 100644
--- a/drivers/tty/synclinkmp.c
+++ b/drivers/tty/synclinkmp.c
@@ -1421,19 +1421,6 @@ static int synclinkmp_proc_show(struct seq_file *m, void *v)
return 0;
}
-static int synclinkmp_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, synclinkmp_proc_show, NULL);
-}
-
-static const struct file_operations synclinkmp_proc_fops = {
- .owner = THIS_MODULE,
- .open = synclinkmp_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
/* Return the count of bytes in transmit buffer
*/
static int chars_in_buffer(struct tty_struct *tty)
@@ -3899,7 +3886,7 @@ static const struct tty_operations ops = {
.tiocmget = tiocmget,
.tiocmset = tiocmset,
.get_icount = get_icount,
- .proc_fops = &synclinkmp_proc_fops,
+ .proc_show = synclinkmp_proc_show,
};
diff --git a/drivers/tty/tty_ldisc.c b/drivers/tty/tty_ldisc.c
index fb7329ab2b37..fc4c97cae01e 100644
--- a/drivers/tty/tty_ldisc.c
+++ b/drivers/tty/tty_ldisc.c
@@ -229,26 +229,13 @@ static int tty_ldiscs_seq_show(struct seq_file *m, void *v)
return 0;
}
-static const struct seq_operations tty_ldiscs_seq_ops = {
+const struct seq_operations tty_ldiscs_seq_ops = {
.start = tty_ldiscs_seq_start,
.next = tty_ldiscs_seq_next,
.stop = tty_ldiscs_seq_stop,
.show = tty_ldiscs_seq_show,
};
-static int proc_tty_ldiscs_open(struct inode *inode, struct file *file)
-{
- return seq_open(file, &tty_ldiscs_seq_ops);
-}
-
-const struct file_operations tty_ldiscs_proc_fops = {
- .owner = THIS_MODULE,
- .open = proc_tty_ldiscs_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
/**
* tty_ldisc_ref_wait - wait for the tty ldisc
* @tty: tty device
diff --git a/drivers/usb/core/config.c b/drivers/usb/core/config.c
index c821b4b9647e..7b5cb28ffb35 100644
--- a/drivers/usb/core/config.c
+++ b/drivers/usb/core/config.c
@@ -191,7 +191,9 @@ static const unsigned short full_speed_maxpacket_maxes[4] = {
static const unsigned short high_speed_maxpacket_maxes[4] = {
[USB_ENDPOINT_XFER_CONTROL] = 64,
[USB_ENDPOINT_XFER_ISOC] = 1024,
- [USB_ENDPOINT_XFER_BULK] = 512,
+
+ /* Bulk should be 512, but some devices use 1024: we will warn below */
+ [USB_ENDPOINT_XFER_BULK] = 1024,
[USB_ENDPOINT_XFER_INT] = 1024,
};
static const unsigned short super_speed_maxpacket_maxes[4] = {
diff --git a/drivers/usb/dwc2/core.h b/drivers/usb/dwc2/core.h
index d83be5651f87..a666e0758a99 100644
--- a/drivers/usb/dwc2/core.h
+++ b/drivers/usb/dwc2/core.h
@@ -985,6 +985,7 @@ struct dwc2_hsotg {
/* DWC OTG HW Release versions */
#define DWC2_CORE_REV_2_71a 0x4f54271a
+#define DWC2_CORE_REV_2_72a 0x4f54272a
#define DWC2_CORE_REV_2_80a 0x4f54280a
#define DWC2_CORE_REV_2_90a 0x4f54290a
#define DWC2_CORE_REV_2_91a 0x4f54291a
@@ -992,6 +993,7 @@ struct dwc2_hsotg {
#define DWC2_CORE_REV_2_94a 0x4f54294a
#define DWC2_CORE_REV_3_00a 0x4f54300a
#define DWC2_CORE_REV_3_10a 0x4f54310a
+#define DWC2_CORE_REV_4_00a 0x4f54400a
#define DWC2_FS_IOT_REV_1_00a 0x5531100a
#define DWC2_HS_IOT_REV_1_00a 0x5532100a
diff --git a/drivers/usb/dwc2/gadget.c b/drivers/usb/dwc2/gadget.c
index 6c32bf26e48e..83cb5577a52f 100644
--- a/drivers/usb/dwc2/gadget.c
+++ b/drivers/usb/dwc2/gadget.c
@@ -3928,6 +3928,27 @@ static int dwc2_hsotg_ep_enable(struct usb_ep *ep,
if (index && !hs_ep->isochronous)
epctrl |= DXEPCTL_SETD0PID;
+ /* WA for full-speed ISOC IN in DDMA mode.
+ * By clearing the NAK status of the EP, the core will send a
+ * ZLP in response to the IN token and assert the NAK interrupt,
+ * relying on TxFIFO status only.
+ */
+
+ if (hsotg->gadget.speed == USB_SPEED_FULL &&
+ hs_ep->isochronous && dir_in) {
+ /* The WA applies only to core versions from 2.72a
+ * to 4.00a (including both). Also for FS_IOT_1.00a
+ * and HS_IOT_1.00a.
+ */
+ u32 gsnpsid = dwc2_readl(hsotg->regs + GSNPSID);
+
+ if ((gsnpsid >= DWC2_CORE_REV_2_72a &&
+ gsnpsid <= DWC2_CORE_REV_4_00a) ||
+ gsnpsid == DWC2_FS_IOT_REV_1_00a ||
+ gsnpsid == DWC2_HS_IOT_REV_1_00a)
+ epctrl |= DXEPCTL_CNAK;
+ }
+
dev_dbg(hsotg->dev, "%s: write DxEPCTL=0x%08x\n",
__func__, epctrl);
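
The version gate works because GSNPSID encodes an ASCII product prefix in the top half and the release in the bottom half: 0x4f54 is "OT", so 0x4f54271a is OT core 2.71a, and all OT-family IDs compare numerically in release order. The FS/HS IoT parts use different prefixes (0x5531, 0x5532), hence the separate equality tests. The check above, factored into a predicate as a sketch:

static bool dwc2_needs_fs_isoc_in_wa(struct dwc2_hsotg *hsotg)
{
        u32 gsnpsid = dwc2_readl(hsotg->regs + GSNPSID);

        return (gsnpsid >= DWC2_CORE_REV_2_72a &&
                gsnpsid <= DWC2_CORE_REV_4_00a) ||
               gsnpsid == DWC2_FS_IOT_REV_1_00a ||
               gsnpsid == DWC2_HS_IOT_REV_1_00a;
}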
diff --git a/drivers/usb/dwc2/hcd.c b/drivers/usb/dwc2/hcd.c
index 190f95964000..c51b73b3e048 100644
--- a/drivers/usb/dwc2/hcd.c
+++ b/drivers/usb/dwc2/hcd.c
@@ -358,9 +358,14 @@ static void dwc2_gusbcfg_init(struct dwc2_hsotg *hsotg)
static int dwc2_vbus_supply_init(struct dwc2_hsotg *hsotg)
{
+ int ret;
+
hsotg->vbus_supply = devm_regulator_get_optional(hsotg->dev, "vbus");
- if (IS_ERR(hsotg->vbus_supply))
- return 0;
+ if (IS_ERR(hsotg->vbus_supply)) {
+ ret = PTR_ERR(hsotg->vbus_supply);
+ hsotg->vbus_supply = NULL;
+ return ret == -ENODEV ? 0 : ret;
+ }
return regulator_enable(hsotg->vbus_supply);
}
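
devm_regulator_get_optional() distinguishes "no vbus supply described" (-ENODEV), a perfectly normal configuration, from real failures such as -EPROBE_DEFER, which the old code silently swallowed. The next hunk makes _dwc2_hcd_start() propagate the result as well. The general optional-regulator idiom, sketched with assumed names:

#include <linux/regulator/consumer.h>

static int demo_vbus_init(struct device *dev, struct regulator **out)
{
        struct regulator *reg = devm_regulator_get_optional(dev, "vbus");

        if (IS_ERR(reg)) {
                int err = PTR_ERR(reg);

                *out = NULL;
                /* an absent supply is fine; defer and I/O errors are not */
                return err == -ENODEV ? 0 : err;
        }

        *out = reg;
        return regulator_enable(reg);
}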
@@ -4342,9 +4347,7 @@ static int _dwc2_hcd_start(struct usb_hcd *hcd)
spin_unlock_irqrestore(&hsotg->lock, flags);
- dwc2_vbus_supply_init(hsotg);
-
- return 0;
+ return dwc2_vbus_supply_init(hsotg);
}
/*
diff --git a/drivers/usb/dwc2/pci.c b/drivers/usb/dwc2/pci.c
index 7f21747007f1..bea2e8ec0369 100644
--- a/drivers/usb/dwc2/pci.c
+++ b/drivers/usb/dwc2/pci.c
@@ -141,8 +141,10 @@ static int dwc2_pci_probe(struct pci_dev *pci,
goto err;
glue = devm_kzalloc(dev, sizeof(*glue), GFP_KERNEL);
- if (!glue)
+ if (!glue) {
+ ret = -ENOMEM;
goto err;
+ }
ret = platform_device_add(dwc2);
if (ret) {
diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index 8796a5ee9bb9..0dedf8a799f4 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -166,7 +166,7 @@ static void dwc3_ep_inc_deq(struct dwc3_ep *dep)
dwc3_ep_inc_trb(&dep->trb_dequeue);
}
-void dwc3_gadget_del_and_unmap_request(struct dwc3_ep *dep,
+static void dwc3_gadget_del_and_unmap_request(struct dwc3_ep *dep,
struct dwc3_request *req, int status)
{
struct dwc3 *dwc = dep->dwc;
@@ -1424,7 +1424,7 @@ static int dwc3_gadget_ep_dequeue(struct usb_ep *ep,
dwc->lock);
if (!r->trb)
- goto out1;
+ goto out0;
if (r->num_pending_sgs) {
struct dwc3_trb *trb;
diff --git a/drivers/usb/gadget/function/f_phonet.c b/drivers/usb/gadget/function/f_phonet.c
index 7889bcc0509a..8b72b192c747 100644
--- a/drivers/usb/gadget/function/f_phonet.c
+++ b/drivers/usb/gadget/function/f_phonet.c
@@ -221,7 +221,7 @@ static void pn_tx_complete(struct usb_ep *ep, struct usb_request *req)
netif_wake_queue(dev);
}
-static int pn_net_xmit(struct sk_buff *skb, struct net_device *dev)
+static netdev_tx_t pn_net_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct phonet_port *port = netdev_priv(dev);
struct f_phonet *fp;
diff --git a/drivers/usb/gadget/udc/at91_udc.c b/drivers/usb/gadget/udc/at91_udc.c
index ad743a8493be..03959dc86cfd 100644
--- a/drivers/usb/gadget/udc/at91_udc.c
+++ b/drivers/usb/gadget/udc/at91_udc.c
@@ -234,22 +234,10 @@ static int proc_udc_show(struct seq_file *s, void *unused)
return 0;
}
-static int proc_udc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, proc_udc_show, PDE_DATA(inode));
-}
-
-static const struct file_operations proc_ops = {
- .owner = THIS_MODULE,
- .open = proc_udc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
static void create_debug_file(struct at91_udc *udc)
{
- udc->pde = proc_create_data(debug_filename, 0, NULL, &proc_ops, udc);
+ udc->pde = proc_create_single_data(debug_filename, 0, NULL,
+ proc_udc_show, udc);
}
static void remove_debug_file(struct at91_udc *udc)
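
Where the old wrapper smuggled a context pointer through PDE_DATA() in its open() routine, the data-carrying variant proc_create_single_data() does the same job: the last argument reappears as m->private inside show(). A sketch, with demo_udc as an assumed type:

#include <linux/proc_fs.h>
#include <linux/seq_file.h>

struct demo_udc {
        int vbus;
};

static int demo_udc_show(struct seq_file *m, void *v)
{
        struct demo_udc *udc = m->private;      /* the data argument */

        seq_printf(m, "vbus: %d\n", udc->vbus);
        return 0;
}

static void demo_create_debug_file(struct demo_udc *udc)
{
        proc_create_single_data("demo_udc", 0, NULL, demo_udc_show, udc);
}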
diff --git a/drivers/usb/gadget/udc/fsl_udc_core.c b/drivers/usb/gadget/udc/fsl_udc_core.c
index 56b517a38865..7d8af299dfc7 100644
--- a/drivers/usb/gadget/udc/fsl_udc_core.c
+++ b/drivers/usb/gadget/udc/fsl_udc_core.c
@@ -2207,22 +2207,8 @@ static int fsl_proc_read(struct seq_file *m, void *v)
return 0;
}
-/*
- * seq_file wrappers for procfile show routines.
- */
-static int fsl_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, fsl_proc_read, NULL);
-}
-
-static const struct file_operations fsl_proc_fops = {
- .open = fsl_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
-#define create_proc_file() proc_create(proc_filename, 0, NULL, &fsl_proc_fops)
+#define create_proc_file() \
+ proc_create_single(proc_filename, 0, NULL, fsl_proc_read)
#define remove_proc_file() remove_proc_entry(proc_filename, NULL)
#else /* !CONFIG_USB_GADGET_DEBUG_FILES */
diff --git a/drivers/usb/gadget/udc/goku_udc.c b/drivers/usb/gadget/udc/goku_udc.c
index 4504d0b202db..c3721225b61e 100644
--- a/drivers/usb/gadget/udc/goku_udc.c
+++ b/drivers/usb/gadget/udc/goku_udc.c
@@ -1241,22 +1241,6 @@ done:
local_irq_restore(flags);
return 0;
}
-
-/*
- * seq_file wrappers for procfile show routines.
- */
-static int udc_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, udc_proc_read, PDE_DATA(file_inode(file)));
-}
-
-static const struct file_operations udc_proc_fops = {
- .open = udc_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
#endif /* CONFIG_USB_GADGET_DEBUG_FILES */
/*-------------------------------------------------------------------------*/
@@ -1826,7 +1810,7 @@ static int goku_probe(struct pci_dev *pdev, const struct pci_device_id *id)
#ifdef CONFIG_USB_GADGET_DEBUG_FILES
- proc_create_data(proc_node_name, 0, NULL, &udc_proc_fops, dev);
+ proc_create_single_data(proc_node_name, 0, NULL, udc_proc_read, dev);
#endif
retval = usb_add_gadget_udc_release(&pdev->dev, &dev->gadget,
diff --git a/drivers/usb/gadget/udc/omap_udc.c b/drivers/usb/gadget/udc/omap_udc.c
index dc35a54bad90..3a16431da321 100644
--- a/drivers/usb/gadget/udc/omap_udc.c
+++ b/drivers/usb/gadget/udc/omap_udc.c
@@ -2432,22 +2432,9 @@ static int proc_udc_show(struct seq_file *s, void *_)
return 0;
}
-static int proc_udc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, proc_udc_show, NULL);
-}
-
-static const struct file_operations proc_ops = {
- .owner = THIS_MODULE,
- .open = proc_udc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
static void create_proc_file(void)
{
- proc_create(proc_filename, 0, NULL, &proc_ops);
+ proc_create_single(proc_filename, 0, NULL, proc_udc_show);
}
static void remove_proc_file(void)
diff --git a/drivers/usb/host/ehci-mem.c b/drivers/usb/host/ehci-mem.c
index 4c6c08b675b5..21307d862af6 100644
--- a/drivers/usb/host/ehci-mem.c
+++ b/drivers/usb/host/ehci-mem.c
@@ -73,9 +73,10 @@ static struct ehci_qh *ehci_qh_alloc (struct ehci_hcd *ehci, gfp_t flags)
if (!qh)
goto done;
qh->hw = (struct ehci_qh_hw *)
- dma_pool_zalloc(ehci->qh_pool, flags, &dma);
+ dma_pool_alloc(ehci->qh_pool, flags, &dma);
if (!qh->hw)
goto fail;
+ memset(qh->hw, 0, sizeof *qh->hw);
qh->qh_dma = dma;
// INIT_LIST_HEAD (&qh->qh_list);
INIT_LIST_HEAD (&qh->qtd_list);
diff --git a/drivers/usb/host/ehci-sched.c b/drivers/usb/host/ehci-sched.c
index 28e2a338b481..e56db44708bc 100644
--- a/drivers/usb/host/ehci-sched.c
+++ b/drivers/usb/host/ehci-sched.c
@@ -1287,7 +1287,7 @@ itd_urb_transaction(
} else {
alloc_itd:
spin_unlock_irqrestore(&ehci->lock, flags);
- itd = dma_pool_zalloc(ehci->itd_pool, mem_flags,
+ itd = dma_pool_alloc(ehci->itd_pool, mem_flags,
&itd_dma);
spin_lock_irqsave(&ehci->lock, flags);
if (!itd) {
@@ -1297,6 +1297,7 @@ itd_urb_transaction(
}
}
+ memset(itd, 0, sizeof(*itd));
itd->itd_dma = itd_dma;
itd->frame = NO_FRAME;
list_add(&itd->itd_list, &sched->td_list);
@@ -2080,7 +2081,7 @@ sitd_urb_transaction(
} else {
alloc_sitd:
spin_unlock_irqrestore(&ehci->lock, flags);
- sitd = dma_pool_zalloc(ehci->sitd_pool, mem_flags,
+ sitd = dma_pool_alloc(ehci->sitd_pool, mem_flags,
&sitd_dma);
spin_lock_irqsave(&ehci->lock, flags);
if (!sitd) {
@@ -2090,6 +2091,7 @@ sitd_urb_transaction(
}
}
+ memset(sitd, 0, sizeof(*sitd));
sitd->sitd_dma = sitd_dma;
sitd->frame = NO_FRAME;
list_add(&sitd->sitd_list, &iso_sched->td_list);
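
dma_pool_zalloc() is backed out here because the pool is only one of two sources: iTDs and siTDs are preferentially recycled from a per-stream free list, and those carry stale contents that zalloc never touched. Clearing once, after the two paths converge, covers both. The bug shape in miniature, with generic names:

#include <linux/list.h>
#include <linux/slab.h>
#include <linux/string.h>

struct item {
        struct list_head node;
        int state;                      /* stale on recycled items */
};

static struct item *item_get(struct list_head *cache, gfp_t gfp)
{
        struct item *it;

        if (!list_empty(cache)) {
                it = list_first_entry(cache, struct item, node);
                list_del(&it->node);
        } else {
                /* kzalloc here alone would not fix the list path */
                it = kmalloc(sizeof(*it), gfp);
                if (!it)
                        return NULL;
        }

        memset(it, 0, sizeof(*it));     /* one clear covers both sources */
        return it;
}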
diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c
index 72ebbc908e19..32cd52ca8318 100644
--- a/drivers/usb/host/xhci-hub.c
+++ b/drivers/usb/host/xhci-hub.c
@@ -354,7 +354,7 @@ int xhci_find_slot_id_by_port(struct usb_hcd *hcd, struct xhci_hcd *xhci,
slot_id = 0;
for (i = 0; i < MAX_HC_SLOTS; i++) {
- if (!xhci->devs[i])
+ if (!xhci->devs[i] || !xhci->devs[i]->udev)
continue;
speed = xhci->devs[i]->udev->speed;
if (((speed >= USB_SPEED_SUPER) == (hcd->speed >= HCD_USB3))
diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c
index 9b27798ecce5..711da3306b14 100644
--- a/drivers/usb/host/xhci.c
+++ b/drivers/usb/host/xhci.c
@@ -3621,6 +3621,7 @@ static void xhci_free_dev(struct usb_hcd *hcd, struct usb_device *udev)
del_timer_sync(&virt_dev->eps[i].stop_cmd_timer);
}
xhci_debugfs_remove_slot(xhci, udev->slot_id);
+ virt_dev->udev = NULL;
ret = xhci_disable_slot(xhci, udev->slot_id);
if (ret)
xhci_free_virt_device(xhci, udev->slot_id);
diff --git a/drivers/usb/musb/musb_gadget.c b/drivers/usb/musb/musb_gadget.c
index e564695c6c8d..71c5835ea9cd 100644
--- a/drivers/usb/musb/musb_gadget.c
+++ b/drivers/usb/musb/musb_gadget.c
@@ -417,7 +417,6 @@ void musb_g_tx(struct musb *musb, u8 epnum)
req = next_request(musb_ep);
request = &req->request;
- trace_musb_req_tx(req);
csr = musb_readw(epio, MUSB_TXCSR);
musb_dbg(musb, "<== %s, txcsr %04x", musb_ep->end_point.name, csr);
@@ -456,6 +455,8 @@ void musb_g_tx(struct musb *musb, u8 epnum)
u8 is_dma = 0;
bool short_packet = false;
+ trace_musb_req_tx(req);
+
if (dma && (csr & MUSB_TXCSR_DMAENAB)) {
is_dma = 1;
csr |= MUSB_TXCSR_P_WZC_BITS;
diff --git a/drivers/usb/musb/musb_host.c b/drivers/usb/musb/musb_host.c
index 4fa372c845e1..15a42cee0a9c 100644
--- a/drivers/usb/musb/musb_host.c
+++ b/drivers/usb/musb/musb_host.c
@@ -990,7 +990,9 @@ static void musb_bulk_nak_timeout(struct musb *musb, struct musb_hw_ep *ep,
/* set tx_reinit and schedule the next qh */
ep->tx_reinit = 1;
}
- musb_start_urb(musb, is_in, next_qh);
+
+ if (next_qh)
+ musb_start_urb(musb, is_in, next_qh);
}
}
@@ -2522,8 +2524,11 @@ static int musb_bus_suspend(struct usb_hcd *hcd)
{
struct musb *musb = hcd_to_musb(hcd);
u8 devctl;
+ int ret;
- musb_port_suspend(musb, true);
+ ret = musb_port_suspend(musb, true);
+ if (ret)
+ return ret;
if (!is_host_active(musb))
return 0;
diff --git a/drivers/usb/musb/musb_host.h b/drivers/usb/musb/musb_host.h
index 72392bbcd0a4..2999845632ce 100644
--- a/drivers/usb/musb/musb_host.h
+++ b/drivers/usb/musb/musb_host.h
@@ -67,7 +67,7 @@ extern void musb_host_rx(struct musb *, u8);
extern void musb_root_disconnect(struct musb *musb);
extern void musb_host_resume_root_hub(struct musb *musb);
extern void musb_host_poke_root_hub(struct musb *musb);
-extern void musb_port_suspend(struct musb *musb, bool do_suspend);
+extern int musb_port_suspend(struct musb *musb, bool do_suspend);
extern void musb_port_reset(struct musb *musb, bool do_reset);
extern void musb_host_finish_resume(struct work_struct *work);
#else
@@ -99,7 +99,10 @@ static inline void musb_root_disconnect(struct musb *musb) {}
static inline void musb_host_resume_root_hub(struct musb *musb) {}
static inline void musb_host_poll_rh_status(struct musb *musb) {}
static inline void musb_host_poke_root_hub(struct musb *musb) {}
-static inline void musb_port_suspend(struct musb *musb, bool do_suspend) {}
+static inline int musb_port_suspend(struct musb *musb, bool do_suspend)
+{
+ return 0;
+}
static inline void musb_port_reset(struct musb *musb, bool do_reset) {}
static inline void musb_host_finish_resume(struct work_struct *work) {}
#endif
diff --git a/drivers/usb/musb/musb_virthub.c b/drivers/usb/musb/musb_virthub.c
index 5165d2b07ade..2f8dd9826e94 100644
--- a/drivers/usb/musb/musb_virthub.c
+++ b/drivers/usb/musb/musb_virthub.c
@@ -48,14 +48,14 @@ void musb_host_finish_resume(struct work_struct *work)
spin_unlock_irqrestore(&musb->lock, flags);
}
-void musb_port_suspend(struct musb *musb, bool do_suspend)
+int musb_port_suspend(struct musb *musb, bool do_suspend)
{
struct usb_otg *otg = musb->xceiv->otg;
u8 power;
void __iomem *mbase = musb->mregs;
if (!is_host_active(musb))
- return;
+ return 0;
/* NOTE: this doesn't necessarily put PHY into low power mode,
* turning off its clock; that's a function of PHY integration and
@@ -66,16 +66,20 @@ void musb_port_suspend(struct musb *musb, bool do_suspend)
if (do_suspend) {
int retries = 10000;
- power &= ~MUSB_POWER_RESUME;
- power |= MUSB_POWER_SUSPENDM;
- musb_writeb(mbase, MUSB_POWER, power);
+ if (power & MUSB_POWER_RESUME)
+ return -EBUSY;
- /* Needed for OPT A tests */
- power = musb_readb(mbase, MUSB_POWER);
- while (power & MUSB_POWER_SUSPENDM) {
+ if (!(power & MUSB_POWER_SUSPENDM)) {
+ power |= MUSB_POWER_SUSPENDM;
+ musb_writeb(mbase, MUSB_POWER, power);
+
+ /* Needed for OPT A tests */
power = musb_readb(mbase, MUSB_POWER);
- if (retries-- < 1)
- break;
+ while (power & MUSB_POWER_SUSPENDM) {
+ power = musb_readb(mbase, MUSB_POWER);
+ if (retries-- < 1)
+ break;
+ }
}
musb_dbg(musb, "Root port suspended, power %02x", power);
@@ -111,6 +115,7 @@ void musb_port_suspend(struct musb *musb, bool do_suspend)
schedule_delayed_work(&musb->finish_resume_work,
msecs_to_jiffies(USB_RESUME_TIMEOUT));
}
+ return 0;
}
void musb_port_reset(struct musb *musb, bool do_reset)
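
musb_port_suspend() can now refuse to suspend, returning -EBUSY while a resume is still in flight, so the prototype grows an int in three places: the implementation, the header extern, and the !CONFIG host stub, with musb_bus_suspend() forwarding the result. The stub side of that pattern, sketched:

struct demo;

#ifdef CONFIG_DEMO_HOST
extern int demo_port_suspend(struct demo *d, bool do_suspend);
#else
static inline int demo_port_suspend(struct demo *d, bool do_suspend)
{
        return 0;       /* nothing to do, nothing can fail */
}
#endif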
diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c
index c3f252283ab9..2058852a87fa 100644
--- a/drivers/usb/serial/option.c
+++ b/drivers/usb/serial/option.c
@@ -233,6 +233,8 @@ static void option_instat_callback(struct urb *urb);
/* These Quectel products use Qualcomm's vendor ID */
#define QUECTEL_PRODUCT_UC20 0x9003
#define QUECTEL_PRODUCT_UC15 0x9090
+/* These u-blox products use Qualcomm's vendor ID */
+#define UBLOX_PRODUCT_R410M 0x90b2
/* These Yuga products use Qualcomm's vendor ID */
#define YUGA_PRODUCT_CLM920_NC5 0x9625
@@ -1065,6 +1067,9 @@ static const struct usb_device_id option_ids[] = {
/* Yuga products use Qualcomm vendor ID */
{ USB_DEVICE(QUALCOMM_VENDOR_ID, YUGA_PRODUCT_CLM920_NC5),
.driver_info = RSVD(1) | RSVD(4) },
+ /* u-blox products using Qualcomm vendor ID */
+ { USB_DEVICE(QUALCOMM_VENDOR_ID, UBLOX_PRODUCT_R410M),
+ .driver_info = RSVD(1) | RSVD(3) },
/* Quectel products using Quectel vendor ID */
{ USB_DEVICE(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EC21),
.driver_info = RSVD(4) },
diff --git a/drivers/usb/serial/usb-serial.c b/drivers/usb/serial/usb-serial.c
index 790e0cbe3da9..268ffa6b51d2 100644
--- a/drivers/usb/serial/usb-serial.c
+++ b/drivers/usb/serial/usb-serial.c
@@ -476,19 +476,6 @@ static int serial_proc_show(struct seq_file *m, void *v)
return 0;
}
-static int serial_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, serial_proc_show, NULL);
-}
-
-static const struct file_operations serial_proc_fops = {
- .owner = THIS_MODULE,
- .open = serial_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
static int serial_tiocmget(struct tty_struct *tty)
{
struct usb_serial_port *port = tty->driver_data;
@@ -1192,7 +1179,7 @@ static const struct tty_operations serial_ops = {
.get_icount = serial_get_icount,
.cleanup = serial_cleanup,
.install = serial_install,
- .proc_fops = &serial_proc_fops,
+ .proc_show = serial_proc_show,
};
diff --git a/drivers/usb/serial/visor.c b/drivers/usb/serial/visor.c
index f5373ed2cd45..8ddbecc25d89 100644
--- a/drivers/usb/serial/visor.c
+++ b/drivers/usb/serial/visor.c
@@ -335,47 +335,48 @@ static int palm_os_3_probe(struct usb_serial *serial,
goto exit;
}
- if (retval == sizeof(*connection_info)) {
- connection_info = (struct visor_connection_info *)
- transfer_buffer;
-
- num_ports = le16_to_cpu(connection_info->num_ports);
- for (i = 0; i < num_ports; ++i) {
- switch (
- connection_info->connections[i].port_function_id) {
- case VISOR_FUNCTION_GENERIC:
- string = "Generic";
- break;
- case VISOR_FUNCTION_DEBUGGER:
- string = "Debugger";
- break;
- case VISOR_FUNCTION_HOTSYNC:
- string = "HotSync";
- break;
- case VISOR_FUNCTION_CONSOLE:
- string = "Console";
- break;
- case VISOR_FUNCTION_REMOTE_FILE_SYS:
- string = "Remote File System";
- break;
- default:
- string = "unknown";
- break;
- }
- dev_info(dev, "%s: port %d, is for %s use\n",
- serial->type->description,
- connection_info->connections[i].port, string);
- }
+ if (retval != sizeof(*connection_info)) {
+ dev_err(dev, "Invalid connection information received from device\n");
+ retval = -ENODEV;
+ goto exit;
}
- /*
- * Handle devices that report invalid stuff here.
- */
+
+ connection_info = (struct visor_connection_info *)transfer_buffer;
+
+ num_ports = le16_to_cpu(connection_info->num_ports);
+
+ /* Handle devices that report invalid stuff here. */
if (num_ports == 0 || num_ports > 2) {
dev_warn(dev, "%s: No valid connect info available\n",
serial->type->description);
num_ports = 2;
}
+ for (i = 0; i < num_ports; ++i) {
+ switch (connection_info->connections[i].port_function_id) {
+ case VISOR_FUNCTION_GENERIC:
+ string = "Generic";
+ break;
+ case VISOR_FUNCTION_DEBUGGER:
+ string = "Debugger";
+ break;
+ case VISOR_FUNCTION_HOTSYNC:
+ string = "HotSync";
+ break;
+ case VISOR_FUNCTION_CONSOLE:
+ string = "Console";
+ break;
+ case VISOR_FUNCTION_REMOTE_FILE_SYS:
+ string = "Remote File System";
+ break;
+ default:
+ string = "unknown";
+ break;
+ }
+ dev_info(dev, "%s: port %d, is for %s use\n",
+ serial->type->description,
+ connection_info->connections[i].port, string);
+ }
dev_info(dev, "%s: Number of ports: %d\n", serial->type->description,
num_ports);
diff --git a/drivers/usb/typec/tcpm.c b/drivers/usb/typec/tcpm.c
index 677d12138dbd..ded49e3bf2b0 100644
--- a/drivers/usb/typec/tcpm.c
+++ b/drivers/usb/typec/tcpm.c
@@ -3725,6 +3725,7 @@ void tcpm_unregister_port(struct tcpm_port *port)
for (i = 0; i < ARRAY_SIZE(port->port_altmode); i++)
typec_unregister_altmode(port->port_altmode[i]);
typec_unregister_port(port->typec_port);
+ usb_role_switch_put(port->role_sw);
tcpm_debugfs_exit(port);
destroy_workqueue(port->wq);
}
diff --git a/drivers/usb/typec/tps6598x.c b/drivers/usb/typec/tps6598x.c
index 8b8406867c02..4b4c8d271b27 100644
--- a/drivers/usb/typec/tps6598x.c
+++ b/drivers/usb/typec/tps6598x.c
@@ -73,6 +73,7 @@ struct tps6598x {
struct device *dev;
struct regmap *regmap;
struct mutex lock; /* device lock */
+ u8 i2c_protocol:1;
struct typec_port *port;
struct typec_partner *partner;
@@ -80,19 +81,39 @@ struct tps6598x {
struct typec_capability typec_cap;
};
+static int
+tps6598x_block_read(struct tps6598x *tps, u8 reg, void *val, size_t len)
+{
+ u8 data[len + 1];
+ int ret;
+
+ if (!tps->i2c_protocol)
+ return regmap_raw_read(tps->regmap, reg, val, len);
+
+ ret = regmap_raw_read(tps->regmap, reg, data, sizeof(data));
+ if (ret)
+ return ret;
+
+ if (data[0] < len)
+ return -EIO;
+
+ memcpy(val, &data[1], len);
+ return 0;
+}
+
static inline int tps6598x_read16(struct tps6598x *tps, u8 reg, u16 *val)
{
- return regmap_raw_read(tps->regmap, reg, val, sizeof(u16));
+ return tps6598x_block_read(tps, reg, val, sizeof(u16));
}
static inline int tps6598x_read32(struct tps6598x *tps, u8 reg, u32 *val)
{
- return regmap_raw_read(tps->regmap, reg, val, sizeof(u32));
+ return tps6598x_block_read(tps, reg, val, sizeof(u32));
}
static inline int tps6598x_read64(struct tps6598x *tps, u8 reg, u64 *val)
{
- return regmap_raw_read(tps->regmap, reg, val, sizeof(u64));
+ return tps6598x_block_read(tps, reg, val, sizeof(u64));
}
static inline int tps6598x_write16(struct tps6598x *tps, u8 reg, u16 val)
@@ -121,8 +142,8 @@ static int tps6598x_read_partner_identity(struct tps6598x *tps)
struct tps6598x_rx_identity_reg id;
int ret;
- ret = regmap_raw_read(tps->regmap, TPS_REG_RX_IDENTITY_SOP,
- &id, sizeof(id));
+ ret = tps6598x_block_read(tps, TPS_REG_RX_IDENTITY_SOP,
+ &id, sizeof(id));
if (ret)
return ret;
@@ -224,13 +245,13 @@ static int tps6598x_exec_cmd(struct tps6598x *tps, const char *cmd,
} while (val);
if (out_len) {
- ret = regmap_raw_read(tps->regmap, TPS_REG_DATA1,
- out_data, out_len);
+ ret = tps6598x_block_read(tps, TPS_REG_DATA1,
+ out_data, out_len);
if (ret)
return ret;
val = out_data[0];
} else {
- ret = regmap_read(tps->regmap, TPS_REG_DATA1, &val);
+ ret = tps6598x_block_read(tps, TPS_REG_DATA1, &val, sizeof(u8));
if (ret)
return ret;
}
@@ -385,6 +406,16 @@ static int tps6598x_probe(struct i2c_client *client)
if (!vid)
return -ENODEV;
+ /*
+ * Check whether the adapter can handle the SMBus protocol. If it
+ * cannot, the driver needs to take care of block reads separately.
+ *
+ * FIXME: Testing with I2C_FUNC_I2C. regmap-i2c uses the I2C protocol
+ * unconditionally if the adapter has I2C_FUNC_I2C set.
+ */
+ if (i2c_check_functionality(client->adapter, I2C_FUNC_I2C))
+ tps->i2c_protocol = true;
+
ret = tps6598x_read32(tps, TPS_REG_STATUS, &status);
if (ret < 0)
return ret;
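
tps6598x_block_read() papers over a transport difference: over SMBus, regmap_raw_read() hands back the register payload directly, while over plain I2C the block-read framing is exposed, with byte 0 carrying the payload length. The helper therefore reads len + 1 bytes, validates the length byte, and strips it. The framing step in isolation, as a user-space sketch:

#include <errno.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>

static int unwrap_block(const uint8_t *raw, size_t raw_len,
                        void *out, size_t want)
{
        if (raw_len < want + 1 || raw[0] < want)
                return -EIO;            /* device sent fewer bytes */
        memcpy(out, &raw[1], want);     /* strip the length byte */
        return 0;
}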
diff --git a/drivers/usb/usbip/stub.h b/drivers/usb/usbip/stub.h
index 14a72357800a..35618ceb2791 100644
--- a/drivers/usb/usbip/stub.h
+++ b/drivers/usb/usbip/stub.h
@@ -73,6 +73,7 @@ struct bus_id_priv {
struct stub_device *sdev;
struct usb_device *udev;
char shutdown_busid;
+ spinlock_t busid_lock;
};
/* stub_priv is allocated from stub_priv_cache */
@@ -83,6 +84,7 @@ extern struct usb_device_driver stub_driver;
/* stub_main.c */
struct bus_id_priv *get_busid_priv(const char *busid);
+void put_busid_priv(struct bus_id_priv *bid);
int del_match_busid(char *busid);
void stub_device_cleanup_urbs(struct stub_device *sdev);
diff --git a/drivers/usb/usbip/stub_dev.c b/drivers/usb/usbip/stub_dev.c
index dd8ef36ab10e..c0d6ff1baa72 100644
--- a/drivers/usb/usbip/stub_dev.c
+++ b/drivers/usb/usbip/stub_dev.c
@@ -300,9 +300,9 @@ static int stub_probe(struct usb_device *udev)
struct stub_device *sdev = NULL;
const char *udev_busid = dev_name(&udev->dev);
struct bus_id_priv *busid_priv;
- int rc;
+ int rc = 0;
- dev_dbg(&udev->dev, "Enter\n");
+ dev_dbg(&udev->dev, "Enter probe\n");
/* check we should claim or not by busid_table */
busid_priv = get_busid_priv(udev_busid);
@@ -317,13 +317,15 @@ static int stub_probe(struct usb_device *udev)
* other matched drivers by the driver core.
* See driver_probe_device() in driver/base/dd.c
*/
- return -ENODEV;
+ rc = -ENODEV;
+ goto call_put_busid_priv;
}
if (udev->descriptor.bDeviceClass == USB_CLASS_HUB) {
dev_dbg(&udev->dev, "%s is a usb hub device... skip!\n",
udev_busid);
- return -ENODEV;
+ rc = -ENODEV;
+ goto call_put_busid_priv;
}
if (!strcmp(udev->bus->bus_name, "vhci_hcd")) {
@@ -331,13 +333,16 @@ static int stub_probe(struct usb_device *udev)
"%s is attached on vhci_hcd... skip!\n",
udev_busid);
- return -ENODEV;
+ rc = -ENODEV;
+ goto call_put_busid_priv;
}
/* ok, this is my device */
sdev = stub_device_alloc(udev);
- if (!sdev)
- return -ENOMEM;
+ if (!sdev) {
+ rc = -ENOMEM;
+ goto call_put_busid_priv;
+ }
dev_info(&udev->dev,
"usbip-host: register new device (bus %u dev %u)\n",
@@ -369,7 +374,9 @@ static int stub_probe(struct usb_device *udev)
}
busid_priv->status = STUB_BUSID_ALLOC;
- return 0;
+ rc = 0;
+ goto call_put_busid_priv;
+
err_files:
usb_hub_release_port(udev->parent, udev->portnum,
(struct usb_dev_state *) udev);
@@ -379,6 +386,9 @@ err_port:
busid_priv->sdev = NULL;
stub_device_free(sdev);
+
+call_put_busid_priv:
+ put_busid_priv(busid_priv);
return rc;
}
@@ -404,7 +414,7 @@ static void stub_disconnect(struct usb_device *udev)
struct bus_id_priv *busid_priv;
int rc;
- dev_dbg(&udev->dev, "Enter\n");
+ dev_dbg(&udev->dev, "Enter disconnect\n");
busid_priv = get_busid_priv(udev_busid);
if (!busid_priv) {
@@ -417,7 +427,7 @@ static void stub_disconnect(struct usb_device *udev)
/* get stub_device */
if (!sdev) {
dev_err(&udev->dev, "could not get device");
- return;
+ goto call_put_busid_priv;
}
dev_set_drvdata(&udev->dev, NULL);
@@ -432,12 +442,12 @@ static void stub_disconnect(struct usb_device *udev)
(struct usb_dev_state *) udev);
if (rc) {
dev_dbg(&udev->dev, "unable to release port\n");
- return;
+ goto call_put_busid_priv;
}
/* If usb reset is called from event handler */
if (usbip_in_eh(current))
- return;
+ goto call_put_busid_priv;
/* shutdown the current connection */
shutdown_busid(busid_priv);
@@ -448,12 +458,11 @@ static void stub_disconnect(struct usb_device *udev)
busid_priv->sdev = NULL;
stub_device_free(sdev);
- if (busid_priv->status == STUB_BUSID_ALLOC) {
+ if (busid_priv->status == STUB_BUSID_ALLOC)
busid_priv->status = STUB_BUSID_ADDED;
- } else {
- busid_priv->status = STUB_BUSID_OTHER;
- del_match_busid((char *)udev_busid);
- }
+
+call_put_busid_priv:
+ put_busid_priv(busid_priv);
}
#ifdef CONFIG_PM
diff --git a/drivers/usb/usbip/stub_main.c b/drivers/usb/usbip/stub_main.c
index d41d0cdeec0f..bf8a5feb0ee9 100644
--- a/drivers/usb/usbip/stub_main.c
+++ b/drivers/usb/usbip/stub_main.c
@@ -14,6 +14,7 @@
#define DRIVER_DESC "USB/IP Host Driver"
struct kmem_cache *stub_priv_cache;
+
/*
* busid_tables defines matching busids that usbip can grab. A user can change
* dynamically what device is locally used and what device is exported to a
@@ -25,6 +26,8 @@ static spinlock_t busid_table_lock;
static void init_busid_table(void)
{
+ int i;
+
/*
* This also sets the bus_table[i].status to
* STUB_BUSID_OTHER, which is 0.
@@ -32,6 +35,9 @@ static void init_busid_table(void)
memset(busid_table, 0, sizeof(busid_table));
spin_lock_init(&busid_table_lock);
+
+ for (i = 0; i < MAX_BUSID; i++)
+ spin_lock_init(&busid_table[i].busid_lock);
}
/*
@@ -43,15 +49,20 @@ static int get_busid_idx(const char *busid)
int i;
int idx = -1;
- for (i = 0; i < MAX_BUSID; i++)
+ for (i = 0; i < MAX_BUSID; i++) {
+ spin_lock(&busid_table[i].busid_lock);
if (busid_table[i].name[0])
if (!strncmp(busid_table[i].name, busid, BUSID_SIZE)) {
idx = i;
+ spin_unlock(&busid_table[i].busid_lock);
break;
}
+ spin_unlock(&busid_table[i].busid_lock);
+ }
return idx;
}
+/* Returns holding busid_lock. Should call put_busid_priv() to unlock */
struct bus_id_priv *get_busid_priv(const char *busid)
{
int idx;
@@ -59,13 +70,22 @@ struct bus_id_priv *get_busid_priv(const char *busid)
spin_lock(&busid_table_lock);
idx = get_busid_idx(busid);
- if (idx >= 0)
+ if (idx >= 0) {
bid = &(busid_table[idx]);
+ /* get busid_lock before returning */
+ spin_lock(&bid->busid_lock);
+ }
spin_unlock(&busid_table_lock);
return bid;
}
+void put_busid_priv(struct bus_id_priv *bid)
+{
+ if (bid)
+ spin_unlock(&bid->busid_lock);
+}
+
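
The new locking contract: get_busid_priv() returns with the matching entry's busid_lock held (or NULL if there is no match), and every caller, including stub_probe() and stub_disconnect() above, must leave through the NULL-safe put_busid_priv(). The caller shape, sketched against the declarations in stub.h:

static int demo_use_busid(const char *busid)
{
        struct bus_id_priv *bid = get_busid_priv(busid);

        if (!bid)
                return -ENODEV;

        /* ... work on *bid; the entry cannot change under us ... */

        put_busid_priv(bid);            /* drops bid->busid_lock */
        return 0;
}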
static int add_match_busid(char *busid)
{
int i;
@@ -78,15 +98,19 @@ static int add_match_busid(char *busid)
goto out;
}
- for (i = 0; i < MAX_BUSID; i++)
+ for (i = 0; i < MAX_BUSID; i++) {
+ spin_lock(&busid_table[i].busid_lock);
if (!busid_table[i].name[0]) {
strlcpy(busid_table[i].name, busid, BUSID_SIZE);
if ((busid_table[i].status != STUB_BUSID_ALLOC) &&
(busid_table[i].status != STUB_BUSID_REMOV))
busid_table[i].status = STUB_BUSID_ADDED;
ret = 0;
+ spin_unlock(&busid_table[i].busid_lock);
break;
}
+ spin_unlock(&busid_table[i].busid_lock);
+ }
out:
spin_unlock(&busid_table_lock);
@@ -107,6 +131,8 @@ int del_match_busid(char *busid)
/* found */
ret = 0;
+ spin_lock(&busid_table[idx].busid_lock);
+
if (busid_table[idx].status == STUB_BUSID_OTHER)
memset(busid_table[idx].name, 0, BUSID_SIZE);
@@ -114,6 +140,7 @@ int del_match_busid(char *busid)
(busid_table[idx].status != STUB_BUSID_ADDED))
busid_table[idx].status = STUB_BUSID_REMOV;
+ spin_unlock(&busid_table[idx].busid_lock);
out:
spin_unlock(&busid_table_lock);
@@ -126,9 +153,12 @@ static ssize_t match_busid_show(struct device_driver *drv, char *buf)
char *out = buf;
spin_lock(&busid_table_lock);
- for (i = 0; i < MAX_BUSID; i++)
+ for (i = 0; i < MAX_BUSID; i++) {
+ spin_lock(&busid_table[i].busid_lock);
if (busid_table[i].name[0])
out += sprintf(out, "%s ", busid_table[i].name);
+ spin_unlock(&busid_table[i].busid_lock);
+ }
spin_unlock(&busid_table_lock);
out += sprintf(out, "\n");
@@ -169,6 +199,51 @@ static ssize_t match_busid_store(struct device_driver *dev, const char *buf,
}
static DRIVER_ATTR_RW(match_busid);
+static int do_rebind(char *busid, struct bus_id_priv *busid_priv)
+{
+ int ret;
+
+ /* device_attach() callers should hold parent lock for USB */
+ if (busid_priv->udev->dev.parent)
+ device_lock(busid_priv->udev->dev.parent);
+ ret = device_attach(&busid_priv->udev->dev);
+ if (busid_priv->udev->dev.parent)
+ device_unlock(busid_priv->udev->dev.parent);
+ if (ret < 0) {
+ dev_err(&busid_priv->udev->dev, "rebind failed\n");
+ return ret;
+ }
+ return 0;
+}
+
+static void stub_device_rebind(void)
+{
+#if IS_MODULE(CONFIG_USBIP_HOST)
+ struct bus_id_priv *busid_priv;
+ int i;
+
+ /* update status to STUB_BUSID_OTHER so probe ignores the device */
+ spin_lock(&busid_table_lock);
+ for (i = 0; i < MAX_BUSID; i++) {
+ if (busid_table[i].name[0] &&
+ busid_table[i].shutdown_busid) {
+ busid_priv = &(busid_table[i]);
+ busid_priv->status = STUB_BUSID_OTHER;
+ }
+ }
+ spin_unlock(&busid_table_lock);
+
+ /* now run rebind; no need to hold the locks, the driver files have been removed */
+ for (i = 0; i < MAX_BUSID; i++) {
+ if (busid_table[i].name[0] &&
+ busid_table[i].shutdown_busid) {
+ busid_priv = &(busid_table[i]);
+ do_rebind(busid_table[i].name, busid_priv);
+ }
+ }
+#endif
+}
+
static ssize_t rebind_store(struct device_driver *dev, const char *buf,
size_t count)
{
@@ -186,16 +261,17 @@ static ssize_t rebind_store(struct device_driver *dev, const char *buf,
if (!bid)
return -ENODEV;
- /* device_attach() callers should hold parent lock for USB */
- if (bid->udev->dev.parent)
- device_lock(bid->udev->dev.parent);
- ret = device_attach(&bid->udev->dev);
- if (bid->udev->dev.parent)
- device_unlock(bid->udev->dev.parent);
- if (ret < 0) {
- dev_err(&bid->udev->dev, "rebind failed\n");
+ /* mark the device for deletion so probe ignores it during rescan */
+ bid->status = STUB_BUSID_OTHER;
+ /* release the busid lock */
+ put_busid_priv(bid);
+
+ ret = do_rebind((char *) buf, bid);
+ if (ret < 0)
return ret;
- }
+
+ /* delete device from busid_table */
+ del_match_busid((char *) buf);
return count;
}
@@ -317,6 +393,9 @@ static void __exit usbip_host_exit(void)
*/
usb_deregister_device_driver(&stub_driver);
+ /* initiate scan to attach devices */
+ stub_device_rebind();
+
kmem_cache_destroy(stub_priv_cache);
}
diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index 5c212bf29640..3c082451ab1a 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -404,6 +404,7 @@ static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr,
{
unsigned long pfn = 0;
long ret, pinned = 0, lock_acct = 0;
+ bool rsvd;
dma_addr_t iova = vaddr - dma->vaddr + dma->iova;
/* This code path is only user initiated */
@@ -414,23 +415,14 @@ static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr,
if (ret)
return ret;
- if (is_invalid_reserved_pfn(*pfn_base)) {
- struct vm_area_struct *vma;
-
- down_read(&current->mm->mmap_sem);
- vma = find_vma_intersection(current->mm, vaddr, vaddr + 1);
- pinned = min_t(long, npage, vma_pages(vma));
- up_read(&current->mm->mmap_sem);
- return pinned;
- }
-
pinned++;
+ rsvd = is_invalid_reserved_pfn(*pfn_base);
/*
* Reserved pages aren't counted against the user, externally pinned
* pages are already counted against the user.
*/
- if (!vfio_find_vpfn(dma, iova)) {
+ if (!rsvd && !vfio_find_vpfn(dma, iova)) {
if (!lock_cap && current->mm->locked_vm + 1 > limit) {
put_pfn(*pfn_base, dma->prot);
pr_warn("%s: RLIMIT_MEMLOCK (%ld) exceeded\n", __func__,
@@ -450,12 +442,13 @@ static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr,
if (ret)
break;
- if (pfn != *pfn_base + pinned) {
+ if (pfn != *pfn_base + pinned ||
+ rsvd != is_invalid_reserved_pfn(pfn)) {
put_pfn(pfn, dma->prot);
break;
}
- if (!vfio_find_vpfn(dma, iova)) {
+ if (!rsvd && !vfio_find_vpfn(dma, iova)) {
if (!lock_cap &&
current->mm->locked_vm + lock_acct + 1 > limit) {
put_pfn(pfn, dma->prot);
@@ -473,8 +466,10 @@ out:
unpin_out:
if (ret) {
- for (pfn = *pfn_base ; pinned ; pfn++, pinned--)
- put_pfn(pfn, dma->prot);
+ if (!rsvd) {
+ for (pfn = *pfn_base ; pinned ; pfn++, pinned--)
+ put_pfn(pfn, dma->prot);
+ }
return ret;
}
diff --git a/drivers/vfio/virqfd.c b/drivers/vfio/virqfd.c
index 085700f1be10..2a1be859ee71 100644
--- a/drivers/vfio/virqfd.c
+++ b/drivers/vfio/virqfd.c
@@ -166,7 +166,7 @@ int vfio_virqfd_enable(void *opaque,
init_waitqueue_func_entry(&virqfd->wait, virqfd_wakeup);
init_poll_funcptr(&virqfd->pt, virqfd_ptable_queue_proc);
- events = irqfd.file->f_op->poll(irqfd.file, &virqfd->pt);
+ events = vfs_poll(irqfd.file, &virqfd->pt);
/*
* Check if there was an event already pending on the eventfd
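
Open-coded file->f_op->poll(...) calls are being replaced by the vfs_poll() wrapper from <linux/poll.h>, which also copes with file_operations that lack a ->poll method by reporting the default always-ready mask. Call-site shape:

#include <linux/poll.h>

static __poll_t demo_arm(struct file *f, poll_table *pt)
{
        return vfs_poll(f, pt);         /* was: f->f_op->poll(f, pt) */
}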
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 986058a57917..eeaf6739215f 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -105,7 +105,9 @@ struct vhost_net_virtqueue {
/* vhost zerocopy support fields below: */
/* last used idx for outstanding DMA zerocopy buffers */
int upend_idx;
- /* first used idx for DMA done zerocopy buffers */
+ /* For TX, first used idx for DMA done zerocopy buffers
+ * For RX, number of batched heads
+ */
int done_idx;
/* an array of userspace buffers info */
struct ubuf_info *ubuf_info;
@@ -626,6 +628,18 @@ static int sk_has_rx_data(struct sock *sk)
return skb_queue_empty(&sk->sk_receive_queue);
}
+static void vhost_rx_signal_used(struct vhost_net_virtqueue *nvq)
+{
+ struct vhost_virtqueue *vq = &nvq->vq;
+ struct vhost_dev *dev = vq->dev;
+
+ if (!nvq->done_idx)
+ return;
+
+ vhost_add_used_and_signal_n(dev, vq, vq->heads, nvq->done_idx);
+ nvq->done_idx = 0;
+}
+
static int vhost_net_rx_peek_head_len(struct vhost_net *net, struct sock *sk)
{
struct vhost_net_virtqueue *rvq = &net->vqs[VHOST_NET_VQ_RX];
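
done_idx now doubles as the RX batch counter and vhost_rx_signal_used() is the single flush point. The subtle rule is to flush before any potentially long wait, as in the busy-poll path below; otherwise the guest never learns that its buffers completed and cannot refill the ring. The completion side, sketched with the names above:

static void demo_complete(struct vhost_net_virtqueue *nvq, int headcount)
{
        /* heads were already recorded in vq->heads[] by get_rx_bufs() */
        nvq->done_idx += headcount;
        if (nvq->done_idx > VHOST_RX_BATCH)
                vhost_rx_signal_used(nvq);      /* publish, signal, reset */
}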
@@ -635,6 +649,8 @@ static int vhost_net_rx_peek_head_len(struct vhost_net *net, struct sock *sk)
int len = peek_head_len(rvq, sk);
if (!len && vq->busyloop_timeout) {
+ /* Flush batched heads first */
+ vhost_rx_signal_used(rvq);
/* Both tx vq and rx socket were polled here */
mutex_lock_nested(&vq->mutex, 1);
vhost_disable_notify(&net->dev, vq);
@@ -762,7 +778,7 @@ static void handle_rx(struct vhost_net *net)
};
size_t total_len = 0;
int err, mergeable;
- s16 headcount, nheads = 0;
+ s16 headcount;
size_t vhost_hlen, sock_hlen;
size_t vhost_len, sock_len;
struct socket *sock;
@@ -790,8 +806,8 @@ static void handle_rx(struct vhost_net *net)
while ((sock_len = vhost_net_rx_peek_head_len(net, sock->sk))) {
sock_len += sock_hlen;
vhost_len = sock_len + vhost_hlen;
- headcount = get_rx_bufs(vq, vq->heads + nheads, vhost_len,
- &in, vq_log, &log,
+ headcount = get_rx_bufs(vq, vq->heads + nvq->done_idx,
+ vhost_len, &in, vq_log, &log,
likely(mergeable) ? UIO_MAXIOV : 1);
/* On error, stop handling until the next kick. */
if (unlikely(headcount < 0))
@@ -862,12 +878,9 @@ static void handle_rx(struct vhost_net *net)
vhost_discard_vq_desc(vq, headcount);
goto out;
}
- nheads += headcount;
- if (nheads > VHOST_RX_BATCH) {
- vhost_add_used_and_signal_n(&net->dev, vq, vq->heads,
- nheads);
- nheads = 0;
- }
+ nvq->done_idx += headcount;
+ if (nvq->done_idx > VHOST_RX_BATCH)
+ vhost_rx_signal_used(nvq);
if (unlikely(vq_log))
vhost_log_write(vq, vq_log, log, vhost_len);
total_len += vhost_len;
@@ -878,9 +891,7 @@ static void handle_rx(struct vhost_net *net)
}
vhost_net_enable_vq(net, vq);
out:
- if (nheads)
- vhost_add_used_and_signal_n(&net->dev, vq, vq->heads,
- nheads);
+ vhost_rx_signal_used(nvq);
mutex_unlock(&vq->mutex);
}
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index f3bd8e941224..895eaa25807c 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -208,7 +208,7 @@ int vhost_poll_start(struct vhost_poll *poll, struct file *file)
if (poll->wqh)
return 0;
- mask = file->f_op->poll(file, &poll->table);
+ mask = vfs_poll(file, &poll->table);
if (mask)
vhost_poll_wakeup(&poll->wait, 0, 0, poll_to_key(mask));
if (mask & EPOLLERR) {
@@ -981,6 +981,7 @@ static int vhost_process_iotlb_msg(struct vhost_dev *dev,
{
int ret = 0;
+ mutex_lock(&dev->mutex);
vhost_dev_lock_vqs(dev);
switch (msg->type) {
case VHOST_IOTLB_UPDATE:
@@ -1016,6 +1017,8 @@ static int vhost_process_iotlb_msg(struct vhost_dev *dev,
}
vhost_dev_unlock_vqs(dev);
+ mutex_unlock(&dev->mutex);
+
return ret;
}
ssize_t vhost_chr_write_iter(struct vhost_dev *dev,
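
Taking dev->mutex serializes IOTLB message handling against vhost_dev_cleanup() and the ioctl paths that already hold it; the per-vq locks alone did not protect the device-wide IOTLB state. The resulting lock nesting, as a sketch:

mutex_lock(&dev->mutex);        /* device-wide lock, taken first */
vhost_dev_lock_vqs(dev);        /* then the per-vq mutexes */
/* ... handle VHOST_IOTLB_UPDATE / VHOST_IOTLB_INVALIDATE ... */
vhost_dev_unlock_vqs(dev);
mutex_unlock(&dev->mutex);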
diff --git a/drivers/video/fbdev/core/fbmem.c b/drivers/video/fbdev/core/fbmem.c
index f741ba8df01b..924d0730ffe2 100644
--- a/drivers/video/fbdev/core/fbmem.c
+++ b/drivers/video/fbdev/core/fbmem.c
@@ -713,19 +713,6 @@ static const struct seq_operations proc_fb_seq_ops = {
.show = fb_seq_show,
};
-static int proc_fb_open(struct inode *inode, struct file *file)
-{
- return seq_open(file, &proc_fb_seq_ops);
-}
-
-static const struct file_operations fb_proc_fops = {
- .owner = THIS_MODULE,
- .open = proc_fb_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
/*
* We hold a reference to the fb_info in file->private_data,
* but if the current registered fb has changed, we don't
@@ -1877,7 +1864,7 @@ fbmem_init(void)
{
int ret;
- if (!proc_create("fb", 0, NULL, &fb_proc_fops))
+ if (!proc_create_seq("fb", 0, NULL, &proc_fb_seq_ops))
return -ENOMEM;
ret = register_chrdev(FB_MAJOR, "fb", &fb_fops);
diff --git a/drivers/video/fbdev/via/viafbdev.c b/drivers/video/fbdev/via/viafbdev.c
index badee04ef496..9b45125988fb 100644
--- a/drivers/video/fbdev/via/viafbdev.c
+++ b/drivers/video/fbdev/via/viafbdev.c
@@ -1475,19 +1475,6 @@ static int viafb_sup_odev_proc_show(struct seq_file *m, void *v)
return 0;
}
-static int viafb_sup_odev_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, viafb_sup_odev_proc_show, NULL);
-}
-
-static const struct file_operations viafb_sup_odev_proc_fops = {
- .owner = THIS_MODULE,
- .open = viafb_sup_odev_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
static ssize_t odev_update(const char __user *buffer, size_t count, u32 *odev)
{
char buf[64], *ptr = buf;
@@ -1616,8 +1603,8 @@ static void viafb_init_proc(struct viafb_shared *shared)
&viafb_vt1636_proc_fops);
#endif /* CONFIG_FB_VIA_DIRECT_PROCFS */
- proc_create("supported_output_devices", 0, viafb_entry,
- &viafb_sup_odev_proc_fops);
+ proc_create_single("supported_output_devices", 0, viafb_entry,
+ viafb_sup_odev_proc_show);
iga1_entry = proc_mkdir("iga1", viafb_entry);
shared->iga1_proc_entry = iga1_entry;
proc_create("output_devices", 0, iga1_entry,
diff --git a/drivers/w1/w1_io.c b/drivers/w1/w1_io.c
index 075d120e7b88..0364d3329c52 100644
--- a/drivers/w1/w1_io.c
+++ b/drivers/w1/w1_io.c
@@ -194,6 +194,7 @@ static u8 w1_read_bit(struct w1_master *dev)
* bit 0 = id_bit
* bit 1 = comp_bit
* bit 2 = dir_taken
+ *
* If both bits 0 & 1 are set, the search should be restarted.
*
* Return: bit fields - see above
diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index e1c60899fdbc..a6f9ba85dc4b 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -351,7 +351,7 @@ xen_swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr,
* physical address */
phys = xen_bus_to_phys(dev_addr);
- if (((dev_addr + size - 1 > dma_mask)) ||
+ if (((dev_addr + size - 1 <= dma_mask)) ||
range_straddles_page_boundary(phys, size))
xen_destroy_contiguous_region(phys, order);
diff --git a/drivers/zorro/proc.c b/drivers/zorro/proc.c
index df05a26ab8d8..2e4ca4dc0960 100644
--- a/drivers/zorro/proc.c
+++ b/drivers/zorro/proc.c
@@ -96,19 +96,6 @@ static const struct seq_operations zorro_devices_seq_ops = {
.show = zorro_seq_show,
};
-static int zorro_devices_proc_open(struct inode *inode, struct file *file)
-{
- return seq_open(file, &zorro_devices_seq_ops);
-}
-
-static const struct file_operations zorro_devices_proc_fops = {
- .owner = THIS_MODULE,
- .open = zorro_devices_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
static struct proc_dir_entry *proc_bus_zorro_dir;
static int __init zorro_proc_attach_device(unsigned int slot)
@@ -132,8 +119,8 @@ static int __init zorro_proc_init(void)
if (MACH_IS_AMIGA && AMIGAHW_PRESENT(ZORRO)) {
proc_bus_zorro_dir = proc_mkdir("bus/zorro", NULL);
- proc_create("devices", 0, proc_bus_zorro_dir,
- &zorro_devices_proc_fops);
+ proc_create_seq("devices", 0, proc_bus_zorro_dir,
+ &zorro_devices_seq_ops);
for (slot = 0; slot < zorro_num_autocon; slot++)
zorro_proc_attach_device(slot);
}
diff --git a/drivers/zorro/zorro.c b/drivers/zorro/zorro.c
index 47728477297e..67fa900572a9 100644
--- a/drivers/zorro/zorro.c
+++ b/drivers/zorro/zorro.c
@@ -101,6 +101,7 @@ static void __init mark_region(unsigned long start, unsigned long end,
end = end > Z2RAM_END ? Z2RAM_SIZE : end-Z2RAM_START;
while (start < end) {
u32 chunk = start>>Z2RAM_CHUNKSHIFT;
+
if (flag)
set_bit(chunk, zorro_unused_z2ram);
else
@@ -117,6 +118,7 @@ static struct resource __init *zorro_find_parent_resource(
for (i = 0; i < bridge->num_resources; i++) {
struct resource *r = &bridge->resource[i];
+
if (zorro_resource_start(z) >= r->start &&
zorro_resource_end(z) <= r->end)
return r;
@@ -168,6 +170,7 @@ static int __init amiga_zorro_probe(struct platform_device *pdev)
if (z->id == ZORRO_PROD_GVP_EPC_BASE) {
/* GVP quirk */
unsigned long magic = zi->boardaddr + 0x8000;
+
z->id |= *(u16 *)ZTWO_VADDR(magic) & GVP_PRODMASK;
}
z->slotaddr = zi->slotaddr;
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 9ee534159cc6..42e102e2e74a 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -823,28 +823,21 @@ struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
if (IS_ERR(dfid))
return ERR_CAST(dfid);
- name = dentry->d_name.name;
- fid = p9_client_walk(dfid, 1, &name, 1);
- if (IS_ERR(fid)) {
- if (fid == ERR_PTR(-ENOENT)) {
- d_add(dentry, NULL);
- return NULL;
- }
- return ERR_CAST(fid);
- }
/*
* Make sure we don't use a wrong inode due to parallel
* unlink. For cached mode create calls request for new
* inode. But with cache disabled, lookup should do this.
*/
- if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE)
+ name = dentry->d_name.name;
+ fid = p9_client_walk(dfid, 1, &name, 1);
+ if (fid == ERR_PTR(-ENOENT))
+ inode = NULL;
+ else if (IS_ERR(fid))
+ inode = ERR_CAST(fid);
+ else if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE)
inode = v9fs_get_inode_from_fid(v9ses, fid, dir->i_sb);
else
inode = v9fs_get_new_inode_from_fid(v9ses, fid, dir->i_sb);
- if (IS_ERR(inode)) {
- p9_client_clunk(fid);
- return ERR_CAST(inode);
- }
/*
* If we had a rename on the server and a parallel lookup
* for the new name, then make sure we instantiate with
@@ -853,12 +846,14 @@ struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
* k/b.
*/
res = d_splice_alias(inode, dentry);
- if (!res)
- v9fs_fid_add(dentry, fid);
- else if (!IS_ERR(res))
- v9fs_fid_add(res, fid);
- else
- p9_client_clunk(fid);
+ if (!IS_ERR(fid)) {
+ if (!res)
+ v9fs_fid_add(dentry, fid);
+ else if (!IS_ERR(res))
+ v9fs_fid_add(res, fid);
+ else
+ p9_client_clunk(fid);
+ }
return res;
}
diff --git a/fs/Kconfig b/fs/Kconfig
index bc821a86d965..ac4ac908f001 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -196,7 +196,7 @@ config HUGETLBFS
help
hugetlbfs is a filesystem backing for HugeTLB pages, based on
ramfs. For architectures that support it, say Y here and read
- <file:Documentation/vm/hugetlbpage.txt> for details.
+ <file:Documentation/admin-guide/mm/hugetlbpage.rst> for details.
If unsure, say N.
diff --git a/fs/adfs/dir.c b/fs/adfs/dir.c
index 29444c83da48..e18eff854e1a 100644
--- a/fs/adfs/dir.c
+++ b/fs/adfs/dir.c
@@ -146,20 +146,6 @@ adfs_dir_lookup_byname(struct inode *inode, const struct qstr *name, struct obje
obj->parent_id = inode->i_ino;
- /*
- * '.' is handled by reserved_lookup() in fs/namei.c
- */
- if (name->len == 2 && name->name[0] == '.' && name->name[1] == '.') {
- /*
- * Currently unable to fill in the rest of 'obj',
- * but this is better than nothing. We need to
- * ascend one level to find it's parent.
- */
- obj->name_len = 0;
- obj->file_id = obj->parent_id;
- goto free_out;
- }
-
read_lock(&adfs_dir_lock);
ret = ops->setpos(&dir, 0);
@@ -266,17 +252,17 @@ adfs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
error = adfs_dir_lookup_byname(dir, &dentry->d_name, &obj);
if (error == 0) {
- error = -EACCES;
/*
* This only returns NULL if get_empty_inode
* fails.
*/
inode = adfs_iget(dir->i_sb, &obj);
- if (inode)
- error = 0;
+ if (!inode)
+ inode = ERR_PTR(-EACCES);
+ } else if (error != -ENOENT) {
+ inode = ERR_PTR(error);
}
- d_add(dentry, inode);
- return ERR_PTR(error);
+ return d_splice_alias(inode, dentry);
}
/*
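
These ->lookup() rewrites lean on d_splice_alias() accepting all three outcomes in one call: a NULL inode produces a negative dentry (what d_add(dentry, NULL) used to do), an ERR_PTR inode comes back as that error, and a valid inode is attached or an existing alias returned. The resulting lookup shape; demo_find_inode() is a hypothetical helper returning an inode, NULL, or an ERR_PTR:

struct inode *demo_find_inode(struct inode *dir, const struct qstr *name);

static struct dentry *demo_lookup(struct inode *dir, struct dentry *dentry,
                                  unsigned int flags)
{
        struct inode *inode = demo_find_inode(dir, &dentry->d_name);

        return d_splice_alias(inode, dentry);   /* handles all three cases */
}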
diff --git a/fs/affs/namei.c b/fs/affs/namei.c
index d8aa0ae3d037..41c5749f4db7 100644
--- a/fs/affs/namei.c
+++ b/fs/affs/namei.c
@@ -201,14 +201,16 @@ affs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
struct super_block *sb = dir->i_sb;
struct buffer_head *bh;
struct inode *inode = NULL;
+ struct dentry *res;
pr_debug("%s(\"%pd\")\n", __func__, dentry);
affs_lock_dir(dir);
bh = affs_find_entry(dir, dentry);
- affs_unlock_dir(dir);
- if (IS_ERR(bh))
+ if (IS_ERR(bh)) {
+ affs_unlock_dir(dir);
return ERR_CAST(bh);
+ }
if (bh) {
u32 ino = bh->b_blocknr;
@@ -222,11 +224,12 @@ affs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
}
affs_brelse(bh);
inode = affs_iget(sb, ino);
- if (IS_ERR(inode))
- return ERR_CAST(inode);
}
- d_add(dentry, inode);
- return NULL;
+ res = d_splice_alias(inode, dentry);
+ if (!IS_ERR_OR_NULL(res))
+ res->d_fsdata = dentry->d_fsdata;
+ affs_unlock_dir(dir);
+ return res;
}
int
diff --git a/fs/affs/super.c b/fs/affs/super.c
index e602619aed9d..d1ad11a8a4a5 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -241,6 +241,7 @@ parse_options(char *options, kuid_t *uid, kgid_t *gid, int *mode, int *reserved,
affs_set_opt(*mount_opts, SF_NO_TRUNCATE);
break;
case Opt_prefix:
+ kfree(*prefix);
*prefix = match_strdup(&args[0]);
if (!*prefix)
return 0;
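The one-line affs fix above guards against "prefix=" appearing more than once in the option string; kfree(NULL) is a no-op, so the first occurrence needs no special case. A user-space analogue of the pattern (set_prefix() is illustrative only):

	#include <stdlib.h>
	#include <string.h>

	/* Free the previous copy before replacing it, so repeated options
	 * ("prefix=a,prefix=b") do not leak; free(NULL) is a no-op. */
	static int set_prefix(char **prefix, const char *arg)
	{
		free(*prefix);
		*prefix = strdup(arg);
		return *prefix ? 0 : -1;
	}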
diff --git a/fs/afs/addr_list.c b/fs/afs/addr_list.c
index 3bedfed608a2..7587fb665ff1 100644
--- a/fs/afs/addr_list.c
+++ b/fs/afs/addr_list.c
@@ -121,7 +121,7 @@ struct afs_addr_list *afs_parse_text_addrs(const char *text, size_t len,
p = text;
do {
struct sockaddr_rxrpc *srx = &alist->addrs[alist->nr_addrs];
- char tdelim = delim;
+ const char *q, *stop;
if (*p == delim) {
p++;
@@ -130,28 +130,33 @@ struct afs_addr_list *afs_parse_text_addrs(const char *text, size_t len,
if (*p == '[') {
p++;
- tdelim = ']';
+ q = memchr(p, ']', end - p);
+ } else {
+ for (q = p; q < end; q++)
+ if (*q == '+' || *q == delim)
+ break;
}
- if (in4_pton(p, end - p,
+ if (in4_pton(p, q - p,
(u8 *)&srx->transport.sin6.sin6_addr.s6_addr32[3],
- tdelim, &p)) {
+ -1, &stop)) {
srx->transport.sin6.sin6_addr.s6_addr32[0] = 0;
srx->transport.sin6.sin6_addr.s6_addr32[1] = 0;
srx->transport.sin6.sin6_addr.s6_addr32[2] = htonl(0xffff);
- } else if (in6_pton(p, end - p,
+ } else if (in6_pton(p, q - p,
srx->transport.sin6.sin6_addr.s6_addr,
- tdelim, &p)) {
+ -1, &stop)) {
/* Nothing to do */
} else {
goto bad_address;
}
- if (tdelim == ']') {
- if (p == end || *p != ']')
- goto bad_address;
+ if (stop != q)
+ goto bad_address;
+
+ p = q;
+ if (q < end && *q == ']')
p++;
- }
if (p < end) {
if (*p == '+') {
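The reworked afs address parser above locates the end of each token first, then requires in4_pton()/in6_pton() to consume exactly that many bytes (a delimiter of -1 means none is expected). A condensed sketch of the check, assuming p and q bound one token:

	/* Parse exactly the bytes [p, q) as an IPv6 address; a short parse
	 * (stop != q) indicates a malformed token. */
	static bool parse_one_addr(const char *p, const char *q, struct in6_addr *addr)
	{
		const char *stop;

		if (!in6_pton(p, q - p, addr->s6_addr, -1, &stop))
			return false;
		return stop == q;
	}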
diff --git a/fs/afs/callback.c b/fs/afs/callback.c
index abd9a84f4e88..571437dcb252 100644
--- a/fs/afs/callback.c
+++ b/fs/afs/callback.c
@@ -23,36 +23,55 @@
/*
* Set up an interest-in-callbacks record for a volume on a server and
* register it with the server.
- * - Called with volume->server_sem held.
+ * - Called with vnode->io_lock held.
*/
int afs_register_server_cb_interest(struct afs_vnode *vnode,
- struct afs_server_entry *entry)
+ struct afs_server_list *slist,
+ unsigned int index)
{
- struct afs_cb_interest *cbi = entry->cb_interest, *vcbi, *new, *x;
+ struct afs_server_entry *entry = &slist->servers[index];
+ struct afs_cb_interest *cbi, *vcbi, *new, *old;
struct afs_server *server = entry->server;
again:
+ if (vnode->cb_interest &&
+ likely(vnode->cb_interest == entry->cb_interest))
+ return 0;
+
+ read_lock(&slist->lock);
+ cbi = afs_get_cb_interest(entry->cb_interest);
+ read_unlock(&slist->lock);
+
vcbi = vnode->cb_interest;
if (vcbi) {
- if (vcbi == cbi)
+ if (vcbi == cbi) {
+ afs_put_cb_interest(afs_v2net(vnode), cbi);
return 0;
+ }
+ /* Use a new interest in the server list for the same server
+ * rather than an old one that's still attached to a vnode.
+ */
if (cbi && vcbi->server == cbi->server) {
write_seqlock(&vnode->cb_lock);
- vnode->cb_interest = afs_get_cb_interest(cbi);
+ old = vnode->cb_interest;
+ vnode->cb_interest = cbi;
write_sequnlock(&vnode->cb_lock);
- afs_put_cb_interest(afs_v2net(vnode), cbi);
+ afs_put_cb_interest(afs_v2net(vnode), old);
return 0;
}
+ /* Re-use the one attached to the vnode. */
if (!cbi && vcbi->server == server) {
- afs_get_cb_interest(vcbi);
- x = cmpxchg(&entry->cb_interest, cbi, vcbi);
- if (x != cbi) {
- cbi = x;
- afs_put_cb_interest(afs_v2net(vnode), vcbi);
+ write_lock(&slist->lock);
+ if (entry->cb_interest) {
+ write_unlock(&slist->lock);
+ afs_put_cb_interest(afs_v2net(vnode), cbi);
goto again;
}
+
+ entry->cb_interest = cbi;
+ write_unlock(&slist->lock);
return 0;
}
}
@@ -72,13 +91,16 @@ again:
list_add_tail(&new->cb_link, &server->cb_interests);
write_unlock(&server->cb_break_lock);
- x = cmpxchg(&entry->cb_interest, cbi, new);
- if (x == cbi) {
+ write_lock(&slist->lock);
+ if (!entry->cb_interest) {
+ entry->cb_interest = afs_get_cb_interest(new);
cbi = new;
+ new = NULL;
} else {
- cbi = x;
- afs_put_cb_interest(afs_v2net(vnode), new);
+ cbi = afs_get_cb_interest(entry->cb_interest);
}
+ write_unlock(&slist->lock);
+ afs_put_cb_interest(afs_v2net(vnode), new);
}
ASSERT(cbi);
@@ -88,11 +110,14 @@ again:
*/
write_seqlock(&vnode->cb_lock);
- vnode->cb_interest = afs_get_cb_interest(cbi);
+ old = vnode->cb_interest;
+ vnode->cb_interest = cbi;
vnode->cb_s_break = cbi->server->cb_s_break;
+ vnode->cb_v_break = vnode->volume->cb_v_break;
clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags);
write_sequnlock(&vnode->cb_lock);
+ afs_put_cb_interest(afs_v2net(vnode), old);
return 0;
}
@@ -171,13 +196,24 @@ static void afs_break_one_callback(struct afs_server *server,
if (cbi->vid != fid->vid)
continue;
- data.volume = NULL;
- data.fid = *fid;
- inode = ilookup5_nowait(cbi->sb, fid->vnode, afs_iget5_test, &data);
- if (inode) {
- vnode = AFS_FS_I(inode);
- afs_break_callback(vnode);
- iput(inode);
+ if (fid->vnode == 0 && fid->unique == 0) {
+ /* The callback break applies to an entire volume. */
+ struct afs_super_info *as = AFS_FS_S(cbi->sb);
+ struct afs_volume *volume = as->volume;
+
+ write_lock(&volume->cb_break_lock);
+ volume->cb_v_break++;
+ write_unlock(&volume->cb_break_lock);
+ } else {
+ data.volume = NULL;
+ data.fid = *fid;
+ inode = ilookup5_nowait(cbi->sb, fid->vnode,
+ afs_iget5_test, &data);
+ if (inode) {
+ vnode = AFS_FS_I(inode);
+ afs_break_callback(vnode);
+ iput(inode);
+ }
}
}
@@ -195,6 +231,8 @@ void afs_break_callbacks(struct afs_server *server, size_t count,
ASSERT(server != NULL);
ASSERTCMP(count, <=, AFSCBMAX);
+ /* TODO: Sort the callback break list by volume ID */
+
for (; count > 0; callbacks++, count--) {
_debug("- Fid { vl=%08x n=%u u=%u } CB { v=%u x=%u t=%u }",
callbacks->fid.vid,
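In the hunk above, a callback break carrying a zero vnode and unique number is interpreted as covering the whole volume, so the server does not have to enumerate every file. The dispatch reduced to its core, assuming the lock and counter added by this series (break_one_vnode() is a hypothetical per-file path):

	static void handle_break(struct afs_volume *volume, struct afs_fid *fid)
	{
		if (fid->vnode == 0 && fid->unique == 0) {
			/* Invalidate the entire volume in one step. */
			write_lock(&volume->cb_break_lock);
			volume->cb_v_break++;
			write_unlock(&volume->cb_break_lock);
		} else {
			break_one_vnode(volume, fid);
		}
	}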
diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c
index 357de908df3a..c332c95a6940 100644
--- a/fs/afs/cmservice.c
+++ b/fs/afs/cmservice.c
@@ -133,21 +133,10 @@ bool afs_cm_incoming_call(struct afs_call *call)
}
/*
- * clean up a cache manager call
+ * Clean up a cache manager call.
*/
static void afs_cm_destructor(struct afs_call *call)
{
- _enter("");
-
- /* Break the callbacks here so that we do it after the final ACK is
- * received. The step number here must match the final number in
- * afs_deliver_cb_callback().
- */
- if (call->unmarshall == 5) {
- ASSERT(call->cm_server && call->count && call->request);
- afs_break_callbacks(call->cm_server, call->count, call->request);
- }
-
kfree(call->buffer);
call->buffer = NULL;
}
@@ -161,14 +150,14 @@ static void SRXAFSCB_CallBack(struct work_struct *work)
_enter("");
- /* be sure to send the reply *before* attempting to spam the AFS server
- * with FSFetchStatus requests on the vnodes with broken callbacks lest
- * the AFS server get into a vicious cycle of trying to break further
- * callbacks because it hadn't received completion of the CBCallBack op
- * yet */
- afs_send_empty_reply(call);
+ /* We need to break the callbacks before sending the reply as the
+ * server holds up change visibility till it receives our reply so as
+ * to maintain cache coherency.
+ */
+ if (call->cm_server)
+ afs_break_callbacks(call->cm_server, call->count, call->request);
- afs_break_callbacks(call->cm_server, call->count, call->request);
+ afs_send_empty_reply(call);
afs_put_call(call);
_leave("");
}
@@ -180,7 +169,6 @@ static int afs_deliver_cb_callback(struct afs_call *call)
{
struct afs_callback_break *cb;
struct sockaddr_rxrpc srx;
- struct afs_server *server;
__be32 *bp;
int ret, loop;
@@ -267,15 +255,6 @@ static int afs_deliver_cb_callback(struct afs_call *call)
call->offset = 0;
call->unmarshall++;
-
- /* Record that the message was unmarshalled successfully so
- * that the call destructor can know do the callback breaking
- * work, even if the final ACK isn't received.
- *
- * If the step number changes, then afs_cm_destructor() must be
- * updated also.
- */
- call->unmarshall++;
case 5:
break;
}
@@ -286,10 +265,9 @@ static int afs_deliver_cb_callback(struct afs_call *call)
/* we'll need the file server record as that tells us which set of
* vnodes to operate upon */
rxrpc_kernel_get_peer(call->net->socket, call->rxcall, &srx);
- server = afs_find_server(call->net, &srx);
- if (!server)
- return -ENOTCONN;
- call->cm_server = server;
+ call->cm_server = afs_find_server(call->net, &srx);
+ if (!call->cm_server)
+ trace_afs_cm_no_server(call, &srx);
return afs_queue_call_work(call);
}
@@ -303,7 +281,8 @@ static void SRXAFSCB_InitCallBackState(struct work_struct *work)
_enter("{%p}", call->cm_server);
- afs_init_callback_state(call->cm_server);
+ if (call->cm_server)
+ afs_init_callback_state(call->cm_server);
afs_send_empty_reply(call);
afs_put_call(call);
_leave("");
@@ -315,7 +294,6 @@ static void SRXAFSCB_InitCallBackState(struct work_struct *work)
static int afs_deliver_cb_init_call_back_state(struct afs_call *call)
{
struct sockaddr_rxrpc srx;
- struct afs_server *server;
int ret;
_enter("");
@@ -328,10 +306,9 @@ static int afs_deliver_cb_init_call_back_state(struct afs_call *call)
/* we'll need the file server record as that tells us which set of
* vnodes to operate upon */
- server = afs_find_server(call->net, &srx);
- if (!server)
- return -ENOTCONN;
- call->cm_server = server;
+ call->cm_server = afs_find_server(call->net, &srx);
+ if (!call->cm_server)
+ trace_afs_cm_no_server(call, &srx);
return afs_queue_call_work(call);
}
@@ -341,8 +318,6 @@ static int afs_deliver_cb_init_call_back_state(struct afs_call *call)
*/
static int afs_deliver_cb_init_call_back_state3(struct afs_call *call)
{
- struct sockaddr_rxrpc srx;
- struct afs_server *server;
struct afs_uuid *r;
unsigned loop;
__be32 *b;
@@ -398,11 +373,11 @@ static int afs_deliver_cb_init_call_back_state3(struct afs_call *call)
/* we'll need the file server record as that tells us which set of
* vnodes to operate upon */
- rxrpc_kernel_get_peer(call->net->socket, call->rxcall, &srx);
- server = afs_find_server(call->net, &srx);
- if (!server)
- return -ENOTCONN;
- call->cm_server = server;
+ rcu_read_lock();
+ call->cm_server = afs_find_server_by_uuid(call->net, call->request);
+ rcu_read_unlock();
+ if (!call->cm_server)
+ trace_afs_cm_no_server_u(call, call->request);
return afs_queue_call_work(call);
}
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index 5889f70d4d27..7d623008157f 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -180,6 +180,7 @@ static int afs_dir_open(struct inode *inode, struct file *file)
* get reclaimed during the iteration.
*/
static struct afs_read *afs_read_dir(struct afs_vnode *dvnode, struct key *key)
+ __acquires(&dvnode->validate_lock)
{
struct afs_read *req;
loff_t i_size;
@@ -261,18 +262,21 @@ retry:
/* If we're going to reload, we need to lock all the pages to prevent
* races.
*/
- if (!test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) {
- ret = -ERESTARTSYS;
- for (i = 0; i < req->nr_pages; i++)
- if (lock_page_killable(req->pages[i]) < 0)
- goto error_unlock;
+ ret = -ERESTARTSYS;
+ if (down_read_killable(&dvnode->validate_lock) < 0)
+ goto error;
- if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags))
- goto success;
+ if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags))
+ goto success;
+
+ up_read(&dvnode->validate_lock);
+ if (down_write_killable(&dvnode->validate_lock) < 0)
+ goto error;
+ if (!test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) {
ret = afs_fetch_data(dvnode, key, req);
if (ret < 0)
- goto error_unlock_all;
+ goto error_unlock;
task_io_account_read(PAGE_SIZE * req->nr_pages);
@@ -284,33 +288,26 @@ retry:
for (i = 0; i < req->nr_pages; i++)
if (!afs_dir_check_page(dvnode, req->pages[i],
req->actual_len))
- goto error_unlock_all;
+ goto error_unlock;
// TODO: Trim excess pages
set_bit(AFS_VNODE_DIR_VALID, &dvnode->flags);
}
+ downgrade_write(&dvnode->validate_lock);
success:
- i = req->nr_pages;
- while (i > 0)
- unlock_page(req->pages[--i]);
return req;
-error_unlock_all:
- i = req->nr_pages;
error_unlock:
- while (i > 0)
- unlock_page(req->pages[--i]);
+ up_write(&dvnode->validate_lock);
error:
afs_put_read(req);
_leave(" = %d", ret);
return ERR_PTR(ret);
content_has_grown:
- i = req->nr_pages;
- while (i > 0)
- unlock_page(req->pages[--i]);
+ up_write(&dvnode->validate_lock);
afs_put_read(req);
goto retry;
}
@@ -473,6 +470,7 @@ static int afs_dir_iterate(struct inode *dir, struct dir_context *ctx,
}
out:
+ up_read(&dvnode->validate_lock);
afs_put_read(req);
_leave(" = %d", ret);
return ret;
@@ -1143,7 +1141,7 @@ static int afs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
ret = -ERESTARTSYS;
if (afs_begin_vnode_operation(&fc, dvnode, key)) {
while (afs_select_fileserver(&fc)) {
- fc.cb_break = dvnode->cb_break + dvnode->cb_s_break;
+ fc.cb_break = afs_calc_vnode_cb_break(dvnode);
afs_fs_create(&fc, dentry->d_name.name, mode, data_version,
&newfid, &newstatus, &newcb);
}
@@ -1213,7 +1211,7 @@ static int afs_rmdir(struct inode *dir, struct dentry *dentry)
ret = -ERESTARTSYS;
if (afs_begin_vnode_operation(&fc, dvnode, key)) {
while (afs_select_fileserver(&fc)) {
- fc.cb_break = dvnode->cb_break + dvnode->cb_s_break;
+ fc.cb_break = afs_calc_vnode_cb_break(dvnode);
afs_fs_remove(&fc, dentry->d_name.name, true,
data_version);
}
@@ -1316,7 +1314,7 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry)
ret = -ERESTARTSYS;
if (afs_begin_vnode_operation(&fc, dvnode, key)) {
while (afs_select_fileserver(&fc)) {
- fc.cb_break = dvnode->cb_break + dvnode->cb_s_break;
+ fc.cb_break = afs_calc_vnode_cb_break(dvnode);
afs_fs_remove(&fc, dentry->d_name.name, false,
data_version);
}
@@ -1373,7 +1371,7 @@ static int afs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
ret = -ERESTARTSYS;
if (afs_begin_vnode_operation(&fc, dvnode, key)) {
while (afs_select_fileserver(&fc)) {
- fc.cb_break = dvnode->cb_break + dvnode->cb_s_break;
+ fc.cb_break = afs_calc_vnode_cb_break(dvnode);
afs_fs_create(&fc, dentry->d_name.name, mode, data_version,
&newfid, &newstatus, &newcb);
}
@@ -1443,8 +1441,8 @@ static int afs_link(struct dentry *from, struct inode *dir,
}
while (afs_select_fileserver(&fc)) {
- fc.cb_break = dvnode->cb_break + dvnode->cb_s_break;
- fc.cb_break_2 = vnode->cb_break + vnode->cb_s_break;
+ fc.cb_break = afs_calc_vnode_cb_break(dvnode);
+ fc.cb_break_2 = afs_calc_vnode_cb_break(vnode);
afs_fs_link(&fc, vnode, dentry->d_name.name, data_version);
}
@@ -1512,7 +1510,7 @@ static int afs_symlink(struct inode *dir, struct dentry *dentry,
ret = -ERESTARTSYS;
if (afs_begin_vnode_operation(&fc, dvnode, key)) {
while (afs_select_fileserver(&fc)) {
- fc.cb_break = dvnode->cb_break + dvnode->cb_s_break;
+ fc.cb_break = afs_calc_vnode_cb_break(dvnode);
afs_fs_symlink(&fc, dentry->d_name.name,
content, data_version,
&newfid, &newstatus);
@@ -1588,8 +1586,8 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry,
}
}
while (afs_select_fileserver(&fc)) {
- fc.cb_break = orig_dvnode->cb_break + orig_dvnode->cb_s_break;
- fc.cb_break_2 = new_dvnode->cb_break + new_dvnode->cb_s_break;
+ fc.cb_break = afs_calc_vnode_cb_break(orig_dvnode);
+ fc.cb_break_2 = afs_calc_vnode_cb_break(new_dvnode);
afs_fs_rename(&fc, old_dentry->d_name.name,
new_dvnode, new_dentry->d_name.name,
orig_data_version, new_data_version);
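afs_read_dir() above now works the validate_lock rw_semaphore in a check/upgrade/downgrade sequence: an optimistic shared check, a re-check under the exclusive lock, then downgrade_write() so the data stays stable while other readers proceed. The pattern in isolation, with valid() and reload() as stand-ins for the directory checks:

	/* On success, returns 0 with sem held for read; the caller drops it
	 * with up_read() when done with the protected data. */
	static int check_or_reload(struct rw_semaphore *sem, bool (*valid)(void),
				   int (*reload)(void))
	{
		int ret;

		if (down_read_killable(sem) < 0)
			return -ERESTARTSYS;
		if (valid())
			return 0;
		up_read(sem);

		if (down_write_killable(sem) < 0)
			return -ERESTARTSYS;
		if (!valid()) {
			ret = reload();
			if (ret < 0) {
				up_write(sem);
				return ret;
			}
		}
		downgrade_write(sem);	/* exclusive -> shared */
		return 0;
	}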
diff --git a/fs/afs/file.c b/fs/afs/file.c
index c24c08016dd9..7d4f26198573 100644
--- a/fs/afs/file.c
+++ b/fs/afs/file.c
@@ -238,7 +238,7 @@ int afs_fetch_data(struct afs_vnode *vnode, struct key *key, struct afs_read *de
ret = -ERESTARTSYS;
if (afs_begin_vnode_operation(&fc, vnode, key)) {
while (afs_select_fileserver(&fc)) {
- fc.cb_break = vnode->cb_break + vnode->cb_s_break;
+ fc.cb_break = afs_calc_vnode_cb_break(vnode);
afs_fs_fetch_data(&fc, desc);
}
diff --git a/fs/afs/flock.c b/fs/afs/flock.c
index 7a0e017070ec..dc62d15a964b 100644
--- a/fs/afs/flock.c
+++ b/fs/afs/flock.c
@@ -86,7 +86,7 @@ static int afs_set_lock(struct afs_vnode *vnode, struct key *key,
ret = -ERESTARTSYS;
if (afs_begin_vnode_operation(&fc, vnode, key)) {
while (afs_select_fileserver(&fc)) {
- fc.cb_break = vnode->cb_break + vnode->cb_s_break;
+ fc.cb_break = afs_calc_vnode_cb_break(vnode);
afs_fs_set_lock(&fc, type);
}
@@ -117,7 +117,7 @@ static int afs_extend_lock(struct afs_vnode *vnode, struct key *key)
ret = -ERESTARTSYS;
if (afs_begin_vnode_operation(&fc, vnode, key)) {
while (afs_select_current_fileserver(&fc)) {
- fc.cb_break = vnode->cb_break + vnode->cb_s_break;
+ fc.cb_break = afs_calc_vnode_cb_break(vnode);
afs_fs_extend_lock(&fc);
}
@@ -148,7 +148,7 @@ static int afs_release_lock(struct afs_vnode *vnode, struct key *key)
ret = -ERESTARTSYS;
if (afs_begin_vnode_operation(&fc, vnode, key)) {
while (afs_select_current_fileserver(&fc)) {
- fc.cb_break = vnode->cb_break + vnode->cb_s_break;
+ fc.cb_break = afs_calc_vnode_cb_break(vnode);
afs_fs_release_lock(&fc);
}
diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c
index efacdb7c1dee..b273e1d60478 100644
--- a/fs/afs/fsclient.c
+++ b/fs/afs/fsclient.c
@@ -134,6 +134,7 @@ static int xdr_decode_AFSFetchStatus(struct afs_call *call,
struct afs_read *read_req)
{
const struct afs_xdr_AFSFetchStatus *xdr = (const void *)*_bp;
+ bool inline_error = (call->operation_ID == afs_FS_InlineBulkStatus);
u64 data_version, size;
u32 type, abort_code;
u8 flags = 0;
@@ -142,13 +143,32 @@ static int xdr_decode_AFSFetchStatus(struct afs_call *call,
if (vnode)
write_seqlock(&vnode->cb_lock);
+ abort_code = ntohl(xdr->abort_code);
+
if (xdr->if_version != htonl(AFS_FSTATUS_VERSION)) {
+ if (xdr->if_version == htonl(0) &&
+ abort_code != 0 &&
+ inline_error) {
+ /* The OpenAFS fileserver has a bug in FS.InlineBulkStatus
+ * whereby it doesn't set the interface version in the error
+ * case.
+ */
+ status->abort_code = abort_code;
+ ret = 0;
+ goto out;
+ }
+
pr_warn("Unknown AFSFetchStatus version %u\n", ntohl(xdr->if_version));
goto bad;
}
+ if (abort_code != 0 && inline_error) {
+ status->abort_code = abort_code;
+ ret = 0;
+ goto out;
+ }
+
type = ntohl(xdr->type);
- abort_code = ntohl(xdr->abort_code);
switch (type) {
case AFS_FTYPE_FILE:
case AFS_FTYPE_DIR:
@@ -165,13 +185,6 @@ static int xdr_decode_AFSFetchStatus(struct afs_call *call,
}
status->type = type;
break;
- case AFS_FTYPE_INVALID:
- if (abort_code != 0) {
- status->abort_code = abort_code;
- ret = 0;
- goto out;
- }
- /* Fall through */
default:
goto bad;
}
@@ -248,7 +261,7 @@ static void xdr_decode_AFSCallBack(struct afs_call *call,
write_seqlock(&vnode->cb_lock);
- if (call->cb_break == (vnode->cb_break + cbi->server->cb_s_break)) {
+ if (call->cb_break == afs_cb_break_sum(vnode, cbi)) {
vnode->cb_version = ntohl(*bp++);
cb_expiry = ntohl(*bp++);
vnode->cb_type = ntohl(*bp++);
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index 06194cfe9724..479b7fdda124 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -108,7 +108,7 @@ int afs_fetch_status(struct afs_vnode *vnode, struct key *key, bool new_inode)
ret = -ERESTARTSYS;
if (afs_begin_vnode_operation(&fc, vnode, key)) {
while (afs_select_fileserver(&fc)) {
- fc.cb_break = vnode->cb_break + vnode->cb_s_break;
+ fc.cb_break = afs_calc_vnode_cb_break(vnode);
afs_fs_fetch_file_status(&fc, NULL, new_inode);
}
@@ -393,15 +393,18 @@ int afs_validate(struct afs_vnode *vnode, struct key *key)
read_seqlock_excl(&vnode->cb_lock);
if (test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) {
- if (vnode->cb_s_break != vnode->cb_interest->server->cb_s_break) {
+ if (vnode->cb_s_break != vnode->cb_interest->server->cb_s_break ||
+ vnode->cb_v_break != vnode->volume->cb_v_break) {
vnode->cb_s_break = vnode->cb_interest->server->cb_s_break;
+ vnode->cb_v_break = vnode->volume->cb_v_break;
+ valid = false;
} else if (vnode->status.type == AFS_FTYPE_DIR &&
test_bit(AFS_VNODE_DIR_VALID, &vnode->flags) &&
vnode->cb_expires_at - 10 > now) {
- valid = true;
+ valid = true;
} else if (!test_bit(AFS_VNODE_ZAP_DATA, &vnode->flags) &&
vnode->cb_expires_at - 10 > now) {
- valid = true;
+ valid = true;
}
} else if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) {
valid = true;
@@ -415,7 +418,7 @@ int afs_validate(struct afs_vnode *vnode, struct key *key)
if (valid)
goto valid;
- mutex_lock(&vnode->validate_lock);
+ down_write(&vnode->validate_lock);
/* if the promise has expired, we need to check the server again to get
* a new promise - note that if the (parent) directory's metadata was
@@ -444,13 +447,13 @@ int afs_validate(struct afs_vnode *vnode, struct key *key)
* different */
if (test_and_clear_bit(AFS_VNODE_ZAP_DATA, &vnode->flags))
afs_zap_data(vnode);
- mutex_unlock(&vnode->validate_lock);
+ up_write(&vnode->validate_lock);
valid:
_leave(" = 0");
return 0;
error_unlock:
- mutex_unlock(&vnode->validate_lock);
+ up_write(&vnode->validate_lock);
_leave(" = %d", ret);
return ret;
}
@@ -574,7 +577,7 @@ int afs_setattr(struct dentry *dentry, struct iattr *attr)
ret = -ERESTARTSYS;
if (afs_begin_vnode_operation(&fc, vnode, key)) {
while (afs_select_fileserver(&fc)) {
- fc.cb_break = vnode->cb_break + vnode->cb_s_break;
+ fc.cb_break = afs_calc_vnode_cb_break(vnode);
afs_fs_setattr(&fc, attr);
}
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index f8086ec95e24..e3f8a46663db 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -396,6 +396,7 @@ struct afs_server {
#define AFS_SERVER_FL_PROBED 5 /* The fileserver has been probed */
#define AFS_SERVER_FL_PROBING 6 /* Fileserver is being probed */
#define AFS_SERVER_FL_NO_IBULK 7 /* Fileserver doesn't support FS.InlineBulkStatus */
+#define AFS_SERVER_FL_MAY_HAVE_CB 8 /* May have callbacks on this fileserver */
atomic_t usage;
u32 addr_version; /* Address list version */
@@ -433,6 +434,7 @@ struct afs_server_list {
unsigned short index; /* Server currently in use */
unsigned short vnovol_mask; /* Servers to be skipped due to VNOVOL */
unsigned int seq; /* Set to ->servers_seq when installed */
+ rwlock_t lock;
struct afs_server_entry servers[];
};
@@ -459,6 +461,9 @@ struct afs_volume {
rwlock_t servers_lock; /* Lock for ->servers */
unsigned int servers_seq; /* Incremented each time ->servers changes */
+ unsigned cb_v_break; /* Break-everything counter. */
+ rwlock_t cb_break_lock;
+
afs_voltype_t type; /* type of volume */
short error;
char type_force; /* force volume type (suppress R/O -> R/W) */
@@ -494,7 +499,7 @@ struct afs_vnode {
#endif
struct afs_permits __rcu *permit_cache; /* cache of permits so far obtained */
struct mutex io_lock; /* Lock for serialising I/O on this mutex */
- struct mutex validate_lock; /* lock for validating this vnode */
+ struct rw_semaphore validate_lock; /* lock for validating this vnode */
spinlock_t wb_lock; /* lock for wb_keys */
spinlock_t lock; /* waitqueue/flags lock */
unsigned long flags;
@@ -519,6 +524,7 @@ struct afs_vnode {
/* outstanding callback notification on this file */
struct afs_cb_interest *cb_interest; /* Server on which this resides */
unsigned int cb_s_break; /* Mass break counter on ->server */
+ unsigned int cb_v_break; /* Mass break counter on ->volume */
unsigned int cb_break; /* Break counter on vnode */
seqlock_t cb_lock; /* Lock for ->cb_interest, ->status, ->cb_*break */
@@ -648,16 +654,29 @@ extern void afs_init_callback_state(struct afs_server *);
extern void afs_break_callback(struct afs_vnode *);
extern void afs_break_callbacks(struct afs_server *, size_t, struct afs_callback_break*);
-extern int afs_register_server_cb_interest(struct afs_vnode *, struct afs_server_entry *);
+extern int afs_register_server_cb_interest(struct afs_vnode *,
+ struct afs_server_list *, unsigned int);
extern void afs_put_cb_interest(struct afs_net *, struct afs_cb_interest *);
extern void afs_clear_callback_interests(struct afs_net *, struct afs_server_list *);
static inline struct afs_cb_interest *afs_get_cb_interest(struct afs_cb_interest *cbi)
{
- refcount_inc(&cbi->usage);
+ if (cbi)
+ refcount_inc(&cbi->usage);
return cbi;
}
+static inline unsigned int afs_calc_vnode_cb_break(struct afs_vnode *vnode)
+{
+ return vnode->cb_break + vnode->cb_s_break + vnode->cb_v_break;
+}
+
+static inline unsigned int afs_cb_break_sum(struct afs_vnode *vnode,
+ struct afs_cb_interest *cbi)
+{
+ return vnode->cb_break + cbi->server->cb_s_break + vnode->volume->cb_v_break;
+}
+
/*
* cell.c
*/
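The two inline helpers above fold the per-vnode, per-server and per-volume break counters into one number. Each counter only ever increments, so the sum is monotonic: callers snapshot it with afs_calc_vnode_cb_break() before an RPC and treat any change as a lost callback promise. A sketch of the consuming side (the record/discard helpers are hypothetical):

	static void example_after_rpc(struct afs_vnode *vnode, unsigned int cb_break)
	{
		/* cb_break was sampled with afs_calc_vnode_cb_break() before
		 * the RPC; equality of the sums means no vnode, server or
		 * volume break intervened in the meantime. */
		if (cb_break == afs_cb_break_sum(vnode, vnode->cb_interest))
			record_callback_promise(vnode);
		else
			discard_stale_promise(vnode);
	}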
diff --git a/fs/afs/proc.c b/fs/afs/proc.c
index 839a22280606..3aad32762989 100644
--- a/fs/afs/proc.c
+++ b/fs/afs/proc.c
@@ -62,7 +62,6 @@ static const struct file_operations afs_proc_rootcell_fops = {
.llseek = no_llseek,
};
-static int afs_proc_cell_volumes_open(struct inode *inode, struct file *file);
static void *afs_proc_cell_volumes_start(struct seq_file *p, loff_t *pos);
static void *afs_proc_cell_volumes_next(struct seq_file *p, void *v,
loff_t *pos);
@@ -76,15 +75,6 @@ static const struct seq_operations afs_proc_cell_volumes_ops = {
.show = afs_proc_cell_volumes_show,
};
-static const struct file_operations afs_proc_cell_volumes_fops = {
- .open = afs_proc_cell_volumes_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
-static int afs_proc_cell_vlservers_open(struct inode *inode,
- struct file *file);
static void *afs_proc_cell_vlservers_start(struct seq_file *p, loff_t *pos);
static void *afs_proc_cell_vlservers_next(struct seq_file *p, void *v,
loff_t *pos);
@@ -98,14 +88,6 @@ static const struct seq_operations afs_proc_cell_vlservers_ops = {
.show = afs_proc_cell_vlservers_show,
};
-static const struct file_operations afs_proc_cell_vlservers_fops = {
- .open = afs_proc_cell_vlservers_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
-static int afs_proc_servers_open(struct inode *inode, struct file *file);
static void *afs_proc_servers_start(struct seq_file *p, loff_t *pos);
static void *afs_proc_servers_next(struct seq_file *p, void *v,
loff_t *pos);
@@ -119,13 +101,6 @@ static const struct seq_operations afs_proc_servers_ops = {
.show = afs_proc_servers_show,
};
-static const struct file_operations afs_proc_servers_fops = {
- .open = afs_proc_servers_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
static int afs_proc_sysname_open(struct inode *inode, struct file *file);
static int afs_proc_sysname_release(struct inode *inode, struct file *file);
static void *afs_proc_sysname_start(struct seq_file *p, loff_t *pos);
@@ -152,7 +127,7 @@ static const struct file_operations afs_proc_sysname_fops = {
.write = afs_proc_sysname_write,
};
-static const struct file_operations afs_proc_stats_fops;
+static int afs_proc_stats_show(struct seq_file *m, void *v);
/*
* initialise the /proc/fs/afs/ directory
@@ -167,8 +142,8 @@ int afs_proc_init(struct afs_net *net)
if (!proc_create("cells", 0644, net->proc_afs, &afs_proc_cells_fops) ||
!proc_create("rootcell", 0644, net->proc_afs, &afs_proc_rootcell_fops) ||
- !proc_create("servers", 0644, net->proc_afs, &afs_proc_servers_fops) ||
- !proc_create("stats", 0644, net->proc_afs, &afs_proc_stats_fops) ||
+ !proc_create_seq("servers", 0644, net->proc_afs, &afs_proc_servers_ops) ||
+ !proc_create_single("stats", 0644, net->proc_afs, afs_proc_stats_show) ||
!proc_create("sysname", 0644, net->proc_afs, &afs_proc_sysname_fops))
goto error_tree;
@@ -196,16 +171,7 @@ void afs_proc_cleanup(struct afs_net *net)
*/
static int afs_proc_cells_open(struct inode *inode, struct file *file)
{
- struct seq_file *m;
- int ret;
-
- ret = seq_open(file, &afs_proc_cells_ops);
- if (ret < 0)
- return ret;
-
- m = file->private_data;
- m->private = PDE_DATA(inode);
- return 0;
+ return seq_open(file, &afs_proc_cells_ops);
}
/*
@@ -430,10 +396,11 @@ int afs_proc_cell_setup(struct afs_net *net, struct afs_cell *cell)
if (!dir)
goto error_dir;
- if (!proc_create_data("vlservers", 0, dir,
- &afs_proc_cell_vlservers_fops, cell) ||
- !proc_create_data("volumes", 0, dir,
- &afs_proc_cell_volumes_fops, cell))
+ if (!proc_create_seq_data("vlservers", 0, dir,
+ &afs_proc_cell_vlservers_ops, cell))
+ goto error_tree;
+ if (!proc_create_seq_data("volumes", 0, dir, &afs_proc_cell_volumes_ops,
+ cell))
goto error_tree;
_leave(" = 0");
@@ -459,36 +426,13 @@ void afs_proc_cell_remove(struct afs_net *net, struct afs_cell *cell)
}
/*
- * open "/proc/fs/afs/<cell>/volumes" which provides a summary of extant cells
- */
-static int afs_proc_cell_volumes_open(struct inode *inode, struct file *file)
-{
- struct afs_cell *cell;
- struct seq_file *m;
- int ret;
-
- cell = PDE_DATA(inode);
- if (!cell)
- return -ENOENT;
-
- ret = seq_open(file, &afs_proc_cell_volumes_ops);
- if (ret < 0)
- return ret;
-
- m = file->private_data;
- m->private = cell;
-
- return 0;
-}
-
-/*
* set up the iterator to start reading from the cell's volume list and return the
* first item
*/
static void *afs_proc_cell_volumes_start(struct seq_file *m, loff_t *_pos)
__acquires(cell->proc_lock)
{
- struct afs_cell *cell = m->private;
+ struct afs_cell *cell = PDE_DATA(file_inode(m->file));
_enter("cell=%p pos=%Ld", cell, *_pos);
@@ -502,7 +446,7 @@ static void *afs_proc_cell_volumes_start(struct seq_file *m, loff_t *_pos)
static void *afs_proc_cell_volumes_next(struct seq_file *p, void *v,
loff_t *_pos)
{
- struct afs_cell *cell = p->private;
+ struct afs_cell *cell = PDE_DATA(file_inode(p->file));
_enter("cell=%p pos=%Ld", cell, *_pos);
return seq_list_next(v, &cell->proc_volumes, _pos);
@@ -514,7 +458,7 @@ static void *afs_proc_cell_volumes_next(struct seq_file *p, void *v,
static void afs_proc_cell_volumes_stop(struct seq_file *p, void *v)
__releases(cell->proc_lock)
{
- struct afs_cell *cell = p->private;
+ struct afs_cell *cell = PDE_DATA(file_inode(p->file));
read_unlock(&cell->proc_lock);
}
@@ -530,7 +474,7 @@ static const char afs_vol_types[3][3] = {
*/
static int afs_proc_cell_volumes_show(struct seq_file *m, void *v)
{
- struct afs_cell *cell = m->private;
+ struct afs_cell *cell = PDE_DATA(file_inode(m->file));
struct afs_volume *vol = list_entry(v, struct afs_volume, proc_link);
/* Display header on line 1 */
@@ -547,30 +491,6 @@ static int afs_proc_cell_volumes_show(struct seq_file *m, void *v)
}
/*
- * open "/proc/fs/afs/<cell>/vlservers" which provides a list of volume
- * location server
- */
-static int afs_proc_cell_vlservers_open(struct inode *inode, struct file *file)
-{
- struct afs_cell *cell;
- struct seq_file *m;
- int ret;
-
- cell = PDE_DATA(inode);
- if (!cell)
- return -ENOENT;
-
- ret = seq_open(file, &afs_proc_cell_vlservers_ops);
- if (ret<0)
- return ret;
-
- m = file->private_data;
- m->private = cell;
-
- return 0;
-}
-
-/*
* set up the iterator to start reading from the cell's VL server list and return the
* first item
*/
@@ -578,7 +498,7 @@ static void *afs_proc_cell_vlservers_start(struct seq_file *m, loff_t *_pos)
__acquires(rcu)
{
struct afs_addr_list *alist;
- struct afs_cell *cell = m->private;
+ struct afs_cell *cell = PDE_DATA(file_inode(m->file));
loff_t pos = *_pos;
rcu_read_lock();
@@ -603,7 +523,7 @@ static void *afs_proc_cell_vlservers_next(struct seq_file *p, void *v,
loff_t *_pos)
{
struct afs_addr_list *alist;
- struct afs_cell *cell = p->private;
+ struct afs_cell *cell = PDE_DATA(file_inode(p->file));
loff_t pos;
alist = rcu_dereference(cell->vl_addrs);
@@ -644,15 +564,6 @@ static int afs_proc_cell_vlservers_show(struct seq_file *m, void *v)
}
/*
- * open "/proc/fs/afs/servers" which provides a summary of active
- * servers
- */
-static int afs_proc_servers_open(struct inode *inode, struct file *file)
-{
- return seq_open(file, &afs_proc_servers_ops);
-}
-
-/*
* Set up the iterator to start reading from the server list and return the
* first item.
*/
@@ -931,18 +842,3 @@ static int afs_proc_stats_show(struct seq_file *m, void *v)
atomic_long_read(&net->n_store_bytes));
return 0;
}
-
-/*
- * Open "/proc/fs/afs/stats" to allow reading of the stat counters.
- */
-static int afs_proc_stats_open(struct inode *inode, struct file *file)
-{
- return single_open(file, afs_proc_stats_show, NULL);
-}
-
-static const struct file_operations afs_proc_stats_fops = {
- .open = afs_proc_stats_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
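The /proc conversion above relies on the then-new proc_create_seq()/proc_create_single() helpers, which absorb the open/read/llseek/release boilerplate; per-entry data moves from seq_file->private to PDE_DATA(file_inode(m->file)). A minimal sketch of registering a single-show file (names and data are illustrative):

	#include <linux/module.h>
	#include <linux/proc_fs.h>
	#include <linux/seq_file.h>

	static int demo_show(struct seq_file *m, void *v)
	{
		/* Entry data is recovered from the inode, not m->private. */
		seq_printf(m, "%s\n", (const char *)PDE_DATA(file_inode(m->file)));
		return 0;
	}

	static int __init demo_init(void)
	{
		if (!proc_create_single_data("demo", 0444, NULL, demo_show,
					     "demo-data"))
			return -ENOMEM;
		return 0;
	}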
diff --git a/fs/afs/rotate.c b/fs/afs/rotate.c
index ac0feac9d746..e065bc0768e6 100644
--- a/fs/afs/rotate.c
+++ b/fs/afs/rotate.c
@@ -179,7 +179,7 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc)
*/
if (fc->flags & AFS_FS_CURSOR_VNOVOL) {
fc->ac.error = -EREMOTEIO;
- goto failed;
+ goto next_server;
}
write_lock(&vnode->volume->servers_lock);
@@ -201,7 +201,7 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc)
*/
if (vnode->volume->servers == fc->server_list) {
fc->ac.error = -EREMOTEIO;
- goto failed;
+ goto next_server;
}
/* Try again */
@@ -350,8 +350,8 @@ use_server:
* break request before we've finished decoding the reply and
* installing the vnode.
*/
- fc->ac.error = afs_register_server_cb_interest(
- vnode, &fc->server_list->servers[fc->index]);
+ fc->ac.error = afs_register_server_cb_interest(vnode, fc->server_list,
+ fc->index);
if (fc->ac.error < 0)
goto failed;
@@ -369,8 +369,16 @@ use_server:
if (!test_bit(AFS_SERVER_FL_PROBED, &server->flags)) {
fc->ac.alist = afs_get_addrlist(alist);
- if (!afs_probe_fileserver(fc))
- goto failed;
+ if (!afs_probe_fileserver(fc)) {
+ switch (fc->ac.error) {
+ case -ENOMEM:
+ case -ERESTARTSYS:
+ case -EINTR:
+ goto failed;
+ default:
+ goto next_server;
+ }
+ }
}
if (!fc->ac.alist)
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index 5c6263972ec9..08735948f15d 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -41,6 +41,7 @@ int afs_open_socket(struct afs_net *net)
{
struct sockaddr_rxrpc srx;
struct socket *socket;
+ unsigned int min_level;
int ret;
_enter("");
@@ -60,6 +61,12 @@ int afs_open_socket(struct afs_net *net)
srx.transport.sin6.sin6_family = AF_INET6;
srx.transport.sin6.sin6_port = htons(AFS_CM_PORT);
+ min_level = RXRPC_SECURITY_ENCRYPT;
+ ret = kernel_setsockopt(socket, SOL_RXRPC, RXRPC_MIN_SECURITY_LEVEL,
+ (void *)&min_level, sizeof(min_level));
+ if (ret < 0)
+ goto error_2;
+
ret = kernel_bind(socket, (struct sockaddr *) &srx, sizeof(srx));
if (ret == -EADDRINUSE) {
srx.transport.sin6.sin6_port = 0;
@@ -482,8 +489,12 @@ static void afs_deliver_to_call(struct afs_call *call)
state = READ_ONCE(call->state);
switch (ret) {
case 0:
- if (state == AFS_CALL_CL_PROC_REPLY)
+ if (state == AFS_CALL_CL_PROC_REPLY) {
+ if (call->cbi)
+ set_bit(AFS_SERVER_FL_MAY_HAVE_CB,
+ &call->cbi->server->flags);
goto call_complete;
+ }
ASSERTCMP(state, >, AFS_CALL_CL_PROC_REPLY);
goto done;
case -EINPROGRESS:
@@ -493,11 +504,6 @@ static void afs_deliver_to_call(struct afs_call *call)
case -ECONNABORTED:
ASSERTCMP(state, ==, AFS_CALL_COMPLETE);
goto done;
- case -ENOTCONN:
- abort_code = RX_CALL_DEAD;
- rxrpc_kernel_abort_call(call->net->socket, call->rxcall,
- abort_code, ret, "KNC");
- goto local_abort;
case -ENOTSUPP:
abort_code = RXGEN_OPCODE;
rxrpc_kernel_abort_call(call->net->socket, call->rxcall,
diff --git a/fs/afs/security.c b/fs/afs/security.c
index cea2fff313dc..81dfedb7879f 100644
--- a/fs/afs/security.c
+++ b/fs/afs/security.c
@@ -147,8 +147,7 @@ void afs_cache_permit(struct afs_vnode *vnode, struct key *key,
break;
}
- if (cb_break != (vnode->cb_break +
- vnode->cb_interest->server->cb_s_break)) {
+ if (cb_break != afs_cb_break_sum(vnode, vnode->cb_interest)) {
changed = true;
break;
}
@@ -178,7 +177,7 @@ void afs_cache_permit(struct afs_vnode *vnode, struct key *key,
}
}
- if (cb_break != (vnode->cb_break + vnode->cb_interest->server->cb_s_break))
+ if (cb_break != afs_cb_break_sum(vnode, vnode->cb_interest))
goto someone_else_changed_it;
/* We need a ref on any permits list we want to copy as we'll have to
@@ -257,7 +256,7 @@ found:
spin_lock(&vnode->lock);
zap = rcu_access_pointer(vnode->permit_cache);
- if (cb_break == (vnode->cb_break + vnode->cb_interest->server->cb_s_break) &&
+ if (cb_break == afs_cb_break_sum(vnode, vnode->cb_interest) &&
zap == permits)
rcu_assign_pointer(vnode->permit_cache, replacement);
else
@@ -373,18 +372,14 @@ int afs_permission(struct inode *inode, int mask)
mask, access, S_ISDIR(inode->i_mode) ? "dir" : "file");
if (S_ISDIR(inode->i_mode)) {
- if (mask & MAY_EXEC) {
+ if (mask & (MAY_EXEC | MAY_READ | MAY_CHDIR)) {
if (!(access & AFS_ACE_LOOKUP))
goto permission_denied;
- } else if (mask & MAY_READ) {
- if (!(access & AFS_ACE_LOOKUP))
- goto permission_denied;
- } else if (mask & MAY_WRITE) {
+ }
+ if (mask & MAY_WRITE) {
if (!(access & (AFS_ACE_DELETE | /* rmdir, unlink, rename from */
AFS_ACE_INSERT))) /* create, mkdir, symlink, rename to */
goto permission_denied;
- } else {
- BUG();
}
} else {
if (!(access & AFS_ACE_LOOKUP))
diff --git a/fs/afs/server.c b/fs/afs/server.c
index 629c74986cff..3af4625e2f8c 100644
--- a/fs/afs/server.c
+++ b/fs/afs/server.c
@@ -67,12 +67,6 @@ struct afs_server *afs_find_server(struct afs_net *net,
sizeof(struct in6_addr));
if (diff == 0)
goto found;
- if (diff < 0) {
- // TODO: Sort the list
- //if (i == alist->nr_ipv4)
- // goto not_found;
- break;
- }
}
}
} else {
@@ -87,17 +81,10 @@ struct afs_server *afs_find_server(struct afs_net *net,
(u32 __force)b->sin6_addr.s6_addr32[3]);
if (diff == 0)
goto found;
- if (diff < 0) {
- // TODO: Sort the list
- //if (i == 0)
- // goto not_found;
- break;
- }
}
}
}
- //not_found:
server = NULL;
found:
if (server && !atomic_inc_not_zero(&server->usage))
@@ -395,14 +382,16 @@ static void afs_destroy_server(struct afs_net *net, struct afs_server *server)
struct afs_addr_list *alist = rcu_access_pointer(server->addresses);
struct afs_addr_cursor ac = {
.alist = alist,
- .addr = &alist->addrs[0],
.start = alist->index,
- .index = alist->index,
+ .index = 0,
+ .addr = &alist->addrs[alist->index],
.error = 0,
};
_enter("%p", server);
- afs_fs_give_up_all_callbacks(net, server, &ac, NULL);
+ if (test_bit(AFS_SERVER_FL_MAY_HAVE_CB, &server->flags))
+ afs_fs_give_up_all_callbacks(net, server, &ac, NULL);
+
call_rcu(&server->rcu, afs_server_rcu);
afs_dec_servers_outstanding(net);
}
diff --git a/fs/afs/server_list.c b/fs/afs/server_list.c
index 0f8dc4c8f07c..8a5760aa5832 100644
--- a/fs/afs/server_list.c
+++ b/fs/afs/server_list.c
@@ -49,6 +49,7 @@ struct afs_server_list *afs_alloc_server_list(struct afs_cell *cell,
goto error;
refcount_set(&slist->usage, 1);
+ rwlock_init(&slist->lock);
/* Make sure a record exists for each server in the list. */
for (i = 0; i < vldb->nr_servers; i++) {
@@ -64,9 +65,11 @@ struct afs_server_list *afs_alloc_server_list(struct afs_cell *cell,
goto error_2;
}
- /* Insertion-sort by server pointer */
+ /* Insertion-sort by UUID */
for (j = 0; j < slist->nr_servers; j++)
- if (slist->servers[j].server >= server)
+ if (memcmp(&slist->servers[j].server->uuid,
+ &server->uuid,
+ sizeof(server->uuid)) >= 0)
break;
if (j < slist->nr_servers) {
if (slist->servers[j].server == server) {
diff --git a/fs/afs/super.c b/fs/afs/super.c
index 65081ec3c36e..9e5d7966621c 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -590,7 +590,7 @@ static void afs_i_init_once(void *_vnode)
memset(vnode, 0, sizeof(*vnode));
inode_init_once(&vnode->vfs_inode);
mutex_init(&vnode->io_lock);
- mutex_init(&vnode->validate_lock);
+ init_rwsem(&vnode->validate_lock);
spin_lock_init(&vnode->wb_lock);
spin_lock_init(&vnode->lock);
INIT_LIST_HEAD(&vnode->wb_keys);
@@ -688,7 +688,7 @@ static int afs_statfs(struct dentry *dentry, struct kstatfs *buf)
if (afs_begin_vnode_operation(&fc, vnode, key)) {
fc.flags |= AFS_FS_CURSOR_NO_VSLEEP;
while (afs_select_fileserver(&fc)) {
- fc.cb_break = vnode->cb_break + vnode->cb_s_break;
+ fc.cb_break = afs_calc_vnode_cb_break(vnode);
afs_fs_get_volume_status(&fc, &vs);
}
diff --git a/fs/afs/vlclient.c b/fs/afs/vlclient.c
index 1ed7e2fd2f35..c3b740813fc7 100644
--- a/fs/afs/vlclient.c
+++ b/fs/afs/vlclient.c
@@ -23,7 +23,7 @@ static int afs_deliver_vl_get_entry_by_name_u(struct afs_call *call)
struct afs_uvldbentry__xdr *uvldb;
struct afs_vldb_entry *entry;
bool new_only = false;
- u32 tmp, nr_servers;
+ u32 tmp, nr_servers, vlflags;
int i, ret;
_enter("");
@@ -55,6 +55,7 @@ static int afs_deliver_vl_get_entry_by_name_u(struct afs_call *call)
new_only = true;
}
+ vlflags = ntohl(uvldb->flags);
for (i = 0; i < nr_servers; i++) {
struct afs_uuid__xdr *xdr;
struct afs_uuid *uuid;
@@ -64,12 +65,13 @@ static int afs_deliver_vl_get_entry_by_name_u(struct afs_call *call)
if (tmp & AFS_VLSF_DONTUSE ||
(new_only && !(tmp & AFS_VLSF_NEWREPSITE)))
continue;
- if (tmp & AFS_VLSF_RWVOL)
+ if (tmp & AFS_VLSF_RWVOL) {
entry->fs_mask[i] |= AFS_VOL_VTM_RW;
+ if (vlflags & AFS_VLF_BACKEXISTS)
+ entry->fs_mask[i] |= AFS_VOL_VTM_BAK;
+ }
if (tmp & AFS_VLSF_ROVOL)
entry->fs_mask[i] |= AFS_VOL_VTM_RO;
- if (tmp & AFS_VLSF_BACKVOL)
- entry->fs_mask[i] |= AFS_VOL_VTM_BAK;
if (!entry->fs_mask[i])
continue;
@@ -89,15 +91,14 @@ static int afs_deliver_vl_get_entry_by_name_u(struct afs_call *call)
for (i = 0; i < AFS_MAXTYPES; i++)
entry->vid[i] = ntohl(uvldb->volumeId[i]);
- tmp = ntohl(uvldb->flags);
- if (tmp & AFS_VLF_RWEXISTS)
+ if (vlflags & AFS_VLF_RWEXISTS)
__set_bit(AFS_VLDB_HAS_RW, &entry->flags);
- if (tmp & AFS_VLF_ROEXISTS)
+ if (vlflags & AFS_VLF_ROEXISTS)
__set_bit(AFS_VLDB_HAS_RO, &entry->flags);
- if (tmp & AFS_VLF_BACKEXISTS)
+ if (vlflags & AFS_VLF_BACKEXISTS)
__set_bit(AFS_VLDB_HAS_BAK, &entry->flags);
- if (!(tmp & (AFS_VLF_RWEXISTS | AFS_VLF_ROEXISTS | AFS_VLF_BACKEXISTS))) {
+ if (!(vlflags & (AFS_VLF_RWEXISTS | AFS_VLF_ROEXISTS | AFS_VLF_BACKEXISTS))) {
entry->error = -ENOMEDIUM;
__set_bit(AFS_VLDB_QUERY_ERROR, &entry->flags);
}
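The vlclient change above ties backup-volume service to the read/write site: a server is marked as serving the backup volume only if it serves the RW volume and the entry's AFS_VLF_BACKEXISTS flag is set, rather than trusting the per-server AFS_VLSF_BACKVOL bit. The mask derivation reduced to its core:

	/* The backup volume, when it exists, lives alongside the RW volume
	 * on the same fileserver. */
	static u8 fs_mask_for(u32 srvflags, u32 vlflags)
	{
		u8 mask = 0;

		if (srvflags & AFS_VLSF_RWVOL) {
			mask |= AFS_VOL_VTM_RW;
			if (vlflags & AFS_VLF_BACKEXISTS)
				mask |= AFS_VOL_VTM_BAK;
		}
		if (srvflags & AFS_VLSF_ROVOL)
			mask |= AFS_VOL_VTM_RO;
		return mask;
	}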
diff --git a/fs/afs/write.c b/fs/afs/write.c
index c164698dc304..8b39e6ebb40b 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -351,7 +351,7 @@ found_key:
ret = -ERESTARTSYS;
if (afs_begin_vnode_operation(&fc, vnode, wbk->key)) {
while (afs_select_fileserver(&fc)) {
- fc.cb_break = vnode->cb_break + vnode->cb_s_break;
+ fc.cb_break = afs_calc_vnode_cb_break(vnode);
afs_fs_store_data(&fc, mapping, first, last, offset, to);
}
diff --git a/fs/aio.c b/fs/aio.c
index 88d7927ffbc6..b850e92ee0d5 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -5,6 +5,7 @@
* Implements an efficient asynchronous io interface.
*
* Copyright 2000, 2001, 2002 Red Hat, Inc. All Rights Reserved.
+ * Copyright 2018 Christoph Hellwig.
*
* See ../COPYING for licensing terms.
*/
@@ -46,6 +47,8 @@
#include "internal.h"
+#define KIOCB_KEY 0
+
#define AIO_RING_MAGIC 0xa10a10a1
#define AIO_RING_COMPAT_FEATURES 1
#define AIO_RING_INCOMPAT_FEATURES 0
@@ -156,21 +159,29 @@ struct kioctx {
unsigned id;
};
-/*
- * We use ki_cancel == KIOCB_CANCELLED to indicate that a kiocb has been either
- * cancelled or completed (this makes a certain amount of sense because
- * successful cancellation - io_cancel() - does deliver the completion to
- * userspace).
- *
- * And since most things don't implement kiocb cancellation and we'd really like
- * kiocb completion to be lockless when possible, we use ki_cancel to
- * synchronize cancellation and completion - we only set it to KIOCB_CANCELLED
- * with xchg() or cmpxchg(), see batch_complete_aio() and kiocb_cancel().
- */
-#define KIOCB_CANCELLED ((void *) (~0ULL))
+struct fsync_iocb {
+ struct work_struct work;
+ struct file *file;
+ bool datasync;
+};
+
+struct poll_iocb {
+ struct file *file;
+ __poll_t events;
+ struct wait_queue_head *head;
+
+ union {
+ struct wait_queue_entry wait;
+ struct work_struct work;
+ };
+};
struct aio_kiocb {
- struct kiocb common;
+ union {
+ struct kiocb rw;
+ struct fsync_iocb fsync;
+ struct poll_iocb poll;
+ };
struct kioctx *ki_ctx;
kiocb_cancel_fn *ki_cancel;
@@ -264,9 +275,6 @@ static int __init aio_setup(void)
kiocb_cachep = KMEM_CACHE(aio_kiocb, SLAB_HWCACHE_ALIGN|SLAB_PANIC);
kioctx_cachep = KMEM_CACHE(kioctx,SLAB_HWCACHE_ALIGN|SLAB_PANIC);
-
- pr_debug("sizeof(struct page) = %zu\n", sizeof(struct page));
-
return 0;
}
__initcall(aio_setup);
@@ -552,42 +560,20 @@ static int aio_setup_ring(struct kioctx *ctx, unsigned int nr_events)
void kiocb_set_cancel_fn(struct kiocb *iocb, kiocb_cancel_fn *cancel)
{
- struct aio_kiocb *req = container_of(iocb, struct aio_kiocb, common);
+ struct aio_kiocb *req = container_of(iocb, struct aio_kiocb, rw);
struct kioctx *ctx = req->ki_ctx;
unsigned long flags;
- spin_lock_irqsave(&ctx->ctx_lock, flags);
-
- if (!req->ki_list.next)
- list_add(&req->ki_list, &ctx->active_reqs);
+ if (WARN_ON_ONCE(!list_empty(&req->ki_list)))
+ return;
+ spin_lock_irqsave(&ctx->ctx_lock, flags);
+ list_add_tail(&req->ki_list, &ctx->active_reqs);
req->ki_cancel = cancel;
-
spin_unlock_irqrestore(&ctx->ctx_lock, flags);
}
EXPORT_SYMBOL(kiocb_set_cancel_fn);
-static int kiocb_cancel(struct aio_kiocb *kiocb)
-{
- kiocb_cancel_fn *old, *cancel;
-
- /*
- * Don't want to set kiocb->ki_cancel = KIOCB_CANCELLED unless it
- * actually has a cancel function, hence the cmpxchg()
- */
-
- cancel = READ_ONCE(kiocb->ki_cancel);
- do {
- if (!cancel || cancel == KIOCB_CANCELLED)
- return -EINVAL;
-
- old = cancel;
- cancel = cmpxchg(&kiocb->ki_cancel, old, KIOCB_CANCELLED);
- } while (cancel != old);
-
- return cancel(&kiocb->common);
-}
-
/*
* free_ioctx() should be RCU delayed to synchronize against the RCU
* protected lookup_ioctx() and also needs process context to call
@@ -634,9 +620,8 @@ static void free_ioctx_users(struct percpu_ref *ref)
while (!list_empty(&ctx->active_reqs)) {
req = list_first_entry(&ctx->active_reqs,
struct aio_kiocb, ki_list);
-
+ req->ki_cancel(&req->rw);
list_del_init(&req->ki_list);
- kiocb_cancel(req);
}
spin_unlock_irq(&ctx->ctx_lock);
@@ -1042,7 +1027,7 @@ static inline struct aio_kiocb *aio_get_req(struct kioctx *ctx)
goto out_put;
percpu_ref_get(&ctx->reqs);
-
+ INIT_LIST_HEAD(&req->ki_list);
req->ki_ctx = ctx;
return req;
out_put:
@@ -1050,15 +1035,6 @@ out_put:
return NULL;
}
-static void kiocb_free(struct aio_kiocb *req)
-{
- if (req->common.ki_filp)
- fput(req->common.ki_filp);
- if (req->ki_eventfd != NULL)
- eventfd_ctx_put(req->ki_eventfd);
- kmem_cache_free(kiocb_cachep, req);
-}
-
static struct kioctx *lookup_ioctx(unsigned long ctx_id)
{
struct aio_ring __user *ring = (void __user *)ctx_id;
@@ -1078,8 +1054,8 @@ static struct kioctx *lookup_ioctx(unsigned long ctx_id)
ctx = rcu_dereference(table->table[id]);
if (ctx && ctx->user_id == ctx_id) {
- percpu_ref_get(&ctx->users);
- ret = ctx;
+ if (percpu_ref_tryget_live(&ctx->users))
+ ret = ctx;
}
out:
rcu_read_unlock();
@@ -1089,44 +1065,14 @@ out:
/* aio_complete
* Called when the io request on the given iocb is complete.
*/
-static void aio_complete(struct kiocb *kiocb, long res, long res2)
+static void aio_complete(struct aio_kiocb *iocb, long res, long res2)
{
- struct aio_kiocb *iocb = container_of(kiocb, struct aio_kiocb, common);
struct kioctx *ctx = iocb->ki_ctx;
struct aio_ring *ring;
struct io_event *ev_page, *event;
unsigned tail, pos, head;
unsigned long flags;
- if (kiocb->ki_flags & IOCB_WRITE) {
- struct file *file = kiocb->ki_filp;
-
- /*
- * Tell lockdep we inherited freeze protection from submission
- * thread.
- */
- if (S_ISREG(file_inode(file)->i_mode))
- __sb_writers_acquired(file_inode(file)->i_sb, SB_FREEZE_WRITE);
- file_end_write(file);
- }
-
- /*
- * Special case handling for sync iocbs:
- * - events go directly into the iocb for fast handling
- * - the sync task with the iocb in its stack holds the single iocb
- * ref, no other paths have a way to get another ref
- * - the sync task helpfully left a reference to itself in the iocb
- */
- BUG_ON(is_sync_kiocb(kiocb));
-
- if (iocb->ki_list.next) {
- unsigned long flags;
-
- spin_lock_irqsave(&ctx->ctx_lock, flags);
- list_del(&iocb->ki_list);
- spin_unlock_irqrestore(&ctx->ctx_lock, flags);
- }
-
/*
* Add a completion event to the ring buffer. Must be done holding
* ctx->completion_lock to prevent other code from messing with the tail
@@ -1180,11 +1126,12 @@ static void aio_complete(struct kiocb *kiocb, long res, long res2)
* eventfd. The eventfd_signal() function is safe to be called
* from IRQ context.
*/
- if (iocb->ki_eventfd != NULL)
+ if (iocb->ki_eventfd) {
eventfd_signal(iocb->ki_eventfd, 1);
+ eventfd_ctx_put(iocb->ki_eventfd);
+ }
- /* everything turned out well, dispose of the aiocb. */
- kiocb_free(iocb);
+ kmem_cache_free(kiocb_cachep, iocb);
/*
* We have to order our ring_info tail store above and test
@@ -1250,14 +1197,13 @@ static long aio_read_events_ring(struct kioctx *ctx,
if (head == tail)
break;
- avail = min(avail, nr - ret);
- avail = min_t(long, avail, AIO_EVENTS_PER_PAGE -
- ((head + AIO_EVENTS_OFFSET) % AIO_EVENTS_PER_PAGE));
-
pos = head + AIO_EVENTS_OFFSET;
page = ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE];
pos %= AIO_EVENTS_PER_PAGE;
+ avail = min(avail, nr - ret);
+ avail = min_t(long, avail, AIO_EVENTS_PER_PAGE - pos);
+
ev = kmap(page);
copy_ret = copy_to_user(event + ret, ev + pos,
sizeof(*ev) * avail);
@@ -1328,10 +1274,6 @@ static long read_events(struct kioctx *ctx, long min_nr, long nr,
wait_event_interruptible_hrtimeout(ctx->wait,
aio_read_events(ctx, min_nr, nr, event, &ret),
until);
-
- if (!ret && signal_pending(current))
- ret = -EINTR;
-
return ret;
}
@@ -1447,6 +1389,58 @@ SYSCALL_DEFINE1(io_destroy, aio_context_t, ctx)
return -EINVAL;
}
+static void aio_remove_iocb(struct aio_kiocb *iocb)
+{
+ struct kioctx *ctx = iocb->ki_ctx;
+ unsigned long flags;
+
+ spin_lock_irqsave(&ctx->ctx_lock, flags);
+ list_del(&iocb->ki_list);
+ spin_unlock_irqrestore(&ctx->ctx_lock, flags);
+}
+
+static void aio_complete_rw(struct kiocb *kiocb, long res, long res2)
+{
+ struct aio_kiocb *iocb = container_of(kiocb, struct aio_kiocb, rw);
+
+ if (!list_empty_careful(&iocb->ki_list))
+ aio_remove_iocb(iocb);
+
+ if (kiocb->ki_flags & IOCB_WRITE) {
+ struct inode *inode = file_inode(kiocb->ki_filp);
+
+ /*
+ * Tell lockdep we inherited freeze protection from submission
+ * thread.
+ */
+ if (S_ISREG(inode->i_mode))
+ __sb_writers_acquired(inode->i_sb, SB_FREEZE_WRITE);
+ file_end_write(kiocb->ki_filp);
+ }
+
+ fput(kiocb->ki_filp);
+ aio_complete(iocb, res, res2);
+}
+
+static int aio_prep_rw(struct kiocb *req, struct iocb *iocb)
+{
+ int ret;
+
+ req->ki_filp = fget(iocb->aio_fildes);
+ if (unlikely(!req->ki_filp))
+ return -EBADF;
+ req->ki_complete = aio_complete_rw;
+ req->ki_pos = iocb->aio_offset;
+ req->ki_flags = iocb_flags(req->ki_filp);
+ if (iocb->aio_flags & IOCB_FLAG_RESFD)
+ req->ki_flags |= IOCB_EVENTFD;
+ req->ki_hint = file_write_hint(req->ki_filp);
+ ret = kiocb_set_rw_flags(req, iocb->aio_rw_flags);
+ if (unlikely(ret))
+ fput(req->ki_filp);
+ return ret;
+}
+
static int aio_setup_rw(int rw, struct iocb *iocb, struct iovec **iovec,
bool vectored, bool compat, struct iov_iter *iter)
{
@@ -1466,11 +1460,11 @@ static int aio_setup_rw(int rw, struct iocb *iocb, struct iovec **iovec,
return import_iovec(rw, buf, len, UIO_FASTIOV, iovec, iter);
}
-static inline ssize_t aio_ret(struct kiocb *req, ssize_t ret)
+static inline void aio_rw_done(struct kiocb *req, ssize_t ret)
{
switch (ret) {
case -EIOCBQUEUED:
- return ret;
+ break;
case -ERESTARTSYS:
case -ERESTARTNOINTR:
case -ERESTARTNOHAND:
@@ -1482,85 +1476,270 @@ static inline ssize_t aio_ret(struct kiocb *req, ssize_t ret)
ret = -EINTR;
/*FALLTHRU*/
default:
- aio_complete(req, ret, 0);
- return 0;
+ aio_complete_rw(req, ret, 0);
}
}
static ssize_t aio_read(struct kiocb *req, struct iocb *iocb, bool vectored,
bool compat)
{
- struct file *file = req->ki_filp;
struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
struct iov_iter iter;
+ struct file *file;
ssize_t ret;
+ ret = aio_prep_rw(req, iocb);
+ if (ret)
+ return ret;
+ file = req->ki_filp;
+
+ ret = -EBADF;
if (unlikely(!(file->f_mode & FMODE_READ)))
- return -EBADF;
+ goto out_fput;
+ ret = -EINVAL;
if (unlikely(!file->f_op->read_iter))
- return -EINVAL;
+ goto out_fput;
ret = aio_setup_rw(READ, iocb, &iovec, vectored, compat, &iter);
if (ret)
- return ret;
+ goto out_fput;
ret = rw_verify_area(READ, file, &req->ki_pos, iov_iter_count(&iter));
if (!ret)
- ret = aio_ret(req, call_read_iter(file, req, &iter));
+ aio_rw_done(req, call_read_iter(file, req, &iter));
kfree(iovec);
+out_fput:
+ if (unlikely(ret))
+ fput(file);
return ret;
}
static ssize_t aio_write(struct kiocb *req, struct iocb *iocb, bool vectored,
bool compat)
{
- struct file *file = req->ki_filp;
struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
struct iov_iter iter;
+ struct file *file;
ssize_t ret;
+ ret = aio_prep_rw(req, iocb);
+ if (ret)
+ return ret;
+ file = req->ki_filp;
+
+ ret = -EBADF;
if (unlikely(!(file->f_mode & FMODE_WRITE)))
- return -EBADF;
+ goto out_fput;
+ ret = -EINVAL;
if (unlikely(!file->f_op->write_iter))
- return -EINVAL;
+ goto out_fput;
ret = aio_setup_rw(WRITE, iocb, &iovec, vectored, compat, &iter);
if (ret)
- return ret;
+ goto out_fput;
ret = rw_verify_area(WRITE, file, &req->ki_pos, iov_iter_count(&iter));
if (!ret) {
- req->ki_flags |= IOCB_WRITE;
- file_start_write(file);
- ret = aio_ret(req, call_write_iter(file, req, &iter));
/*
- * We release freeze protection in aio_complete(). Fool lockdep
- * by telling it the lock got released so that it doesn't
- * complain about held lock when we return to userspace.
+ * Open-code file_start_write here to grab freeze protection,
+ * which will be released by another thread in
+ * aio_complete_rw(). Fool lockdep by telling it the lock got
+ * released so that it doesn't complain about the held lock when
+ * we return to userspace.
*/
- if (S_ISREG(file_inode(file)->i_mode))
+ if (S_ISREG(file_inode(file)->i_mode)) {
+ __sb_start_write(file_inode(file)->i_sb, SB_FREEZE_WRITE, true);
__sb_writers_release(file_inode(file)->i_sb, SB_FREEZE_WRITE);
+ }
+ req->ki_flags |= IOCB_WRITE;
+ aio_rw_done(req, call_write_iter(file, req, &iter));
}
kfree(iovec);
+out_fput:
+ if (unlikely(ret))
+ fput(file);
return ret;
}
+static void aio_fsync_work(struct work_struct *work)
+{
+ struct fsync_iocb *req = container_of(work, struct fsync_iocb, work);
+ int ret;
+
+ ret = vfs_fsync(req->file, req->datasync);
+ fput(req->file);
+ aio_complete(container_of(req, struct aio_kiocb, fsync), ret, 0);
+}
+
+static int aio_fsync(struct fsync_iocb *req, struct iocb *iocb, bool datasync)
+{
+ if (unlikely(iocb->aio_buf || iocb->aio_offset || iocb->aio_nbytes ||
+ iocb->aio_rw_flags))
+ return -EINVAL;
+ req->file = fget(iocb->aio_fildes);
+ if (unlikely(!req->file))
+ return -EBADF;
+ if (unlikely(!req->file->f_op->fsync)) {
+ fput(req->file);
+ return -EINVAL;
+ }
+
+ req->datasync = datasync;
+ INIT_WORK(&req->work, aio_fsync_work);
+ schedule_work(&req->work);
+ return 0;
+}
+
+/* need to use list_del_init so we can check if item was present */
+static inline bool __aio_poll_remove(struct poll_iocb *req)
+{
+ if (list_empty(&req->wait.entry))
+ return false;
+ list_del_init(&req->wait.entry);
+ return true;
+}
+
+static inline void __aio_poll_complete(struct aio_kiocb *iocb, __poll_t mask)
+{
+ fput(iocb->poll.file);
+ aio_complete(iocb, mangle_poll(mask), 0);
+}
+
+static void aio_poll_work(struct work_struct *work)
+{
+ struct aio_kiocb *iocb = container_of(work, struct aio_kiocb, poll.work);
+
+ if (!list_empty_careful(&iocb->ki_list))
+ aio_remove_iocb(iocb);
+ __aio_poll_complete(iocb, iocb->poll.events);
+}
+
+static int aio_poll_cancel(struct kiocb *iocb)
+{
+ struct aio_kiocb *aiocb = container_of(iocb, struct aio_kiocb, rw);
+ struct poll_iocb *req = &aiocb->poll;
+ struct wait_queue_head *head = req->head;
+ bool found = false;
+
+ spin_lock(&head->lock);
+ found = __aio_poll_remove(req);
+ spin_unlock(&head->lock);
+
+ if (found) {
+ req->events = 0;
+ INIT_WORK(&req->work, aio_poll_work);
+ schedule_work(&req->work);
+ }
+ return 0;
+}
+
+static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
+ void *key)
+{
+ struct poll_iocb *req = container_of(wait, struct poll_iocb, wait);
+ struct aio_kiocb *iocb = container_of(req, struct aio_kiocb, poll);
+ struct file *file = req->file;
+ __poll_t mask = key_to_poll(key);
+
+ assert_spin_locked(&req->head->lock);
+
+ /* for instances that support it, check for an event match first: */
+ if (mask && !(mask & req->events))
+ return 0;
+
+ mask = file->f_op->poll_mask(file, req->events);
+ if (!mask)
+ return 0;
+
+ __aio_poll_remove(req);
+
+ /*
+ * Try completing without a context switch if we can acquire ctx_lock
+ * without spinning. Otherwise we need to defer to a workqueue to
+ * avoid a deadlock due to the lock order.
+ */
+ if (spin_trylock(&iocb->ki_ctx->ctx_lock)) {
+ list_del_init(&iocb->ki_list);
+ spin_unlock(&iocb->ki_ctx->ctx_lock);
+
+ __aio_poll_complete(iocb, mask);
+ } else {
+ req->events = mask;
+ INIT_WORK(&req->work, aio_poll_work);
+ schedule_work(&req->work);
+ }
+
+ return 1;
+}
+
+static ssize_t aio_poll(struct aio_kiocb *aiocb, struct iocb *iocb)
+{
+ struct kioctx *ctx = aiocb->ki_ctx;
+ struct poll_iocb *req = &aiocb->poll;
+ __poll_t mask;
+
+ /* reject any unknown events outside the normal event mask. */
+ if ((u16)iocb->aio_buf != iocb->aio_buf)
+ return -EINVAL;
+ /* reject fields that are not defined for poll */
+ if (iocb->aio_offset || iocb->aio_nbytes || iocb->aio_rw_flags)
+ return -EINVAL;
+
+ req->events = demangle_poll(iocb->aio_buf) | EPOLLERR | EPOLLHUP;
+ req->file = fget(iocb->aio_fildes);
+ if (unlikely(!req->file))
+ return -EBADF;
+ if (!file_has_poll_mask(req->file))
+ goto out_fail;
+
+ req->head = req->file->f_op->get_poll_head(req->file, req->events);
+ if (!req->head)
+ goto out_fail;
+ if (IS_ERR(req->head)) {
+ mask = EPOLLERR;
+ goto done;
+ }
+
+ init_waitqueue_func_entry(&req->wait, aio_poll_wake);
+ aiocb->ki_cancel = aio_poll_cancel;
+
+ spin_lock_irq(&ctx->ctx_lock);
+ spin_lock(&req->head->lock);
+ mask = req->file->f_op->poll_mask(req->file, req->events);
+ if (!mask) {
+ __add_wait_queue(req->head, &req->wait);
+ list_add_tail(&aiocb->ki_list, &ctx->active_reqs);
+ }
+ spin_unlock(&req->head->lock);
+ spin_unlock_irq(&ctx->ctx_lock);
+done:
+ if (mask)
+ __aio_poll_complete(aiocb, mask);
+ return 0;
+out_fail:
+ fput(req->file);
+ return -EINVAL; /* same as no support for IOCB_CMD_POLL */
+}
+
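Seen from userspace, the requested events travel in aio_buf (only the low 16 bits are accepted, per the check above) and the ready mask comes back epoll-encoded in the completion's res field. A sketch, assuming the IOCB_CMD_POLL opcode from the matching uapi update:

	#include <linux/aio_abi.h>
	#include <poll.h>
	#include <string.h>
	#include <sys/syscall.h>
	#include <unistd.h>

	/* Hypothetical example: one-shot poll for readability via aio. */
	static int submit_poll(aio_context_t ctx, int fd)
	{
		struct iocb cb;
		struct iocb *cbs[1] = { &cb };

		memset(&cb, 0, sizeof(cb));
		cb.aio_fildes = fd;
		cb.aio_lio_opcode = IOCB_CMD_POLL;
		cb.aio_buf = POLLIN;	/* requested events, must fit in 16 bits */
		/* aio_offset, aio_nbytes and aio_rw_flags must be zero */
		return syscall(__NR_io_submit, ctx, 1, cbs);
	}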
static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
- struct iocb *iocb, bool compat)
+ bool compat)
{
struct aio_kiocb *req;
- struct file *file;
+ struct iocb iocb;
ssize_t ret;
+ if (unlikely(copy_from_user(&iocb, user_iocb, sizeof(iocb))))
+ return -EFAULT;
+
/* enforce forwards compatibility on users */
- if (unlikely(iocb->aio_reserved2)) {
+ if (unlikely(iocb.aio_reserved2)) {
pr_debug("EINVAL: reserve field set\n");
return -EINVAL;
}
/* prevent overflows */
if (unlikely(
- (iocb->aio_buf != (unsigned long)iocb->aio_buf) ||
- (iocb->aio_nbytes != (size_t)iocb->aio_nbytes) ||
- ((ssize_t)iocb->aio_nbytes < 0)
+ (iocb.aio_buf != (unsigned long)iocb.aio_buf) ||
+ (iocb.aio_nbytes != (size_t)iocb.aio_nbytes) ||
+ ((ssize_t)iocb.aio_nbytes < 0)
)) {
pr_debug("EINVAL: overflow check\n");
return -EINVAL;
@@ -1570,37 +1749,19 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
if (unlikely(!req))
return -EAGAIN;
- req->common.ki_filp = file = fget(iocb->aio_fildes);
- if (unlikely(!req->common.ki_filp)) {
- ret = -EBADF;
- goto out_put_req;
- }
- req->common.ki_pos = iocb->aio_offset;
- req->common.ki_complete = aio_complete;
- req->common.ki_flags = iocb_flags(req->common.ki_filp);
- req->common.ki_hint = file_write_hint(file);
-
- if (iocb->aio_flags & IOCB_FLAG_RESFD) {
+ if (iocb.aio_flags & IOCB_FLAG_RESFD) {
/*
* If the IOCB_FLAG_RESFD flag of aio_flags is set, get an
* instance of the file* now. The file descriptor must be
* an eventfd() fd, and will be signaled for each completed
* event using the eventfd_signal() function.
*/
- req->ki_eventfd = eventfd_ctx_fdget((int) iocb->aio_resfd);
+ req->ki_eventfd = eventfd_ctx_fdget((int) iocb.aio_resfd);
if (IS_ERR(req->ki_eventfd)) {
ret = PTR_ERR(req->ki_eventfd);
req->ki_eventfd = NULL;
goto out_put_req;
}
-
- req->common.ki_flags |= IOCB_EVENTFD;
- }
-
- ret = kiocb_set_rw_flags(&req->common, iocb->aio_rw_flags);
- if (unlikely(ret)) {
- pr_debug("EINVAL: aio_rw_flags\n");
- goto out_put_req;
}
ret = put_user(KIOCB_KEY, &user_iocb->aio_key);
@@ -1610,41 +1771,67 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
}
req->ki_user_iocb = user_iocb;
- req->ki_user_data = iocb->aio_data;
+ req->ki_user_data = iocb.aio_data;
- get_file(file);
- switch (iocb->aio_lio_opcode) {
+ switch (iocb.aio_lio_opcode) {
case IOCB_CMD_PREAD:
- ret = aio_read(&req->common, iocb, false, compat);
+ ret = aio_read(&req->rw, &iocb, false, compat);
break;
case IOCB_CMD_PWRITE:
- ret = aio_write(&req->common, iocb, false, compat);
+ ret = aio_write(&req->rw, &iocb, false, compat);
break;
case IOCB_CMD_PREADV:
- ret = aio_read(&req->common, iocb, true, compat);
+ ret = aio_read(&req->rw, &iocb, true, compat);
break;
case IOCB_CMD_PWRITEV:
- ret = aio_write(&req->common, iocb, true, compat);
+ ret = aio_write(&req->rw, &iocb, true, compat);
+ break;
+ case IOCB_CMD_FSYNC:
+ ret = aio_fsync(&req->fsync, &iocb, false);
+ break;
+ case IOCB_CMD_FDSYNC:
+ ret = aio_fsync(&req->fsync, &iocb, true);
+ break;
+ case IOCB_CMD_POLL:
+ ret = aio_poll(req, &iocb);
break;
default:
- pr_debug("invalid aio operation %d\n", iocb->aio_lio_opcode);
+ pr_debug("invalid aio operation %d\n", iocb.aio_lio_opcode);
ret = -EINVAL;
break;
}
- fput(file);
- if (ret && ret != -EIOCBQUEUED)
+ /*
+	 * If ret is 0, we've either done aio_complete() ourselves or have
+ * arranged for that to be done asynchronously. Anything non-zero
+ * means that we need to destroy req ourselves.
+ */
+ if (ret)
goto out_put_req;
return 0;
out_put_req:
put_reqs_available(ctx, 1);
percpu_ref_put(&ctx->reqs);
- kiocb_free(req);
+ if (req->ki_eventfd)
+ eventfd_ctx_put(req->ki_eventfd);
+ kmem_cache_free(kiocb_cachep, req);
return ret;
}
-static long do_io_submit(aio_context_t ctx_id, long nr,
- struct iocb __user *__user *iocbpp, bool compat)
+/* sys_io_submit:
+ * Queue the nr iocbs pointed to by iocbpp for processing. Returns
+ * the number of iocbs queued. May return -EINVAL if the aio_context
+ * specified by ctx_id is invalid, if nr is < 0, if the iocb at
+ *	*iocbpp[0] is not properly initialized, or if the operation specified
+ * is invalid for the file descriptor in the iocb. May fail with
+ * -EFAULT if any of the data structures point to invalid data. May
+ * fail with -EBADF if the file descriptor specified in the first
+ * iocb is invalid. May fail with -EAGAIN if insufficient resources
+ * are available to queue any iocbs. Will return 0 if nr is 0. Will
+ * fail with -ENOSYS if not implemented.
+ */
+SYSCALL_DEFINE3(io_submit, aio_context_t, ctx_id, long, nr,
+ struct iocb __user * __user *, iocbpp)
{
struct kioctx *ctx;
long ret = 0;
@@ -1654,39 +1841,25 @@ static long do_io_submit(aio_context_t ctx_id, long nr,
if (unlikely(nr < 0))
return -EINVAL;
- if (unlikely(nr > LONG_MAX/sizeof(*iocbpp)))
- nr = LONG_MAX/sizeof(*iocbpp);
-
- if (unlikely(!access_ok(VERIFY_READ, iocbpp, (nr*sizeof(*iocbpp)))))
- return -EFAULT;
-
ctx = lookup_ioctx(ctx_id);
if (unlikely(!ctx)) {
pr_debug("EINVAL: invalid context id\n");
return -EINVAL;
}
- blk_start_plug(&plug);
+ if (nr > ctx->nr_events)
+ nr = ctx->nr_events;
- /*
- * AKPM: should this return a partial result if some of the IOs were
- * successfully submitted?
- */
- for (i=0; i<nr; i++) {
+ blk_start_plug(&plug);
+ for (i = 0; i < nr; i++) {
struct iocb __user *user_iocb;
- struct iocb tmp;
- if (unlikely(__get_user(user_iocb, iocbpp + i))) {
+ if (unlikely(get_user(user_iocb, iocbpp + i))) {
ret = -EFAULT;
break;
}
- if (unlikely(copy_from_user(&tmp, user_iocb, sizeof(tmp)))) {
- ret = -EFAULT;
- break;
- }
-
- ret = io_submit_one(ctx, user_iocb, &tmp, compat);
+ ret = io_submit_one(ctx, user_iocb, false);
if (ret)
break;
}
@@ -1696,59 +1869,44 @@ static long do_io_submit(aio_context_t ctx_id, long nr,
return i ? i : ret;
}
-/* sys_io_submit:
- * Queue the nr iocbs pointed to by iocbpp for processing. Returns
- * the number of iocbs queued. May return -EINVAL if the aio_context
- * specified by ctx_id is invalid, if nr is < 0, if the iocb at
- * *iocbpp[0] is not properly initialized, if the operation specified
- * is invalid for the file descriptor in the iocb. May fail with
- * -EFAULT if any of the data structures point to invalid data. May
- * fail with -EBADF if the file descriptor specified in the first
- * iocb is invalid. May fail with -EAGAIN if insufficient resources
- * are available to queue any iocbs. Will return 0 if nr is 0. Will
- * fail with -ENOSYS if not implemented.
- */
-SYSCALL_DEFINE3(io_submit, aio_context_t, ctx_id, long, nr,
- struct iocb __user * __user *, iocbpp)
-{
- return do_io_submit(ctx_id, nr, iocbpp, 0);
-}
-
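Tying the documented contract together, a minimal raw-syscall round trip — a sketch against the stable aio ABI, not code from this patch:

	#include <linux/aio_abi.h>
	#include <string.h>
	#include <sys/syscall.h>
	#include <unistd.h>

	/* Hypothetical example: create a context, submit one read, reap it. */
	static long read_via_aio(int fd, void *buf, size_t len)
	{
		aio_context_t ctx = 0;
		struct iocb cb;
		struct iocb *cbs[1] = { &cb };
		struct io_event ev;
		long ret;

		if (syscall(__NR_io_setup, 128, &ctx) < 0)
			return -1;

		memset(&cb, 0, sizeof(cb));
		cb.aio_fildes = fd;
		cb.aio_lio_opcode = IOCB_CMD_PREAD;
		cb.aio_buf = (unsigned long)buf;
		cb.aio_nbytes = len;

		ret = syscall(__NR_io_submit, ctx, 1, cbs);	/* 1 == all queued */
		if (ret == 1)
			ret = syscall(__NR_io_getevents, ctx, 1, 1, &ev, NULL);
		syscall(__NR_io_destroy, ctx);
		return ret;
	}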
#ifdef CONFIG_COMPAT
-static inline long
-copy_iocb(long nr, u32 __user *ptr32, struct iocb __user * __user *ptr64)
+COMPAT_SYSCALL_DEFINE3(io_submit, compat_aio_context_t, ctx_id,
+ int, nr, compat_uptr_t __user *, iocbpp)
{
- compat_uptr_t uptr;
- int i;
+ struct kioctx *ctx;
+ long ret = 0;
+ int i = 0;
+ struct blk_plug plug;
- for (i = 0; i < nr; ++i) {
- if (get_user(uptr, ptr32 + i))
- return -EFAULT;
- if (put_user(compat_ptr(uptr), ptr64 + i))
- return -EFAULT;
+ if (unlikely(nr < 0))
+ return -EINVAL;
+
+ ctx = lookup_ioctx(ctx_id);
+ if (unlikely(!ctx)) {
+ pr_debug("EINVAL: invalid context id\n");
+ return -EINVAL;
}
- return 0;
-}
-#define MAX_AIO_SUBMITS (PAGE_SIZE/sizeof(struct iocb *))
+ if (nr > ctx->nr_events)
+ nr = ctx->nr_events;
-COMPAT_SYSCALL_DEFINE3(io_submit, compat_aio_context_t, ctx_id,
- int, nr, u32 __user *, iocb)
-{
- struct iocb __user * __user *iocb64;
- long ret;
+ blk_start_plug(&plug);
+ for (i = 0; i < nr; i++) {
+ compat_uptr_t user_iocb;
- if (unlikely(nr < 0))
- return -EINVAL;
+ if (unlikely(get_user(user_iocb, iocbpp + i))) {
+ ret = -EFAULT;
+ break;
+ }
- if (nr > MAX_AIO_SUBMITS)
- nr = MAX_AIO_SUBMITS;
+ ret = io_submit_one(ctx, compat_ptr(user_iocb), true);
+ if (ret)
+ break;
+ }
+ blk_finish_plug(&plug);
- iocb64 = compat_alloc_user_space(nr * sizeof(*iocb64));
- ret = copy_iocb(nr, iocb, iocb64);
- if (!ret)
- ret = do_io_submit(ctx_id, nr, iocb64, 1);
- return ret;
+ percpu_ref_put(&ctx->users);
+ return i ? i : ret;
}
#endif
@@ -1756,15 +1914,12 @@ COMPAT_SYSCALL_DEFINE3(io_submit, compat_aio_context_t, ctx_id,
* Finds a given iocb for cancellation.
*/
static struct aio_kiocb *
-lookup_kiocb(struct kioctx *ctx, struct iocb __user *iocb, u32 key)
+lookup_kiocb(struct kioctx *ctx, struct iocb __user *iocb)
{
struct aio_kiocb *kiocb;
assert_spin_locked(&ctx->ctx_lock);
- if (key != KIOCB_KEY)
- return NULL;
-
/* TODO: use a hash or array, this sucks. */
list_for_each_entry(kiocb, &ctx->active_reqs, ki_list) {
if (kiocb->ki_user_iocb == iocb)
@@ -1788,25 +1943,24 @@ SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb,
{
struct kioctx *ctx;
struct aio_kiocb *kiocb;
+ int ret = -EINVAL;
u32 key;
- int ret;
- ret = get_user(key, &iocb->aio_key);
- if (unlikely(ret))
+ if (unlikely(get_user(key, &iocb->aio_key)))
return -EFAULT;
+ if (unlikely(key != KIOCB_KEY))
+ return -EINVAL;
ctx = lookup_ioctx(ctx_id);
if (unlikely(!ctx))
return -EINVAL;
spin_lock_irq(&ctx->ctx_lock);
-
- kiocb = lookup_kiocb(ctx, iocb, key);
- if (kiocb)
- ret = kiocb_cancel(kiocb);
- else
- ret = -EINVAL;
-
+ kiocb = lookup_kiocb(ctx, iocb);
+ if (kiocb) {
+ ret = kiocb->ki_cancel(&kiocb->rw);
+ list_del_init(&kiocb->ki_list);
+ }
spin_unlock_irq(&ctx->ctx_lock);
if (!ret) {
@@ -1861,13 +2015,60 @@ SYSCALL_DEFINE5(io_getevents, aio_context_t, ctx_id,
struct timespec __user *, timeout)
{
struct timespec64 ts;
+ int ret;
+
+ if (timeout && unlikely(get_timespec64(&ts, timeout)))
+ return -EFAULT;
+
+ ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &ts : NULL);
+ if (!ret && signal_pending(current))
+ ret = -EINTR;
+ return ret;
+}
+
+SYSCALL_DEFINE6(io_pgetevents,
+ aio_context_t, ctx_id,
+ long, min_nr,
+ long, nr,
+ struct io_event __user *, events,
+ struct timespec __user *, timeout,
+ const struct __aio_sigset __user *, usig)
+{
+ struct __aio_sigset ksig = { NULL, };
+ sigset_t ksigmask, sigsaved;
+ struct timespec64 ts;
+ int ret;
+
+ if (timeout && unlikely(get_timespec64(&ts, timeout)))
+ return -EFAULT;
+
+ if (usig && copy_from_user(&ksig, usig, sizeof(ksig)))
+ return -EFAULT;
- if (timeout) {
- if (unlikely(get_timespec64(&ts, timeout)))
+ if (ksig.sigmask) {
+ if (ksig.sigsetsize != sizeof(sigset_t))
+ return -EINVAL;
+ if (copy_from_user(&ksigmask, ksig.sigmask, sizeof(ksigmask)))
return -EFAULT;
+ sigdelsetmask(&ksigmask, sigmask(SIGKILL) | sigmask(SIGSTOP));
+ sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
+ }
+
+ ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &ts : NULL);
+ if (signal_pending(current)) {
+ if (ksig.sigmask) {
+ current->saved_sigmask = sigsaved;
+ set_restore_sigmask();
+ }
+
+ if (!ret)
+ ret = -ERESTARTNOHAND;
+ } else {
+ if (ksig.sigmask)
+ sigprocmask(SIG_SETMASK, &sigsaved, NULL);
}
- return do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &ts : NULL);
+ return ret;
}
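This gives aio the pselect(2)/ppoll(2) idiom: the signal mask is installed atomically around the wait and restored afterwards, with saved_sigmask/set_restore_sigmask() deferring the restore so a pending signal is delivered under the temporary mask. libc carried no wrapper when this landed, so a raw-syscall sketch; it assumes headers that define __NR_io_pgetevents, the struct mirrors the kernel's __aio_sigset, and sigsetsize assumes the usual 8-byte kernel sigset_t:

	#include <linux/aio_abi.h>
	#include <signal.h>
	#include <stddef.h>
	#include <sys/syscall.h>
	#include <unistd.h>

	struct aio_sigset {		/* userspace mirror of __aio_sigset */
		const sigset_t *sigmask;
		size_t sigsetsize;
	};

	/* Hypothetical example: wait for one event, only SIGINT deliverable. */
	static int wait_event_sigsafe(aio_context_t ctx, struct io_event *ev)
	{
		sigset_t mask;
		struct aio_sigset usig;

		sigfillset(&mask);
		sigdelset(&mask, SIGINT);	/* everything else stays blocked */
		usig.sigmask = &mask;
		usig.sigsetsize = 8;	/* kernel sigset_t size (assumption) */

		return syscall(__NR_io_pgetevents, ctx, 1, 1, ev, NULL, &usig);
	}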
#ifdef CONFIG_COMPAT
@@ -1878,13 +2079,64 @@ COMPAT_SYSCALL_DEFINE5(io_getevents, compat_aio_context_t, ctx_id,
struct compat_timespec __user *, timeout)
{
struct timespec64 t;
+ int ret;
+
+ if (timeout && compat_get_timespec64(&t, timeout))
+ return -EFAULT;
+
+ ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &t : NULL);
+ if (!ret && signal_pending(current))
+ ret = -EINTR;
+ return ret;
+}
+
+struct __compat_aio_sigset {
+ compat_sigset_t __user *sigmask;
+ compat_size_t sigsetsize;
+};
+
+COMPAT_SYSCALL_DEFINE6(io_pgetevents,
+ compat_aio_context_t, ctx_id,
+ compat_long_t, min_nr,
+ compat_long_t, nr,
+ struct io_event __user *, events,
+ struct compat_timespec __user *, timeout,
+ const struct __compat_aio_sigset __user *, usig)
+{
+ struct __compat_aio_sigset ksig = { NULL, };
+ sigset_t ksigmask, sigsaved;
+ struct timespec64 t;
+ int ret;
+
+ if (timeout && compat_get_timespec64(&t, timeout))
+ return -EFAULT;
+
+ if (usig && copy_from_user(&ksig, usig, sizeof(ksig)))
+ return -EFAULT;
- if (timeout) {
- if (compat_get_timespec64(&t, timeout))
+ if (ksig.sigmask) {
+ if (ksig.sigsetsize != sizeof(compat_sigset_t))
+ return -EINVAL;
+ if (get_compat_sigset(&ksigmask, ksig.sigmask))
return -EFAULT;
+ sigdelsetmask(&ksigmask, sigmask(SIGKILL) | sigmask(SIGSTOP));
+ sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
+ }
+ ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &t : NULL);
+ if (signal_pending(current)) {
+ if (ksig.sigmask) {
+ current->saved_sigmask = sigsaved;
+ set_restore_sigmask();
+ }
+ if (!ret)
+ ret = -ERESTARTNOHAND;
+ } else {
+ if (ksig.sigmask)
+ sigprocmask(SIG_SETMASK, &sigsaved, NULL);
}
- return do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &t : NULL);
+ return ret;
}
#endif
diff --git a/fs/attr.c b/fs/attr.c
index 12ffdb6fb63c..d0b4d34878fb 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -18,6 +18,32 @@
#include <linux/evm.h>
#include <linux/ima.h>
+static bool chown_ok(const struct inode *inode, kuid_t uid)
+{
+ if (uid_eq(current_fsuid(), inode->i_uid) &&
+ uid_eq(uid, inode->i_uid))
+ return true;
+ if (capable_wrt_inode_uidgid(inode, CAP_CHOWN))
+ return true;
+ if (uid_eq(inode->i_uid, INVALID_UID) &&
+ ns_capable(inode->i_sb->s_user_ns, CAP_CHOWN))
+ return true;
+ return false;
+}
+
+static bool chgrp_ok(const struct inode *inode, kgid_t gid)
+{
+ if (uid_eq(current_fsuid(), inode->i_uid) &&
+ (in_group_p(gid) || gid_eq(gid, inode->i_gid)))
+ return true;
+ if (capable_wrt_inode_uidgid(inode, CAP_CHOWN))
+ return true;
+ if (gid_eq(inode->i_gid, INVALID_GID) &&
+ ns_capable(inode->i_sb->s_user_ns, CAP_CHOWN))
+ return true;
+ return false;
+}
+
/**
* setattr_prepare - check if attribute changes to a dentry are allowed
* @dentry: dentry to check
@@ -52,17 +78,11 @@ int setattr_prepare(struct dentry *dentry, struct iattr *attr)
goto kill_priv;
/* Make sure a caller can chown. */
- if ((ia_valid & ATTR_UID) &&
- (!uid_eq(current_fsuid(), inode->i_uid) ||
- !uid_eq(attr->ia_uid, inode->i_uid)) &&
- !capable_wrt_inode_uidgid(inode, CAP_CHOWN))
+ if ((ia_valid & ATTR_UID) && !chown_ok(inode, attr->ia_uid))
return -EPERM;
/* Make sure caller can chgrp. */
- if ((ia_valid & ATTR_GID) &&
- (!uid_eq(current_fsuid(), inode->i_uid) ||
- (!in_group_p(attr->ia_gid) && !gid_eq(attr->ia_gid, inode->i_gid))) &&
- !capable_wrt_inode_uidgid(inode, CAP_CHOWN))
+ if ((ia_valid & ATTR_GID) && !chgrp_ok(inode, attr->ia_gid))
return -EPERM;
/* Make sure a caller can chmod. */
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index af2832aaeec5..4700b4534439 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -198,23 +198,16 @@ befs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
if (ret == BEFS_BT_NOT_FOUND) {
befs_debug(sb, "<--- %s %pd not found", __func__, dentry);
- d_add(dentry, NULL);
- return ERR_PTR(-ENOENT);
-
+ inode = NULL;
} else if (ret != BEFS_OK || offset == 0) {
befs_error(sb, "<--- %s Error", __func__);
- return ERR_PTR(-ENODATA);
+ inode = ERR_PTR(-ENODATA);
+ } else {
+ inode = befs_iget(dir->i_sb, (ino_t) offset);
}
-
- inode = befs_iget(dir->i_sb, (ino_t) offset);
- if (IS_ERR(inode))
- return ERR_CAST(inode);
-
- d_add(dentry, inode);
-
befs_debug(sb, "<--- %s", __func__);
- return NULL;
+ return d_splice_alias(inode, dentry);
}
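This conversion, like the bfs_lookup one below, leans on d_splice_alias() accepting every outcome a lookup can produce: a NULL inode creates a negative dentry, an ERR_PTR is propagated back to the caller, and a real inode is spliced in (reusing an existing alias where one exists). A schematic ->lookup built on that contract, with hypothetical foofs_* helpers:

	#include <linux/dcache.h>
	#include <linux/fs.h>

	/* hypothetical filesystem-specific helpers, declarations only */
	static int foofs_find_entry(struct inode *dir, const struct qstr *name,
				    ino_t *ino);
	static struct inode *foofs_iget(struct super_block *sb, ino_t ino);

	static struct dentry *foofs_lookup(struct inode *dir,
					   struct dentry *dentry,
					   unsigned int flags)
	{
		struct inode *inode;
		ino_t ino;
		int err;

		err = foofs_find_entry(dir, &dentry->d_name, &ino);
		if (err == -ENOENT)
			inode = NULL;		/* negative dentry */
		else if (err)
			inode = ERR_PTR(err);	/* propagated to the caller */
		else
			inode = foofs_iget(dir->i_sb, ino);

		/* one call handles the NULL, ERR_PTR and alias cases */
		return d_splice_alias(inode, dentry);
	}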
static int
diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c
index ee832ca5f734..f32f21c3bbc7 100644
--- a/fs/bfs/dir.c
+++ b/fs/bfs/dir.c
@@ -21,10 +21,9 @@
#define dprintf(x...)
#endif
-static int bfs_add_entry(struct inode *dir, const unsigned char *name,
- int namelen, int ino);
+static int bfs_add_entry(struct inode *dir, const struct qstr *child, int ino);
static struct buffer_head *bfs_find_entry(struct inode *dir,
- const unsigned char *name, int namelen,
+ const struct qstr *child,
struct bfs_dirent **res_dir);
static int bfs_readdir(struct file *f, struct dir_context *ctx)
@@ -111,8 +110,7 @@ static int bfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
mark_inode_dirty(inode);
bfs_dump_imap("create", s);
- err = bfs_add_entry(dir, dentry->d_name.name, dentry->d_name.len,
- inode->i_ino);
+ err = bfs_add_entry(dir, &dentry->d_name, inode->i_ino);
if (err) {
inode_dec_link_count(inode);
mutex_unlock(&info->bfs_lock);
@@ -136,19 +134,14 @@ static struct dentry *bfs_lookup(struct inode *dir, struct dentry *dentry,
return ERR_PTR(-ENAMETOOLONG);
mutex_lock(&info->bfs_lock);
- bh = bfs_find_entry(dir, dentry->d_name.name, dentry->d_name.len, &de);
+ bh = bfs_find_entry(dir, &dentry->d_name, &de);
if (bh) {
unsigned long ino = (unsigned long)le16_to_cpu(de->ino);
brelse(bh);
inode = bfs_iget(dir->i_sb, ino);
- if (IS_ERR(inode)) {
- mutex_unlock(&info->bfs_lock);
- return ERR_CAST(inode);
- }
}
mutex_unlock(&info->bfs_lock);
- d_add(dentry, inode);
- return NULL;
+ return d_splice_alias(inode, dentry);
}
static int bfs_link(struct dentry *old, struct inode *dir,
@@ -159,8 +152,7 @@ static int bfs_link(struct dentry *old, struct inode *dir,
int err;
mutex_lock(&info->bfs_lock);
- err = bfs_add_entry(dir, new->d_name.name, new->d_name.len,
- inode->i_ino);
+ err = bfs_add_entry(dir, &new->d_name, inode->i_ino);
if (err) {
mutex_unlock(&info->bfs_lock);
return err;
@@ -183,7 +175,7 @@ static int bfs_unlink(struct inode *dir, struct dentry *dentry)
struct bfs_sb_info *info = BFS_SB(inode->i_sb);
mutex_lock(&info->bfs_lock);
- bh = bfs_find_entry(dir, dentry->d_name.name, dentry->d_name.len, &de);
+ bh = bfs_find_entry(dir, &dentry->d_name, &de);
if (!bh || (le16_to_cpu(de->ino) != inode->i_ino))
goto out_brelse;
@@ -228,27 +220,21 @@ static int bfs_rename(struct inode *old_dir, struct dentry *old_dentry,
info = BFS_SB(old_inode->i_sb);
mutex_lock(&info->bfs_lock);
- old_bh = bfs_find_entry(old_dir,
- old_dentry->d_name.name,
- old_dentry->d_name.len, &old_de);
+ old_bh = bfs_find_entry(old_dir, &old_dentry->d_name, &old_de);
if (!old_bh || (le16_to_cpu(old_de->ino) != old_inode->i_ino))
goto end_rename;
error = -EPERM;
new_inode = d_inode(new_dentry);
- new_bh = bfs_find_entry(new_dir,
- new_dentry->d_name.name,
- new_dentry->d_name.len, &new_de);
+ new_bh = bfs_find_entry(new_dir, &new_dentry->d_name, &new_de);
if (new_bh && !new_inode) {
brelse(new_bh);
new_bh = NULL;
}
if (!new_bh) {
- error = bfs_add_entry(new_dir,
- new_dentry->d_name.name,
- new_dentry->d_name.len,
+ error = bfs_add_entry(new_dir, &new_dentry->d_name,
old_inode->i_ino);
if (error)
goto end_rename;
@@ -278,9 +264,10 @@ const struct inode_operations bfs_dir_inops = {
.rename = bfs_rename,
};
-static int bfs_add_entry(struct inode *dir, const unsigned char *name,
- int namelen, int ino)
+static int bfs_add_entry(struct inode *dir, const struct qstr *child, int ino)
{
+ const unsigned char *name = child->name;
+ int namelen = child->len;
struct buffer_head *bh;
struct bfs_dirent *de;
int block, sblock, eblock, off, pos;
@@ -332,12 +319,14 @@ static inline int bfs_namecmp(int len, const unsigned char *name,
}
static struct buffer_head *bfs_find_entry(struct inode *dir,
- const unsigned char *name, int namelen,
+ const struct qstr *child,
struct bfs_dirent **res_dir)
{
unsigned long block = 0, offset = 0;
struct buffer_head *bh = NULL;
struct bfs_dirent *de;
+ const unsigned char *name = child->name;
+ int namelen = child->len;
*res_dir = NULL;
if (namelen > BFS_NAMELEN)
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 7ec920e27065..bef6934b6189 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -272,7 +272,7 @@ struct blkdev_dio {
struct bio bio;
};
-static struct bio_set *blkdev_dio_pool __read_mostly;
+static struct bio_set blkdev_dio_pool;
static void blkdev_bio_end_io(struct bio *bio)
{
@@ -334,7 +334,7 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
(bdev_logical_block_size(bdev) - 1))
return -EINVAL;
- bio = bio_alloc_bioset(GFP_KERNEL, nr_pages, blkdev_dio_pool);
+ bio = bio_alloc_bioset(GFP_KERNEL, nr_pages, &blkdev_dio_pool);
bio_get(bio); /* extra ref for the completion handler */
dio = container_of(bio, struct blkdev_dio, bio);
@@ -432,10 +432,7 @@ blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
static __init int blkdev_init(void)
{
- blkdev_dio_pool = bioset_create(4, offsetof(struct blkdev_dio, bio), BIOSET_NEED_BVECS);
- if (!blkdev_dio_pool)
- return -ENOMEM;
- return 0;
+ return bioset_init(&blkdev_dio_pool, 4, offsetof(struct blkdev_dio, bio), BIOSET_NEED_BVECS);
}
module_init(blkdev_init);
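The blkdev_dio_pool change is part of the tree-wide conversion from heap-allocated bio sets to embedded ones, dropping a pointer dereference per allocation and a NULL check at init time. The shape of the conversion, shown schematically with hypothetical names:

	#include <linux/bio.h>
	#include <linux/init.h>

	static struct bio_set my_bio_set;	/* embedded; no pointer to manage */

	static int __init my_driver_init(void)
	{
		/* replaces: my_bio_set = bioset_create(4, 0, BIOSET_NEED_BVECS);
		 * plus a NULL check; bioset_init() returns an errno instead. */
		return bioset_init(&my_bio_set, 4, 0, BIOSET_NEED_BVECS);
	}

	/* allocation sites then pass &my_bio_set instead of the old pointer:
	 *	bio = bio_alloc_bioset(GFP_KERNEL, nr_pages, &my_bio_set);
	 */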
@@ -1322,27 +1319,30 @@ static void flush_disk(struct block_device *bdev, bool kill_dirty)
* check_disk_size_change - checks for disk size change and adjusts bdev size.
* @disk: struct gendisk to check
* @bdev: struct bdev to adjust.
+ * @verbose: if %true, log a message if the size changed
*
* This routine checks to see if the bdev size does not match the disk size
 * and adjusts it if it differs. When shrinking the bdev size, all of its
 * caches are freed.
*/
-void check_disk_size_change(struct gendisk *disk, struct block_device *bdev)
+void check_disk_size_change(struct gendisk *disk, struct block_device *bdev,
+ bool verbose)
{
loff_t disk_size, bdev_size;
disk_size = (loff_t)get_capacity(disk) << 9;
bdev_size = i_size_read(bdev->bd_inode);
if (disk_size != bdev_size) {
- printk(KERN_INFO
- "%s: detected capacity change from %lld to %lld\n",
- disk->disk_name, bdev_size, disk_size);
+ if (verbose) {
+ printk(KERN_INFO
+ "%s: detected capacity change from %lld to %lld\n",
+ disk->disk_name, bdev_size, disk_size);
+ }
i_size_write(bdev->bd_inode, disk_size);
if (bdev_size > disk_size)
flush_disk(bdev, false);
}
}
-EXPORT_SYMBOL(check_disk_size_change);
/**
* revalidate_disk - wrapper for lower-level driver's revalidate_disk call-back
@@ -1364,7 +1364,7 @@ int revalidate_disk(struct gendisk *disk)
return ret;
mutex_lock(&bdev->bd_mutex);
- check_disk_size_change(disk, bdev);
+ check_disk_size_change(disk, bdev, ret == 0);
bdev->bd_invalidated = 0;
mutex_unlock(&bdev->bd_mutex);
bdput(bdev);
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 234bae55b85d..7e075343daa5 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -19,17 +19,17 @@
* ordered operations list so that we make sure to flush out any
* new data the application may have written before commit.
*/
-#define BTRFS_INODE_ORDERED_DATA_CLOSE 0
-#define BTRFS_INODE_ORPHAN_META_RESERVED 1
-#define BTRFS_INODE_DUMMY 2
-#define BTRFS_INODE_IN_DEFRAG 3
-#define BTRFS_INODE_HAS_ORPHAN_ITEM 4
-#define BTRFS_INODE_HAS_ASYNC_EXTENT 5
-#define BTRFS_INODE_NEEDS_FULL_SYNC 6
-#define BTRFS_INODE_COPY_EVERYTHING 7
-#define BTRFS_INODE_IN_DELALLOC_LIST 8
-#define BTRFS_INODE_READDIO_NEED_LOCK 9
-#define BTRFS_INODE_HAS_PROPS 10
+enum {
+ BTRFS_INODE_ORDERED_DATA_CLOSE = 0,
+ BTRFS_INODE_DUMMY,
+ BTRFS_INODE_IN_DEFRAG,
+ BTRFS_INODE_HAS_ASYNC_EXTENT,
+ BTRFS_INODE_NEEDS_FULL_SYNC,
+ BTRFS_INODE_COPY_EVERYTHING,
+ BTRFS_INODE_IN_DELALLOC_LIST,
+ BTRFS_INODE_READDIO_NEED_LOCK,
+ BTRFS_INODE_HAS_PROPS,
+};
/* in memory btrfs inode */
struct btrfs_inode {
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 1061575a7d25..d3e447b45bf7 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -990,12 +990,7 @@ static void __free_workspace(int type, struct list_head *workspace,
btrfs_compress_op[idx]->free_workspace(workspace);
atomic_dec(total_ws);
wake:
- /*
- * Make sure counter is updated before we wake up waiters.
- */
- smp_mb();
- if (waitqueue_active(ws_wait))
- wake_up(ws_wait);
+ cond_wake_up(ws_wait);
}
static void free_workspace(int type, struct list_head *ws)
diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h
index cc605f7b23fb..ddda9b80bf20 100644
--- a/fs/btrfs/compression.h
+++ b/fs/btrfs/compression.h
@@ -6,6 +6,8 @@
#ifndef BTRFS_COMPRESSION_H
#define BTRFS_COMPRESSION_H
+#include <linux/sizes.h>
+
/*
* We want to make sure that amount of RAM required to uncompress an extent is
* reasonable, so we limit the total size in ram of a compressed extent to
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 3fd44835b386..4bc326df472e 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -2330,7 +2330,7 @@ static noinline void unlock_up(struct btrfs_path *path, int level,
no_skips = 1;
t = path->nodes[i];
- if (i >= lowest_unlock && i > skip_level && path->locks[i]) {
+ if (i >= lowest_unlock && i > skip_level) {
btrfs_tree_unlock_rw(t, path->locks[i]);
path->locks[i] = 0;
if (write_lock_level &&
@@ -2432,14 +2432,11 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p,
btrfs_unlock_up_safe(p, level + 1);
btrfs_set_path_blocking(p);
- free_extent_buffer(tmp);
if (p->reada != READA_NONE)
reada_for_search(fs_info, p, level, slot, key->objectid);
- btrfs_release_path(p);
-
ret = -EAGAIN;
- tmp = read_tree_block(fs_info, blocknr, 0, parent_level - 1,
+ tmp = read_tree_block(fs_info, blocknr, gen, parent_level - 1,
&first_key);
if (!IS_ERR(tmp)) {
/*
@@ -2448,12 +2445,14 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p,
* and give up so that our caller doesn't loop forever
* on our EAGAINs.
*/
- if (!btrfs_buffer_uptodate(tmp, 0, 0))
+ if (!extent_buffer_uptodate(tmp))
ret = -EIO;
free_extent_buffer(tmp);
} else {
ret = PTR_ERR(tmp);
}
+
+ btrfs_release_path(p);
return ret;
}
@@ -2599,6 +2598,78 @@ int btrfs_find_item(struct btrfs_root *fs_root, struct btrfs_path *path,
return 0;
}
+static struct extent_buffer *btrfs_search_slot_get_root(struct btrfs_root *root,
+ struct btrfs_path *p,
+ int write_lock_level)
+{
+ struct btrfs_fs_info *fs_info = root->fs_info;
+ struct extent_buffer *b;
+ int root_lock;
+ int level = 0;
+
+ /* We try very hard to do read locks on the root */
+ root_lock = BTRFS_READ_LOCK;
+
+ if (p->search_commit_root) {
+ /* The commit roots are read only so we always do read locks */
+ if (p->need_commit_sem)
+ down_read(&fs_info->commit_root_sem);
+ b = root->commit_root;
+ extent_buffer_get(b);
+ level = btrfs_header_level(b);
+ if (p->need_commit_sem)
+ up_read(&fs_info->commit_root_sem);
+ /*
+ * Ensure that all callers have set skip_locking when
+ * p->search_commit_root = 1.
+ */
+ ASSERT(p->skip_locking == 1);
+
+ goto out;
+ }
+
+ if (p->skip_locking) {
+ b = btrfs_root_node(root);
+ level = btrfs_header_level(b);
+ goto out;
+ }
+
+ /*
+ * If the level is set to maximum, we can skip trying to get the read
+ * lock.
+ */
+ if (write_lock_level < BTRFS_MAX_LEVEL) {
+ /*
+ * We don't know the level of the root node until we actually
+ * have it read locked
+ */
+ b = btrfs_read_lock_root_node(root);
+ level = btrfs_header_level(b);
+ if (level > write_lock_level)
+ goto out;
+
+ /* Whoops, must trade for write lock */
+ btrfs_tree_read_unlock(b);
+ free_extent_buffer(b);
+ }
+
+ b = btrfs_lock_root_node(root);
+ root_lock = BTRFS_WRITE_LOCK;
+
+ /* The level might have changed, check again */
+ level = btrfs_header_level(b);
+
+out:
+ p->nodes[level] = b;
+ if (!p->skip_locking)
+ p->locks[level] = root_lock;
+ /*
+ * Callers are responsible for dropping b's references.
+ */
+ return b;
+}
+
/*
* btrfs_search_slot - look for a key in a tree and perform necessary
* modifications to preserve tree invariants.
@@ -2635,7 +2706,6 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root,
int err;
int level;
int lowest_unlock = 1;
- int root_lock;
/* everything at write_lock_level or lower must be write locked */
int write_lock_level = 0;
u8 lowest_level = 0;
@@ -2673,50 +2743,7 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root,
again:
prev_cmp = -1;
- /*
- * we try very hard to do read locks on the root
- */
- root_lock = BTRFS_READ_LOCK;
- level = 0;
- if (p->search_commit_root) {
- /*
- * the commit roots are read only
- * so we always do read locks
- */
- if (p->need_commit_sem)
- down_read(&fs_info->commit_root_sem);
- b = root->commit_root;
- extent_buffer_get(b);
- level = btrfs_header_level(b);
- if (p->need_commit_sem)
- up_read(&fs_info->commit_root_sem);
- if (!p->skip_locking)
- btrfs_tree_read_lock(b);
- } else {
- if (p->skip_locking) {
- b = btrfs_root_node(root);
- level = btrfs_header_level(b);
- } else {
- /* we don't know the level of the root node
- * until we actually have it read locked
- */
- b = btrfs_read_lock_root_node(root);
- level = btrfs_header_level(b);
- if (level <= write_lock_level) {
- /* whoops, must trade for write lock */
- btrfs_tree_read_unlock(b);
- free_extent_buffer(b);
- b = btrfs_lock_root_node(root);
- root_lock = BTRFS_WRITE_LOCK;
-
- /* the level might have changed, check again */
- level = btrfs_header_level(b);
- }
- }
- }
- p->nodes[level] = b;
- if (!p->skip_locking)
- p->locks[level] = root_lock;
+ b = btrfs_search_slot_get_root(root, p, write_lock_level);
while (b) {
level = btrfs_header_level(b);
@@ -5414,12 +5441,24 @@ int btrfs_compare_trees(struct btrfs_root *left_root,
down_read(&fs_info->commit_root_sem);
left_level = btrfs_header_level(left_root->commit_root);
left_root_level = left_level;
- left_path->nodes[left_level] = left_root->commit_root;
+ left_path->nodes[left_level] =
+ btrfs_clone_extent_buffer(left_root->commit_root);
+ if (!left_path->nodes[left_level]) {
+ up_read(&fs_info->commit_root_sem);
+ ret = -ENOMEM;
+ goto out;
+ }
extent_buffer_get(left_path->nodes[left_level]);
right_level = btrfs_header_level(right_root->commit_root);
right_root_level = right_level;
- right_path->nodes[right_level] = right_root->commit_root;
+ right_path->nodes[right_level] =
+ btrfs_clone_extent_buffer(right_root->commit_root);
+ if (!right_path->nodes[right_level]) {
+ up_read(&fs_info->commit_root_sem);
+ ret = -ENOMEM;
+ goto out;
+ }
extent_buffer_get(right_path->nodes[right_level]);
up_read(&fs_info->commit_root_sem);
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 2771cc56a622..f4bf7874c24a 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -739,6 +739,12 @@ struct btrfs_delayed_root;
*/
#define BTRFS_FS_NEED_ASYNC_COMMIT 17
+/*
+ * Indicate that balance has been set up from the ioctl and is in the main
+ * phase. The fs_info::balance_ctl is initialized.
+ */
+#define BTRFS_FS_BALANCE_RUNNING 18
+
struct btrfs_fs_info {
u8 fsid[BTRFS_FSID_SIZE];
u8 chunk_tree_uuid[BTRFS_UUID_SIZE];
@@ -838,7 +844,6 @@ struct btrfs_fs_info {
struct mutex transaction_kthread_mutex;
struct mutex cleaner_mutex;
struct mutex chunk_mutex;
- struct mutex volume_mutex;
/*
* this is taken to make sure we don't set block groups ro after
@@ -1004,7 +1009,6 @@ struct btrfs_fs_info {
/* restriper state */
spinlock_t balance_lock;
struct mutex balance_mutex;
- atomic_t balance_running;
atomic_t balance_pause_req;
atomic_t balance_cancel_req;
struct btrfs_balance_control *balance_ctl;
@@ -1219,9 +1223,6 @@ struct btrfs_root {
spinlock_t log_extents_lock[2];
struct list_head logged_list[2];
- spinlock_t orphan_lock;
- atomic_t orphan_inodes;
- struct btrfs_block_rsv *orphan_block_rsv;
int orphan_cleanup_state;
spinlock_t inode_lock;
@@ -2764,13 +2765,9 @@ void btrfs_delalloc_release_space(struct inode *inode,
void btrfs_free_reserved_data_space_noquota(struct inode *inode, u64 start,
u64 len);
void btrfs_trans_release_chunk_metadata(struct btrfs_trans_handle *trans);
-int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans,
- struct btrfs_inode *inode);
-void btrfs_orphan_release_metadata(struct btrfs_inode *inode);
int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
struct btrfs_block_rsv *rsv,
- int nitems,
- u64 *qgroup_reserved, bool use_global_rsv);
+ int nitems, bool use_global_rsv);
void btrfs_subvolume_release_metadata(struct btrfs_fs_info *fs_info,
struct btrfs_block_rsv *rsv);
void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes,
@@ -2828,7 +2825,7 @@ void btrfs_wait_for_snapshot_creation(struct btrfs_root *root);
void check_system_chunk(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, const u64 type);
u64 add_new_free_space(struct btrfs_block_group_cache *block_group,
- struct btrfs_fs_info *info, u64 start, u64 end);
+ u64 start, u64 end);
/* ctree.c */
int btrfs_bin_search(struct extent_buffer *eb, const struct btrfs_key *key,
@@ -3042,11 +3039,9 @@ void btrfs_update_root_times(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
/* uuid-tree.c */
-int btrfs_uuid_tree_add(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info, u8 *uuid, u8 type,
+int btrfs_uuid_tree_add(struct btrfs_trans_handle *trans, u8 *uuid, u8 type,
u64 subid);
-int btrfs_uuid_tree_rem(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info, u8 *uuid, u8 type,
+int btrfs_uuid_tree_remove(struct btrfs_trans_handle *trans, u8 *uuid, u8 type,
u64 subid);
int btrfs_uuid_tree_iterate(struct btrfs_fs_info *fs_info,
int (*check_func)(struct btrfs_fs_info *, u8 *, u8,
@@ -3163,18 +3158,6 @@ void btrfs_extent_item_to_extent_map(struct btrfs_inode *inode,
struct extent_map *em);
/* inode.c */
-struct btrfs_delalloc_work {
- struct inode *inode;
- int delay_iput;
- struct completion completion;
- struct list_head list;
- struct btrfs_work work;
-};
-
-struct btrfs_delalloc_work *btrfs_alloc_delalloc_work(struct inode *inode,
- int delay_iput);
-void btrfs_wait_and_free_delalloc_work(struct btrfs_delalloc_work *work);
-
struct extent_map *btrfs_get_extent_fiemap(struct btrfs_inode *inode,
struct page *page, size_t pg_offset, u64 start,
u64 len, int create);
@@ -3182,6 +3165,8 @@ noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,
u64 *orig_start, u64 *orig_block_len,
u64 *ram_bytes);
+void __btrfs_del_delalloc_inode(struct btrfs_root *root,
+ struct btrfs_inode *inode);
struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry);
int btrfs_set_inode_index(struct btrfs_inode *dir, u64 *index);
int btrfs_unlink_inode(struct btrfs_trans_handle *trans,
@@ -3191,10 +3176,7 @@ int btrfs_unlink_inode(struct btrfs_trans_handle *trans,
int btrfs_add_link(struct btrfs_trans_handle *trans,
struct btrfs_inode *parent_inode, struct btrfs_inode *inode,
const char *name, int name_len, int add_backref, u64 index);
-int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct inode *dir, u64 objectid,
- const char *name, int name_len);
+int btrfs_delete_subvolume(struct inode *dir, struct dentry *dentry);
int btrfs_truncate_block(struct inode *inode, loff_t from, loff_t len,
int front);
int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
@@ -3202,9 +3184,8 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
struct inode *inode, u64 new_size,
u32 min_type);
-int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput);
-int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, int delay_iput,
- int nr);
+int btrfs_start_delalloc_inodes(struct btrfs_root *root);
+int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, int nr);
int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end,
unsigned int extra_bits,
struct extent_state **cached_state, int dedupe);
@@ -3238,10 +3219,7 @@ int btrfs_update_inode_fallback(struct btrfs_trans_handle *trans,
int btrfs_orphan_add(struct btrfs_trans_handle *trans,
struct btrfs_inode *inode);
int btrfs_orphan_cleanup(struct btrfs_root *root);
-void btrfs_orphan_commit_root(struct btrfs_trans_handle *trans,
- struct btrfs_root *root);
int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size);
-void btrfs_invalidate_inodes(struct btrfs_root *root);
void btrfs_add_delayed_iput(struct inode *inode);
void btrfs_run_delayed_iputs(struct btrfs_fs_info *fs_info);
int btrfs_prealloc_file_range(struct inode *inode, int mode,
@@ -3260,14 +3238,14 @@ void btrfs_test_inode_set_ops(struct inode *inode);
long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
long btrfs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
int btrfs_ioctl_get_supported_features(void __user *arg);
-void btrfs_update_iflags(struct inode *inode);
+void btrfs_sync_inode_flags_to_i_flags(struct inode *inode);
int btrfs_is_empty_uuid(u8 *uuid);
int btrfs_defrag_file(struct inode *inode, struct file *file,
struct btrfs_ioctl_defrag_range_args *range,
u64 newer_than, unsigned long max_pages);
void btrfs_get_block_group_info(struct list_head *groups_list,
struct btrfs_ioctl_space_info *space);
-void update_ioctl_balance_args(struct btrfs_fs_info *fs_info, int lock,
+void btrfs_update_ioctl_balance_args(struct btrfs_fs_info *fs_info,
struct btrfs_ioctl_balance_args *bargs);
ssize_t btrfs_dedupe_file_range(struct file *src_file, u64 loff, u64 olen,
struct file *dst_file, u64 dst_loff);
@@ -3765,4 +3743,26 @@ static inline int btrfs_is_testing(struct btrfs_fs_info *fs_info)
return 0;
}
+static inline void cond_wake_up(struct wait_queue_head *wq)
+{
+ /*
+ * This implies a full smp_mb barrier, see comments for
+ * waitqueue_active why.
+ */
+ if (wq_has_sleeper(wq))
+ wake_up(wq);
+}
+
+static inline void cond_wake_up_nomb(struct wait_queue_head *wq)
+{
+ /*
+ * Special case for conditional wakeup where the barrier required for
+	 * waitqueue_active is implied by some of the preceding code, e.g. an
+	 * atomic operation that returns a value (atomic_dec_return, ...) or an
+	 * unlock/lock sequence.
+ */
+ if (waitqueue_active(wq))
+ wake_up(wq);
+}
+
#endif
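These two helpers capture a pattern the btrfs call sites used to open-code, visible in the compression.c hunk above and the delayed-inode.c one below. Roughly, with a hypothetical structure, the call-site transformation looks like this (a schematic sketch, not code from the patch):

	#include <linux/atomic.h>
	#include <linux/wait.h>

	struct foo {
		atomic_t pending;
		wait_queue_head_t wait;
	};

	/* Before: explicit barrier plus waitqueue check at every wakeup site. */
	static void finish_item_old(struct foo *f)
	{
		atomic_dec(&f->pending);
		smp_mb();	/* make the counter update visible to waiters */
		if (waitqueue_active(&f->wait))
			wake_up(&f->wait);
	}

	/* After: wq_has_sleeper() inside cond_wake_up() implies the full
	 * barrier. cond_wake_up_nomb() is only for sites where earlier code
	 * (e.g. atomic_dec_return() or an unlock/lock pair) already ordered
	 * the update. */
	static void finish_item_new(struct foo *f)
	{
		atomic_dec(&f->pending);
		cond_wake_up(&f->wait);
	}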
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index a8d492dbd3e7..fe6caa7e698b 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -460,13 +460,10 @@ static void finish_one_item(struct btrfs_delayed_root *delayed_root)
{
int seq = atomic_inc_return(&delayed_root->items_seq);
- /*
- * atomic_dec_return implies a barrier for waitqueue_active
- */
+ /* atomic_dec_return implies a barrier */
if ((atomic_dec_return(&delayed_root->items) <
- BTRFS_DELAYED_BACKGROUND || seq % BTRFS_DELAYED_BATCH == 0) &&
- waitqueue_active(&delayed_root->wait))
- wake_up(&delayed_root->wait);
+ BTRFS_DELAYED_BACKGROUND || seq % BTRFS_DELAYED_BATCH == 0))
+ cond_wake_up_nomb(&delayed_root->wait);
}
static void __btrfs_remove_delayed_item(struct btrfs_delayed_item *delayed_item)
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index e1b0651686f7..03dec673d12a 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -286,10 +286,10 @@ static bool merge_ref(struct btrfs_trans_handle *trans,
}
void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
struct btrfs_delayed_ref_root *delayed_refs,
struct btrfs_delayed_ref_head *head)
{
+ struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_delayed_ref_node *ref;
struct rb_node *node;
u64 seq = 0;
@@ -323,9 +323,7 @@ again:
}
}
-int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info,
- struct btrfs_delayed_ref_root *delayed_refs,
- u64 seq)
+int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info, u64 seq)
{
struct seq_list *elem;
int ret = 0;
@@ -336,10 +334,9 @@ int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info,
struct seq_list, list);
if (seq >= elem->seq) {
btrfs_debug(fs_info,
- "holding back delayed_ref %#x.%x, lowest is %#x.%x (%p)",
+ "holding back delayed_ref %#x.%x, lowest is %#x.%x",
(u32)(seq >> 32), (u32)seq,
- (u32)(elem->seq >> 32), (u32)elem->seq,
- delayed_refs);
+ (u32)(elem->seq >> 32), (u32)elem->seq);
ret = 1;
}
}
@@ -529,33 +526,20 @@ update_existing_head_ref(struct btrfs_delayed_ref_root *delayed_refs,
spin_unlock(&existing->lock);
}
-/*
- * helper function to actually insert a head node into the rbtree.
- * this does all the dirty work in terms of maintaining the correct
- * overall modification count.
- */
-static noinline struct btrfs_delayed_ref_head *
-add_delayed_ref_head(struct btrfs_fs_info *fs_info,
- struct btrfs_trans_handle *trans,
- struct btrfs_delayed_ref_head *head_ref,
- struct btrfs_qgroup_extent_record *qrecord,
- u64 bytenr, u64 num_bytes, u64 ref_root, u64 reserved,
- int action, int is_data, int is_system,
- int *qrecord_inserted_ret,
- int *old_ref_mod, int *new_ref_mod)
-
+static void init_delayed_ref_head(struct btrfs_delayed_ref_head *head_ref,
+ struct btrfs_qgroup_extent_record *qrecord,
+ u64 bytenr, u64 num_bytes, u64 ref_root,
+ u64 reserved, int action, bool is_data,
+ bool is_system)
{
- struct btrfs_delayed_ref_head *existing;
- struct btrfs_delayed_ref_root *delayed_refs;
int count_mod = 1;
int must_insert_reserved = 0;
- int qrecord_inserted = 0;
/* If reserved is provided, it must be a data extent. */
BUG_ON(!is_data && reserved);
/*
- * the head node stores the sum of all the mods, so dropping a ref
+ * The head node stores the sum of all the mods, so dropping a ref
* should drop the sum in the head node by one.
*/
if (action == BTRFS_UPDATE_DELAYED_HEAD)
@@ -564,12 +548,11 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info,
count_mod = -1;
/*
- * BTRFS_ADD_DELAYED_EXTENT means that we need to update
- * the reserved accounting when the extent is finally added, or
- * if a later modification deletes the delayed ref without ever
- * inserting the extent into the extent allocation tree.
- * ref->must_insert_reserved is the flag used to record
- * that accounting mods are required.
+ * BTRFS_ADD_DELAYED_EXTENT means that we need to update the reserved
+ * accounting when the extent is finally added, or if a later
+ * modification deletes the delayed ref without ever inserting the
+ * extent into the extent allocation tree. ref->must_insert_reserved
+ * is the flag used to record that accounting mods are required.
*
* Once we record must_insert_reserved, switch the action to
* BTRFS_ADD_DELAYED_REF because other special casing is not required.
@@ -579,8 +562,6 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info,
else
must_insert_reserved = 0;
- delayed_refs = &trans->transaction->delayed_refs;
-
refcount_set(&head_ref->refs, 1);
head_ref->bytenr = bytenr;
head_ref->num_bytes = num_bytes;
@@ -598,7 +579,6 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info,
spin_lock_init(&head_ref->lock);
mutex_init(&head_ref->mutex);
- /* Record qgroup extent info if provided */
if (qrecord) {
if (ref_root && reserved) {
head_ref->qgroup_ref_root = ref_root;
@@ -608,20 +588,44 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info,
qrecord->bytenr = bytenr;
qrecord->num_bytes = num_bytes;
qrecord->old_roots = NULL;
+ }
+}
+
+/*
+ * helper function to actually insert a head node into the rbtree.
+ * this does all the dirty work in terms of maintaining the correct
+ * overall modification count.
+ */
+static noinline struct btrfs_delayed_ref_head *
+add_delayed_ref_head(struct btrfs_trans_handle *trans,
+ struct btrfs_delayed_ref_head *head_ref,
+ struct btrfs_qgroup_extent_record *qrecord,
+ int action, int *qrecord_inserted_ret,
+ int *old_ref_mod, int *new_ref_mod)
+{
+ struct btrfs_delayed_ref_head *existing;
+ struct btrfs_delayed_ref_root *delayed_refs;
+ int qrecord_inserted = 0;
- if(btrfs_qgroup_trace_extent_nolock(fs_info,
+ delayed_refs = &trans->transaction->delayed_refs;
+
+ /* Record qgroup extent info if provided */
+ if (qrecord) {
+ if (btrfs_qgroup_trace_extent_nolock(trans->fs_info,
delayed_refs, qrecord))
kfree(qrecord);
else
qrecord_inserted = 1;
}
- trace_add_delayed_ref_head(fs_info, head_ref, action);
+ trace_add_delayed_ref_head(trans->fs_info, head_ref, action);
existing = htree_insert(&delayed_refs->href_root,
&head_ref->href_node);
if (existing) {
- WARN_ON(ref_root && reserved && existing->qgroup_ref_root
+ WARN_ON(qrecord && head_ref->qgroup_ref_root
+ && head_ref->qgroup_reserved
+ && existing->qgroup_ref_root
&& existing->qgroup_reserved);
update_existing_head_ref(delayed_refs, existing, head_ref,
old_ref_mod);
@@ -634,8 +638,8 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info,
} else {
if (old_ref_mod)
*old_ref_mod = 0;
- if (is_data && count_mod < 0)
- delayed_refs->pending_csums += num_bytes;
+ if (head_ref->is_data && head_ref->ref_mod < 0)
+ delayed_refs->pending_csums += head_ref->num_bytes;
delayed_refs->num_heads++;
delayed_refs->num_heads_ready++;
atomic_inc(&delayed_refs->num_entries);
@@ -645,90 +649,48 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info,
*qrecord_inserted_ret = qrecord_inserted;
if (new_ref_mod)
*new_ref_mod = head_ref->total_ref_mod;
- return head_ref;
-}
-
-/*
- * helper to insert a delayed tree ref into the rbtree.
- */
-static noinline void
-add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
- struct btrfs_trans_handle *trans,
- struct btrfs_delayed_ref_head *head_ref,
- struct btrfs_delayed_ref_node *ref, u64 bytenr,
- u64 num_bytes, u64 parent, u64 ref_root, int level,
- int action)
-{
- struct btrfs_delayed_tree_ref *full_ref;
- struct btrfs_delayed_ref_root *delayed_refs;
- u64 seq = 0;
- int ret;
-
- if (action == BTRFS_ADD_DELAYED_EXTENT)
- action = BTRFS_ADD_DELAYED_REF;
- if (is_fstree(ref_root))
- seq = atomic64_read(&fs_info->tree_mod_seq);
- delayed_refs = &trans->transaction->delayed_refs;
-
- /* first set the basic ref node struct up */
- refcount_set(&ref->refs, 1);
- ref->bytenr = bytenr;
- ref->num_bytes = num_bytes;
- ref->ref_mod = 1;
- ref->action = action;
- ref->is_head = 0;
- ref->in_tree = 1;
- ref->seq = seq;
- RB_CLEAR_NODE(&ref->ref_node);
- INIT_LIST_HEAD(&ref->add_list);
-
- full_ref = btrfs_delayed_node_to_tree_ref(ref);
- full_ref->parent = parent;
- full_ref->root = ref_root;
- if (parent)
- ref->type = BTRFS_SHARED_BLOCK_REF_KEY;
- else
- ref->type = BTRFS_TREE_BLOCK_REF_KEY;
- full_ref->level = level;
-
- trace_add_delayed_tree_ref(fs_info, ref, full_ref, action);
-
- ret = insert_delayed_ref(trans, delayed_refs, head_ref, ref);
-
- /*
- * XXX: memory should be freed at the same level allocated.
- * But bad practice is anywhere... Follow it now. Need cleanup.
- */
- if (ret > 0)
- kmem_cache_free(btrfs_delayed_tree_ref_cachep, full_ref);
+ return head_ref;
}
/*
- * helper to insert a delayed data ref into the rbtree.
+ * init_delayed_ref_common - Initialize the structure which represents a
+ *			   modification to an extent.
+ *
+ * @fs_info:    In-memory structure of the mounted filesystem.
+ *
+ * @ref: The structure which is going to be initialized.
+ *
+ * @bytenr: The logical address of the extent for which a modification is
+ * going to be recorded.
+ *
+ * @num_bytes: Size of the extent whose modification is being recorded.
+ *
+ * @ref_root:	The id of the root where this modification originated; this
+ * can be either one of the well-known metadata trees or the
+ * subvolume id which references this extent.
+ *
+ * @action: Can be one of BTRFS_ADD_DELAYED_REF/BTRFS_DROP_DELAYED_REF or
+ * BTRFS_ADD_DELAYED_EXTENT
+ *
+ * @ref_type: Holds the type of the extent which is being recorded, can be
+ * one of BTRFS_SHARED_BLOCK_REF_KEY/BTRFS_TREE_BLOCK_REF_KEY
+ * when recording a metadata extent or BTRFS_SHARED_DATA_REF_KEY/
+ *		BTRFS_EXTENT_DATA_REF_KEY when recording a data extent
*/
-static noinline void
-add_delayed_data_ref(struct btrfs_fs_info *fs_info,
- struct btrfs_trans_handle *trans,
- struct btrfs_delayed_ref_head *head_ref,
- struct btrfs_delayed_ref_node *ref, u64 bytenr,
- u64 num_bytes, u64 parent, u64 ref_root, u64 owner,
- u64 offset, int action)
+static void init_delayed_ref_common(struct btrfs_fs_info *fs_info,
+ struct btrfs_delayed_ref_node *ref,
+ u64 bytenr, u64 num_bytes, u64 ref_root,
+ int action, u8 ref_type)
{
- struct btrfs_delayed_data_ref *full_ref;
- struct btrfs_delayed_ref_root *delayed_refs;
u64 seq = 0;
- int ret;
if (action == BTRFS_ADD_DELAYED_EXTENT)
action = BTRFS_ADD_DELAYED_REF;
- delayed_refs = &trans->transaction->delayed_refs;
-
if (is_fstree(ref_root))
seq = atomic64_read(&fs_info->tree_mod_seq);
- /* first set the basic ref node struct up */
refcount_set(&ref->refs, 1);
ref->bytenr = bytenr;
ref->num_bytes = num_bytes;
@@ -737,25 +699,9 @@ add_delayed_data_ref(struct btrfs_fs_info *fs_info,
ref->is_head = 0;
ref->in_tree = 1;
ref->seq = seq;
+ ref->type = ref_type;
RB_CLEAR_NODE(&ref->ref_node);
INIT_LIST_HEAD(&ref->add_list);
-
- full_ref = btrfs_delayed_node_to_data_ref(ref);
- full_ref->parent = parent;
- full_ref->root = ref_root;
- if (parent)
- ref->type = BTRFS_SHARED_DATA_REF_KEY;
- else
- ref->type = BTRFS_EXTENT_DATA_REF_KEY;
-
- full_ref->objectid = owner;
- full_ref->offset = offset;
-
- trace_add_delayed_data_ref(fs_info, ref, full_ref, action);
-
- ret = insert_delayed_ref(trans, delayed_refs, head_ref, ref);
- if (ret > 0)
- kmem_cache_free(btrfs_delayed_data_ref_cachep, full_ref);
}
/*
@@ -775,13 +721,25 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
struct btrfs_delayed_ref_root *delayed_refs;
struct btrfs_qgroup_extent_record *record = NULL;
int qrecord_inserted;
- int is_system = (ref_root == BTRFS_CHUNK_TREE_OBJECTID);
+ bool is_system = (ref_root == BTRFS_CHUNK_TREE_OBJECTID);
+ int ret;
+ u8 ref_type;
BUG_ON(extent_op && extent_op->is_data);
ref = kmem_cache_alloc(btrfs_delayed_tree_ref_cachep, GFP_NOFS);
if (!ref)
return -ENOMEM;
+ if (parent)
+ ref_type = BTRFS_SHARED_BLOCK_REF_KEY;
+ else
+ ref_type = BTRFS_TREE_BLOCK_REF_KEY;
+ init_delayed_ref_common(fs_info, &ref->node, bytenr, num_bytes,
+ ref_root, action, ref_type);
+ ref->root = ref_root;
+ ref->parent = parent;
+ ref->level = level;
+
head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS);
if (!head_ref)
goto free_ref;
@@ -793,6 +751,8 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
goto free_head_ref;
}
+ init_delayed_ref_head(head_ref, record, bytenr, num_bytes,
+ ref_root, 0, action, false, is_system);
head_ref->extent_op = extent_op;
delayed_refs = &trans->transaction->delayed_refs;
@@ -802,15 +762,19 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
* insert both the head node and the new ref without dropping
* the spin lock
*/
- head_ref = add_delayed_ref_head(fs_info, trans, head_ref, record,
- bytenr, num_bytes, 0, 0, action, 0,
- is_system, &qrecord_inserted,
+ head_ref = add_delayed_ref_head(trans, head_ref, record,
+ action, &qrecord_inserted,
old_ref_mod, new_ref_mod);
- add_delayed_tree_ref(fs_info, trans, head_ref, &ref->node, bytenr,
- num_bytes, parent, ref_root, level, action);
+ ret = insert_delayed_ref(trans, delayed_refs, head_ref, &ref->node);
spin_unlock(&delayed_refs->lock);
+ trace_add_delayed_tree_ref(fs_info, &ref->node, ref,
+ action == BTRFS_ADD_DELAYED_EXTENT ?
+ BTRFS_ADD_DELAYED_REF : action);
+ if (ret > 0)
+ kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref);
+
if (qrecord_inserted)
btrfs_qgroup_trace_extent_post(fs_info, record);
@@ -839,11 +803,25 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
struct btrfs_delayed_ref_root *delayed_refs;
struct btrfs_qgroup_extent_record *record = NULL;
int qrecord_inserted;
+ int ret;
+ u8 ref_type;
ref = kmem_cache_alloc(btrfs_delayed_data_ref_cachep, GFP_NOFS);
if (!ref)
return -ENOMEM;
+ if (parent)
+ ref_type = BTRFS_SHARED_DATA_REF_KEY;
+ else
+ ref_type = BTRFS_EXTENT_DATA_REF_KEY;
+ init_delayed_ref_common(fs_info, &ref->node, bytenr, num_bytes,
+ ref_root, action, ref_type);
+ ref->root = ref_root;
+ ref->parent = parent;
+ ref->objectid = owner;
+ ref->offset = offset;
+
head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS);
if (!head_ref) {
kmem_cache_free(btrfs_delayed_data_ref_cachep, ref);
@@ -861,6 +839,8 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
}
}
+ init_delayed_ref_head(head_ref, record, bytenr, num_bytes, ref_root,
+ reserved, action, true, false);
head_ref->extent_op = NULL;
delayed_refs = &trans->transaction->delayed_refs;
@@ -870,16 +850,20 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
* insert both the head node and the new ref without dropping
* the spin lock
*/
- head_ref = add_delayed_ref_head(fs_info, trans, head_ref, record,
- bytenr, num_bytes, ref_root, reserved,
- action, 1, 0, &qrecord_inserted,
+ head_ref = add_delayed_ref_head(trans, head_ref, record,
+ action, &qrecord_inserted,
old_ref_mod, new_ref_mod);
- add_delayed_data_ref(fs_info, trans, head_ref, &ref->node, bytenr,
- num_bytes, parent, ref_root, owner, offset,
- action);
+ ret = insert_delayed_ref(trans, delayed_refs, head_ref, &ref->node);
spin_unlock(&delayed_refs->lock);
+ trace_add_delayed_data_ref(trans->fs_info, &ref->node, ref,
+ action == BTRFS_ADD_DELAYED_EXTENT ?
+ BTRFS_ADD_DELAYED_REF : action);
+ if (ret > 0)
+ kmem_cache_free(btrfs_delayed_data_ref_cachep, ref);
+
if (qrecord_inserted)
return btrfs_qgroup_trace_extent_post(fs_info, record);
return 0;
@@ -897,19 +881,16 @@ int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
if (!head_ref)
return -ENOMEM;
+ init_delayed_ref_head(head_ref, NULL, bytenr, num_bytes, 0, 0,
+ BTRFS_UPDATE_DELAYED_HEAD, extent_op->is_data,
+ false);
head_ref->extent_op = extent_op;
delayed_refs = &trans->transaction->delayed_refs;
spin_lock(&delayed_refs->lock);
- /*
- * extent_ops just modify the flags of an extent and they don't result
- * in ref count changes, hence it's safe to pass false/0 for is_system
- * argument
- */
- add_delayed_ref_head(fs_info, trans, head_ref, NULL, bytenr,
- num_bytes, 0, 0, BTRFS_UPDATE_DELAYED_HEAD,
- extent_op->is_data, 0, NULL, NULL, NULL);
+ add_delayed_ref_head(trans, head_ref, NULL, BTRFS_UPDATE_DELAYED_HEAD,
+ NULL, NULL, NULL);
spin_unlock(&delayed_refs->lock);
return 0;
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h
index 7f00db50bd24..ea1aecb6a50d 100644
--- a/fs/btrfs/delayed-ref.h
+++ b/fs/btrfs/delayed-ref.h
@@ -251,7 +251,6 @@ int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
u64 bytenr, u64 num_bytes,
struct btrfs_delayed_extent_op *extent_op);
void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
struct btrfs_delayed_ref_root *delayed_refs,
struct btrfs_delayed_ref_head *head);
@@ -269,9 +268,7 @@ static inline void btrfs_delayed_ref_unlock(struct btrfs_delayed_ref_head *head)
struct btrfs_delayed_ref_head *
btrfs_select_ref_head(struct btrfs_trans_handle *trans);
-int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info,
- struct btrfs_delayed_ref_root *delayed_refs,
- u64 seq);
+int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info, u64 seq);
/*
* helper functions to cast a node into its container
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index f82be266ba4b..e2ba0419297a 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -33,8 +33,6 @@ static void btrfs_dev_replace_update_device_in_mapping_tree(
struct btrfs_device *srcdev,
struct btrfs_device *tgtdev);
static int btrfs_dev_replace_kthread(void *data);
-static int btrfs_dev_replace_continue_on_mount(struct btrfs_fs_info *fs_info);
-
int btrfs_init_dev_replace(struct btrfs_fs_info *fs_info)
{
@@ -179,6 +177,105 @@ out:
}
/*
+ * Initialize a new device for device replace target from a given source dev
+ * and path.
+ *
+ * Return 0 and new device in @device_out, otherwise return < 0
+ */
+static int btrfs_init_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
+ const char *device_path,
+ struct btrfs_device *srcdev,
+ struct btrfs_device **device_out)
+{
+ struct btrfs_device *device;
+ struct block_device *bdev;
+ struct list_head *devices;
+ struct rcu_string *name;
+ u64 devid = BTRFS_DEV_REPLACE_DEVID;
+ int ret = 0;
+
+ *device_out = NULL;
+ if (fs_info->fs_devices->seeding) {
+ btrfs_err(fs_info, "the filesystem is a seed filesystem!");
+ return -EINVAL;
+ }
+
+ bdev = blkdev_get_by_path(device_path, FMODE_WRITE | FMODE_EXCL,
+ fs_info->bdev_holder);
+ if (IS_ERR(bdev)) {
+ btrfs_err(fs_info, "target device %s is invalid!", device_path);
+ return PTR_ERR(bdev);
+ }
+
+ filemap_write_and_wait(bdev->bd_inode->i_mapping);
+
+ devices = &fs_info->fs_devices->devices;
+ list_for_each_entry(device, devices, dev_list) {
+ if (device->bdev == bdev) {
+ btrfs_err(fs_info,
+ "target device is in the filesystem!");
+ ret = -EEXIST;
+ goto error;
+ }
+ }
+
+ if (i_size_read(bdev->bd_inode) <
+ btrfs_device_get_total_bytes(srcdev)) {
+ btrfs_err(fs_info,
+ "target device is smaller than source device!");
+ ret = -EINVAL;
+ goto error;
+ }
+
+ device = btrfs_alloc_device(NULL, &devid, NULL);
+ if (IS_ERR(device)) {
+ ret = PTR_ERR(device);
+ goto error;
+ }
+
+ name = rcu_string_strdup(device_path, GFP_KERNEL);
+ if (!name) {
+ btrfs_free_device(device);
+ ret = -ENOMEM;
+ goto error;
+ }
+ rcu_assign_pointer(device->name, name);
+
+ mutex_lock(&fs_info->fs_devices->device_list_mutex);
+ set_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
+ device->generation = 0;
+ device->io_width = fs_info->sectorsize;
+ device->io_align = fs_info->sectorsize;
+ device->sector_size = fs_info->sectorsize;
+ device->total_bytes = btrfs_device_get_total_bytes(srcdev);
+ device->disk_total_bytes = btrfs_device_get_disk_total_bytes(srcdev);
+ device->bytes_used = btrfs_device_get_bytes_used(srcdev);
+ device->commit_total_bytes = srcdev->commit_total_bytes;
+ device->commit_bytes_used = device->bytes_used;
+ device->fs_info = fs_info;
+ device->bdev = bdev;
+ set_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state);
+ set_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state);
+ device->mode = FMODE_EXCL;
+ device->dev_stats_valid = 1;
+ set_blocksize(device->bdev, BTRFS_BDEV_BLOCKSIZE);
+ device->fs_devices = fs_info->fs_devices;
+ list_add(&device->dev_list, &fs_info->fs_devices->devices);
+ fs_info->fs_devices->num_devices++;
+ fs_info->fs_devices->open_devices++;
+ mutex_unlock(&fs_info->fs_devices->device_list_mutex);
+
+ *device_out = device;
+ return 0;
+
+error:
+ blkdev_put(bdev, FMODE_EXCL);
+ return ret;
+}
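
The function added above (likely moved here as part of the volume_mutex removal seen further down) follows the usual single-error-label unwind: blkdev_get_by_path() is the only resource taken before the label, so every failure path ends in exactly one blkdev_put(). The shape, as a compilable sketch with stand-in resources (names hypothetical):

    #include <stdio.h>
    #include <stdlib.h>

    static int setup(const char *path)
    {
            int ret = 0;
            FILE *f;
            char *buf;

            f = fopen(path, "r");   /* first resource, like blkdev_get_by_path() */
            if (!f)
                    return -1;

            buf = malloc(4096);     /* later acquisitions jump to the label */
            if (!buf) {
                    ret = -1;
                    goto error;
            }

            /* ... validation steps, each 'goto error' on failure ... */
            free(buf);
            fclose(f);
            return 0;

    error:
            fclose(f);              /* exactly one release, like blkdev_put() */
            return ret;
    }

    int main(void) { return setup("/dev/null"); }
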
+
+/*
* called from commit_transaction. Writes changed device replace state to
* disk.
*/
@@ -317,18 +414,13 @@ int btrfs_dev_replace_start(struct btrfs_fs_info *fs_info,
struct btrfs_device *tgt_device = NULL;
struct btrfs_device *src_device = NULL;
- /* the disk copy procedure reuses the scrub code */
- mutex_lock(&fs_info->volume_mutex);
ret = btrfs_find_device_by_devspec(fs_info, srcdevid,
srcdev_name, &src_device);
- if (ret) {
- mutex_unlock(&fs_info->volume_mutex);
+ if (ret)
return ret;
- }
ret = btrfs_init_dev_replace_tgtdev(fs_info, tgtdev_name,
src_device, &tgt_device);
- mutex_unlock(&fs_info->volume_mutex);
if (ret)
return ret;
@@ -360,7 +452,6 @@ int btrfs_dev_replace_start(struct btrfs_fs_info *fs_info,
dev_replace->cont_reading_from_srcdev_mode = read_src;
WARN_ON(!src_device);
dev_replace->srcdev = src_device;
- WARN_ON(!tgt_device);
dev_replace->tgtdev = tgt_device;
btrfs_info_in_rcu(fs_info,
@@ -503,7 +594,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
* flush all outstanding I/O and inode extent mappings before the
* copy operation is declared as being finished
*/
- ret = btrfs_start_delalloc_roots(fs_info, 0, -1);
+ ret = btrfs_start_delalloc_roots(fs_info, -1);
if (ret) {
mutex_unlock(&dev_replace->lock_finishing_cancel_unmount);
return ret;
@@ -518,7 +609,6 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
ret = btrfs_commit_transaction(trans);
WARN_ON(ret);
- mutex_lock(&uuid_mutex);
/* keep away write_all_supers() during the finishing procedure */
mutex_lock(&fs_info->fs_devices->device_list_mutex);
mutex_lock(&fs_info->chunk_mutex);
@@ -545,7 +635,6 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
btrfs_dev_replace_write_unlock(dev_replace);
mutex_unlock(&fs_info->chunk_mutex);
mutex_unlock(&fs_info->fs_devices->device_list_mutex);
- mutex_unlock(&uuid_mutex);
btrfs_rm_dev_replace_blocked(fs_info);
if (tgt_device)
btrfs_destroy_dev_replace_tgtdev(fs_info, tgt_device);
@@ -596,7 +685,6 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
*/
mutex_unlock(&fs_info->chunk_mutex);
mutex_unlock(&fs_info->fs_devices->device_list_mutex);
- mutex_unlock(&uuid_mutex);
/* replace the sysfs entry */
btrfs_sysfs_rm_device_link(fs_info->fs_devices, src_device);
@@ -800,7 +888,17 @@ int btrfs_resume_dev_replace_async(struct btrfs_fs_info *fs_info)
}
btrfs_dev_replace_write_unlock(dev_replace);
- WARN_ON(test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags));
+ /*
+ * This could collide with a paused balance, but the exclusive op logic
+ * should never allow both to start and pause. We don't want to allow
+ * dev-replace to start anyway.
+ */
+ if (test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags)) {
+ btrfs_info(fs_info,
+ "cannot resume dev-replace, other exclusive operation running");
+ return 0;
+ }
+
task = kthread_run(btrfs_dev_replace_kthread, fs_info, "btrfs-devrepl");
return PTR_ERR_OR_ZERO(task);
}
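
The resume path now claims the exclusive-operation bit with test_and_set_bit() and bails out when it is already held, rather than asserting that can never happen. The same try-lock shape in portable C11, with atomic_flag standing in for the kernel bitops (a sketch, not the kernel API):

    #include <stdatomic.h>
    #include <stdio.h>

    static atomic_flag excl_op = ATOMIC_FLAG_INIT;

    /* Returns 1 if we now own the exclusive op, 0 if someone else does. */
    static int try_start_excl_op(const char *name)
    {
            if (atomic_flag_test_and_set(&excl_op)) {
                    printf("cannot resume %s, other exclusive operation running\n",
                           name);
                    return 0;
            }
            return 1;
    }

    static void finish_excl_op(void)
    {
            atomic_flag_clear(&excl_op);    /* like clear_bit() when done */
    }

    int main(void)
    {
            if (try_start_excl_op("dev-replace"))
                    finish_excl_op();
            return 0;
    }
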
@@ -810,6 +908,7 @@ static int btrfs_dev_replace_kthread(void *data)
struct btrfs_fs_info *fs_info = data;
struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
u64 progress;
+ int ret;
progress = btrfs_dev_replace_progress(fs_info);
progress = div_u64(progress, 10);
@@ -820,23 +919,14 @@ static int btrfs_dev_replace_kthread(void *data)
btrfs_dev_name(dev_replace->tgtdev),
(unsigned int)progress);
- btrfs_dev_replace_continue_on_mount(fs_info);
- clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
-
- return 0;
-}
-
-static int btrfs_dev_replace_continue_on_mount(struct btrfs_fs_info *fs_info)
-{
- struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
- int ret;
-
ret = btrfs_scrub_dev(fs_info, dev_replace->srcdev->devid,
dev_replace->committed_cursor_left,
btrfs_device_get_total_bytes(dev_replace->srcdev),
&dev_replace->scrub_progress, 0, 1);
ret = btrfs_dev_replace_finishing(fs_info, ret);
WARN_ON(ret);
+
+ clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
return 0;
}
@@ -916,9 +1006,9 @@ void btrfs_dev_replace_clear_lock_blocking(
ASSERT(atomic_read(&dev_replace->read_locks) > 0);
ASSERT(atomic_read(&dev_replace->blocking_readers) > 0);
read_lock(&dev_replace->lock);
- if (atomic_dec_and_test(&dev_replace->blocking_readers) &&
- waitqueue_active(&dev_replace->read_lock_wq))
- wake_up(&dev_replace->read_lock_wq);
+ /* Barrier implied by atomic_dec_and_test */
+ if (atomic_dec_and_test(&dev_replace->blocking_readers))
+ cond_wake_up_nomb(&dev_replace->read_lock_wq);
}
void btrfs_bio_counter_inc_noblocked(struct btrfs_fs_info *fs_info)
@@ -929,9 +1019,7 @@ void btrfs_bio_counter_inc_noblocked(struct btrfs_fs_info *fs_info)
void btrfs_bio_counter_sub(struct btrfs_fs_info *fs_info, s64 amount)
{
percpu_counter_sub(&fs_info->bio_counter, amount);
-
- if (waitqueue_active(&fs_info->replace_wait))
- wake_up(&fs_info->replace_wait);
+ cond_wake_up_nomb(&fs_info->replace_wait);
}
void btrfs_bio_counter_inc_blocked(struct btrfs_fs_info *fs_info)
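
Both wakeup hunks above fold the open-coded `if (waitqueue_active(wq)) wake_up(wq);` into cond_wake_up_nomb(). The helper is introduced elsewhere in this series; presumably it is the same check wrapped up, with the `_nomb` variant used where the barrier that waitqueue_active() needs is already implied by the preceding code (the comment above points at atomic_dec_and_test()) or where the original code deliberately had none. Roughly:

    static inline void cond_wake_up_nomb(wait_queue_head_t *wq)
    {
            /*
             * The caller must supply the barrier ordering its state change
             * against waitqueue_active(), e.g. an RMW atomic just before
             * this call; hence "no memory barrier" in the name.
             */
            if (waitqueue_active(wq))
                    wake_up(wq);
    }
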
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 60caa68c3618..205092dc9390 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -55,7 +55,6 @@
static const struct extent_io_ops btree_extent_io_ops;
static void end_workqueue_fn(struct btrfs_work *work);
static void free_fs_root(struct btrfs_root *root);
-static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info);
static void btrfs_destroy_ordered_extents(struct btrfs_root *root);
static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
struct btrfs_fs_info *fs_info);
@@ -416,7 +415,7 @@ static int btrfs_check_super_csum(struct btrfs_fs_info *fs_info,
static int verify_level_key(struct btrfs_fs_info *fs_info,
struct extent_buffer *eb, int level,
- struct btrfs_key *first_key)
+ struct btrfs_key *first_key, u64 parent_transid)
{
int found_level;
struct btrfs_key found_key;
@@ -454,10 +453,11 @@ static int verify_level_key(struct btrfs_fs_info *fs_info,
if (ret) {
WARN_ON(1);
btrfs_err(fs_info,
-"tree first key mismatch detected, bytenr=%llu key expected=(%llu, %u, %llu) has=(%llu, %u, %llu)",
- eb->start, first_key->objectid, first_key->type,
- first_key->offset, found_key.objectid,
- found_key.type, found_key.offset);
+"tree first key mismatch detected, bytenr=%llu parent_transid=%llu key expected=(%llu,%u,%llu) has=(%llu,%u,%llu)",
+ eb->start, parent_transid, first_key->objectid,
+ first_key->type, first_key->offset,
+ found_key.objectid, found_key.type,
+ found_key.offset);
}
#endif
return ret;
@@ -493,7 +493,7 @@ static int btree_read_extent_buffer_pages(struct btrfs_fs_info *fs_info,
parent_transid, 0))
ret = -EIO;
else if (verify_level_key(fs_info, eb, level,
- first_key))
+ first_key, parent_transid))
ret = -EUCLEAN;
else
break;
@@ -1185,7 +1185,6 @@ static void __setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info,
root->inode_tree = RB_ROOT;
INIT_RADIX_TREE(&root->delayed_nodes_tree, GFP_ATOMIC);
root->block_rsv = NULL;
- root->orphan_block_rsv = NULL;
INIT_LIST_HEAD(&root->dirty_list);
INIT_LIST_HEAD(&root->root_list);
@@ -1195,7 +1194,6 @@ static void __setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info,
INIT_LIST_HEAD(&root->ordered_root);
INIT_LIST_HEAD(&root->logged_list[0]);
INIT_LIST_HEAD(&root->logged_list[1]);
- spin_lock_init(&root->orphan_lock);
spin_lock_init(&root->inode_lock);
spin_lock_init(&root->delalloc_lock);
spin_lock_init(&root->ordered_extent_lock);
@@ -1216,7 +1214,6 @@ static void __setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info,
atomic_set(&root->log_commit[1], 0);
atomic_set(&root->log_writers, 0);
atomic_set(&root->log_batch, 0);
- atomic_set(&root->orphan_inodes, 0);
refcount_set(&root->refs, 1);
atomic_set(&root->will_be_snapshotted, 0);
root->log_transid = 0;
@@ -2164,7 +2161,6 @@ static void btrfs_init_balance(struct btrfs_fs_info *fs_info)
{
spin_lock_init(&fs_info->balance_lock);
mutex_init(&fs_info->balance_mutex);
- atomic_set(&fs_info->balance_running, 0);
atomic_set(&fs_info->balance_pause_req, 0);
atomic_set(&fs_info->balance_cancel_req, 0);
fs_info->balance_ctl = NULL;
@@ -2442,6 +2438,211 @@ out:
return ret;
}
+/*
+ * Real super block validation
+ * NOTE: super csum type and incompat features will not be checked here.
+ *
+ * @sb: super block to check
+ * @mirror_num: the super block copy whose bytenr should be checked:
+ * 0 the primary (1st) sb
+ * 1, 2 the 2nd and 3rd backup copies
+ * -1 skip the bytenr check
+ */
+static int validate_super(struct btrfs_fs_info *fs_info,
+ struct btrfs_super_block *sb, int mirror_num)
+{
+ u64 nodesize = btrfs_super_nodesize(sb);
+ u64 sectorsize = btrfs_super_sectorsize(sb);
+ int ret = 0;
+
+ if (btrfs_super_magic(sb) != BTRFS_MAGIC) {
+ btrfs_err(fs_info, "no valid FS found");
+ ret = -EINVAL;
+ }
+ if (btrfs_super_flags(sb) & ~BTRFS_SUPER_FLAG_SUPP) {
+ btrfs_err(fs_info, "unrecognized or unsupported super flag: %llu",
+ btrfs_super_flags(sb) & ~BTRFS_SUPER_FLAG_SUPP);
+ ret = -EINVAL;
+ }
+ if (btrfs_super_root_level(sb) >= BTRFS_MAX_LEVEL) {
+ btrfs_err(fs_info, "tree_root level too big: %d >= %d",
+ btrfs_super_root_level(sb), BTRFS_MAX_LEVEL);
+ ret = -EINVAL;
+ }
+ if (btrfs_super_chunk_root_level(sb) >= BTRFS_MAX_LEVEL) {
+ btrfs_err(fs_info, "chunk_root level too big: %d >= %d",
+ btrfs_super_chunk_root_level(sb), BTRFS_MAX_LEVEL);
+ ret = -EINVAL;
+ }
+ if (btrfs_super_log_root_level(sb) >= BTRFS_MAX_LEVEL) {
+ btrfs_err(fs_info, "log_root level too big: %d >= %d",
+ btrfs_super_log_root_level(sb), BTRFS_MAX_LEVEL);
+ ret = -EINVAL;
+ }
+
+ /*
+ * Check sectorsize and nodesize first; other checks will need them.
+ * Check all possible sectorsizes (4K, 8K, 16K, 32K, 64K) here.
+ */
+ if (!is_power_of_2(sectorsize) || sectorsize < 4096 ||
+ sectorsize > BTRFS_MAX_METADATA_BLOCKSIZE) {
+ btrfs_err(fs_info, "invalid sectorsize %llu", sectorsize);
+ ret = -EINVAL;
+ }
+ /* Only PAGE_SIZE is supported for now */
+ if (sectorsize != PAGE_SIZE) {
+ btrfs_err(fs_info,
+ "sectorsize %llu not supported yet, only support %lu",
+ sectorsize, PAGE_SIZE);
+ ret = -EINVAL;
+ }
+ if (!is_power_of_2(nodesize) || nodesize < sectorsize ||
+ nodesize > BTRFS_MAX_METADATA_BLOCKSIZE) {
+ btrfs_err(fs_info, "invalid nodesize %llu", nodesize);
+ ret = -EINVAL;
+ }
+ if (nodesize != le32_to_cpu(sb->__unused_leafsize)) {
+ btrfs_err(fs_info, "invalid leafsize %u, should be %llu",
+ le32_to_cpu(sb->__unused_leafsize), nodesize);
+ ret = -EINVAL;
+ }
+
+ /* Root alignment check */
+ if (!IS_ALIGNED(btrfs_super_root(sb), sectorsize)) {
+ btrfs_warn(fs_info, "tree_root block unaligned: %llu",
+ btrfs_super_root(sb));
+ ret = -EINVAL;
+ }
+ if (!IS_ALIGNED(btrfs_super_chunk_root(sb), sectorsize)) {
+ btrfs_warn(fs_info, "chunk_root block unaligned: %llu",
+ btrfs_super_chunk_root(sb));
+ ret = -EINVAL;
+ }
+ if (!IS_ALIGNED(btrfs_super_log_root(sb), sectorsize)) {
+ btrfs_warn(fs_info, "log_root block unaligned: %llu",
+ btrfs_super_log_root(sb));
+ ret = -EINVAL;
+ }
+
+ if (memcmp(fs_info->fsid, sb->dev_item.fsid, BTRFS_FSID_SIZE) != 0) {
+ btrfs_err(fs_info,
+ "dev_item UUID does not match fsid: %pU != %pU",
+ fs_info->fsid, sb->dev_item.fsid);
+ ret = -EINVAL;
+ }
+
+ /*
+ * Hint to catch really bogus numbers, bitflips or so; more exact checks
+ * are done later.
+ */
+ if (btrfs_super_bytes_used(sb) < 6 * btrfs_super_nodesize(sb)) {
+ btrfs_err(fs_info, "bytes_used is too small %llu",
+ btrfs_super_bytes_used(sb));
+ ret = -EINVAL;
+ }
+ if (!is_power_of_2(btrfs_super_stripesize(sb))) {
+ btrfs_err(fs_info, "invalid stripesize %u",
+ btrfs_super_stripesize(sb));
+ ret = -EINVAL;
+ }
+ if (btrfs_super_num_devices(sb) > (1UL << 31))
+ btrfs_warn(fs_info, "suspicious number of devices: %llu",
+ btrfs_super_num_devices(sb));
+ if (btrfs_super_num_devices(sb) == 0) {
+ btrfs_err(fs_info, "number of devices is 0");
+ ret = -EINVAL;
+ }
+
+ if (mirror_num >= 0 &&
+ btrfs_super_bytenr(sb) != btrfs_sb_offset(mirror_num)) {
+ btrfs_err(fs_info, "super offset mismatch %llu != %u",
+ btrfs_super_bytenr(sb), BTRFS_SUPER_INFO_OFFSET);
+ ret = -EINVAL;
+ }
+
+ /*
+ * Obvious sys_chunk_array corruption: it must hold at least one key
+ * and one chunk.
+ */
+ if (btrfs_super_sys_array_size(sb) > BTRFS_SYSTEM_CHUNK_ARRAY_SIZE) {
+ btrfs_err(fs_info, "system chunk array too big %u > %u",
+ btrfs_super_sys_array_size(sb),
+ BTRFS_SYSTEM_CHUNK_ARRAY_SIZE);
+ ret = -EINVAL;
+ }
+ if (btrfs_super_sys_array_size(sb) < sizeof(struct btrfs_disk_key)
+ + sizeof(struct btrfs_chunk)) {
+ btrfs_err(fs_info, "system chunk array too small %u < %zu",
+ btrfs_super_sys_array_size(sb),
+ sizeof(struct btrfs_disk_key)
+ + sizeof(struct btrfs_chunk));
+ ret = -EINVAL;
+ }
+
+ /*
+ * The generation is a global counter; we'll trust it more than the others,
+ * but it's still possible that it's the one that's wrong.
+ */
+ if (btrfs_super_generation(sb) < btrfs_super_chunk_root_generation(sb))
+ btrfs_warn(fs_info,
+ "suspicious: generation < chunk_root_generation: %llu < %llu",
+ btrfs_super_generation(sb),
+ btrfs_super_chunk_root_generation(sb));
+ if (btrfs_super_generation(sb) < btrfs_super_cache_generation(sb)
+ && btrfs_super_cache_generation(sb) != (u64)-1)
+ btrfs_warn(fs_info,
+ "suspicious: generation < cache_generation: %llu < %llu",
+ btrfs_super_generation(sb),
+ btrfs_super_cache_generation(sb));
+
+ return ret;
+}
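
For the new mirror_num parameter, btrfs_sb_offset() maps the copy number to the fixed on-disk super block locations. For reference, it is roughly the following (with BTRFS_SUPER_MIRROR_SHIFT of 12, giving 64KiB, 64MiB and 256GiB for copies 0, 1 and 2):

    u64 btrfs_sb_offset(int mirror)
    {
            u64 start = SZ_16K;

            if (mirror)
                    return start << (BTRFS_SUPER_MIRROR_SHIFT * mirror);
            return BTRFS_SUPER_INFO_OFFSET;         /* 64KiB for the primary */
    }
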
+
+/*
+ * Validation of super block at mount time.
+ * Some checks already done early at mount time, like the csum type and
+ * incompat flags, will be skipped.
+ */
+static int btrfs_validate_mount_super(struct btrfs_fs_info *fs_info)
+{
+ return validate_super(fs_info, fs_info->super_copy, 0);
+}
+
+/*
+ * Validation of super block at write time.
+ * Some checks, like the bytenr check, will be skipped as their values will
+ * be overwritten soon.
+ * Extra checks, like the csum type and incompat flags, will be done here.
+ */
+static int btrfs_validate_write_super(struct btrfs_fs_info *fs_info,
+ struct btrfs_super_block *sb)
+{
+ int ret;
+
+ ret = validate_super(fs_info, sb, -1);
+ if (ret < 0)
+ goto out;
+ if (btrfs_super_csum_type(sb) != BTRFS_CSUM_TYPE_CRC32) {
+ ret = -EUCLEAN;
+ btrfs_err(fs_info, "invalid csum type, has %u want %u",
+ btrfs_super_csum_type(sb), BTRFS_CSUM_TYPE_CRC32);
+ goto out;
+ }
+ if (btrfs_super_incompat_flags(sb) & ~BTRFS_FEATURE_INCOMPAT_SUPP) {
+ ret = -EUCLEAN;
+ btrfs_err(fs_info,
+ "invalid incompat flags, has 0x%llx valid mask 0x%llx",
+ btrfs_super_incompat_flags(sb),
+ (unsigned long long)BTRFS_FEATURE_INCOMPAT_SUPP);
+ goto out;
+ }
+out:
+ if (ret < 0)
+ btrfs_err(fs_info,
+ "super block corruption detected before writing it to disk");
+ return ret;
+}
+
int open_ctree(struct super_block *sb,
struct btrfs_fs_devices *fs_devices,
char *options)
@@ -2601,7 +2802,6 @@ int open_ctree(struct super_block *sb,
mutex_init(&fs_info->chunk_mutex);
mutex_init(&fs_info->transaction_kthread_mutex);
mutex_init(&fs_info->cleaner_mutex);
- mutex_init(&fs_info->volume_mutex);
mutex_init(&fs_info->ro_block_group_mutex);
init_rwsem(&fs_info->commit_root_sem);
init_rwsem(&fs_info->cleanup_work_sem);
@@ -2668,7 +2868,7 @@ int open_ctree(struct super_block *sb,
memcpy(fs_info->fsid, fs_info->super_copy->fsid, BTRFS_FSID_SIZE);
- ret = btrfs_check_super_valid(fs_info);
+ ret = btrfs_validate_mount_super(fs_info);
if (ret) {
btrfs_err(fs_info, "superblock contains fatal errors");
err = -EINVAL;
@@ -3523,7 +3723,7 @@ int btrfs_get_num_tolerated_disk_barrier_failures(u64 flags)
for (raid_type = 0; raid_type < BTRFS_NR_RAID_TYPES; raid_type++) {
if (raid_type == BTRFS_RAID_SINGLE)
continue;
- if (!(flags & btrfs_raid_group[raid_type]))
+ if (!(flags & btrfs_raid_array[raid_type].bg_flag))
continue;
min_tolerated = min(min_tolerated,
btrfs_raid_array[raid_type].
@@ -3603,6 +3803,14 @@ int write_all_supers(struct btrfs_fs_info *fs_info, int max_mirrors)
flags = btrfs_super_flags(sb);
btrfs_set_super_flags(sb, flags | BTRFS_HEADER_FLAG_WRITTEN);
+ ret = btrfs_validate_write_super(fs_info, sb);
+ if (ret < 0) {
+ mutex_unlock(&fs_info->fs_devices->device_list_mutex);
+ btrfs_handle_fs_error(fs_info, -EUCLEAN,
+ "unexpected superblock corruption detected");
+ return -EUCLEAN;
+ }
+
ret = write_dev_supers(dev, sb, max_mirrors);
if (ret)
total_errors++;
@@ -3674,8 +3882,6 @@ static void free_fs_root(struct btrfs_root *root)
{
iput(root->ino_cache_inode);
WARN_ON(!RB_EMPTY_ROOT(&root->inode_tree));
- btrfs_free_block_rsv(root->fs_info, root->orphan_block_rsv);
- root->orphan_block_rsv = NULL;
if (root->anon_dev)
free_anon_bdev(root->anon_dev);
if (root->subv_writers)
@@ -3766,7 +3972,6 @@ int btrfs_commit_super(struct btrfs_fs_info *fs_info)
void close_ctree(struct btrfs_fs_info *fs_info)
{
- struct btrfs_root *root = fs_info->tree_root;
int ret;
set_bit(BTRFS_FS_CLOSING_START, &fs_info->flags);
@@ -3818,6 +4023,7 @@ void close_ctree(struct btrfs_fs_info *fs_info)
set_bit(BTRFS_FS_CLOSING_DONE, &fs_info->flags);
btrfs_free_qgroup_config(fs_info);
+ ASSERT(list_empty(&fs_info->delalloc_roots));
if (percpu_counter_sum(&fs_info->delalloc_bytes)) {
btrfs_info(fs_info, "at unmount delalloc count %lld",
@@ -3861,9 +4067,6 @@ void close_ctree(struct btrfs_fs_info *fs_info)
btrfs_free_stripe_hash_table(fs_info);
btrfs_free_ref_cache(fs_info);
- __btrfs_free_block_rsv(root->orphan_block_rsv);
- root->orphan_block_rsv = NULL;
-
while (!list_empty(&fs_info->pinned_chunks)) {
struct extent_map *em;
@@ -3974,166 +4177,17 @@ int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid, int level,
level, first_key);
}
-static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info)
-{
- struct btrfs_super_block *sb = fs_info->super_copy;
- u64 nodesize = btrfs_super_nodesize(sb);
- u64 sectorsize = btrfs_super_sectorsize(sb);
- int ret = 0;
-
- if (btrfs_super_magic(sb) != BTRFS_MAGIC) {
- btrfs_err(fs_info, "no valid FS found");
- ret = -EINVAL;
- }
- if (btrfs_super_flags(sb) & ~BTRFS_SUPER_FLAG_SUPP) {
- btrfs_err(fs_info, "unrecognized or unsupported super flag: %llu",
- btrfs_super_flags(sb) & ~BTRFS_SUPER_FLAG_SUPP);
- ret = -EINVAL;
- }
- if (btrfs_super_root_level(sb) >= BTRFS_MAX_LEVEL) {
- btrfs_err(fs_info, "tree_root level too big: %d >= %d",
- btrfs_super_root_level(sb), BTRFS_MAX_LEVEL);
- ret = -EINVAL;
- }
- if (btrfs_super_chunk_root_level(sb) >= BTRFS_MAX_LEVEL) {
- btrfs_err(fs_info, "chunk_root level too big: %d >= %d",
- btrfs_super_chunk_root_level(sb), BTRFS_MAX_LEVEL);
- ret = -EINVAL;
- }
- if (btrfs_super_log_root_level(sb) >= BTRFS_MAX_LEVEL) {
- btrfs_err(fs_info, "log_root level too big: %d >= %d",
- btrfs_super_log_root_level(sb), BTRFS_MAX_LEVEL);
- ret = -EINVAL;
- }
-
- /*
- * Check sectorsize and nodesize first, other check will need it.
- * Check all possible sectorsize(4K, 8K, 16K, 32K, 64K) here.
- */
- if (!is_power_of_2(sectorsize) || sectorsize < 4096 ||
- sectorsize > BTRFS_MAX_METADATA_BLOCKSIZE) {
- btrfs_err(fs_info, "invalid sectorsize %llu", sectorsize);
- ret = -EINVAL;
- }
- /* Only PAGE SIZE is supported yet */
- if (sectorsize != PAGE_SIZE) {
- btrfs_err(fs_info,
- "sectorsize %llu not supported yet, only support %lu",
- sectorsize, PAGE_SIZE);
- ret = -EINVAL;
- }
- if (!is_power_of_2(nodesize) || nodesize < sectorsize ||
- nodesize > BTRFS_MAX_METADATA_BLOCKSIZE) {
- btrfs_err(fs_info, "invalid nodesize %llu", nodesize);
- ret = -EINVAL;
- }
- if (nodesize != le32_to_cpu(sb->__unused_leafsize)) {
- btrfs_err(fs_info, "invalid leafsize %u, should be %llu",
- le32_to_cpu(sb->__unused_leafsize), nodesize);
- ret = -EINVAL;
- }
-
- /* Root alignment check */
- if (!IS_ALIGNED(btrfs_super_root(sb), sectorsize)) {
- btrfs_warn(fs_info, "tree_root block unaligned: %llu",
- btrfs_super_root(sb));
- ret = -EINVAL;
- }
- if (!IS_ALIGNED(btrfs_super_chunk_root(sb), sectorsize)) {
- btrfs_warn(fs_info, "chunk_root block unaligned: %llu",
- btrfs_super_chunk_root(sb));
- ret = -EINVAL;
- }
- if (!IS_ALIGNED(btrfs_super_log_root(sb), sectorsize)) {
- btrfs_warn(fs_info, "log_root block unaligned: %llu",
- btrfs_super_log_root(sb));
- ret = -EINVAL;
- }
-
- if (memcmp(fs_info->fsid, sb->dev_item.fsid, BTRFS_FSID_SIZE) != 0) {
- btrfs_err(fs_info,
- "dev_item UUID does not match fsid: %pU != %pU",
- fs_info->fsid, sb->dev_item.fsid);
- ret = -EINVAL;
- }
-
- /*
- * Hint to catch really bogus numbers, bitflips or so, more exact checks are
- * done later
- */
- if (btrfs_super_bytes_used(sb) < 6 * btrfs_super_nodesize(sb)) {
- btrfs_err(fs_info, "bytes_used is too small %llu",
- btrfs_super_bytes_used(sb));
- ret = -EINVAL;
- }
- if (!is_power_of_2(btrfs_super_stripesize(sb))) {
- btrfs_err(fs_info, "invalid stripesize %u",
- btrfs_super_stripesize(sb));
- ret = -EINVAL;
- }
- if (btrfs_super_num_devices(sb) > (1UL << 31))
- btrfs_warn(fs_info, "suspicious number of devices: %llu",
- btrfs_super_num_devices(sb));
- if (btrfs_super_num_devices(sb) == 0) {
- btrfs_err(fs_info, "number of devices is 0");
- ret = -EINVAL;
- }
-
- if (btrfs_super_bytenr(sb) != BTRFS_SUPER_INFO_OFFSET) {
- btrfs_err(fs_info, "super offset mismatch %llu != %u",
- btrfs_super_bytenr(sb), BTRFS_SUPER_INFO_OFFSET);
- ret = -EINVAL;
- }
-
- /*
- * Obvious sys_chunk_array corruptions, it must hold at least one key
- * and one chunk
- */
- if (btrfs_super_sys_array_size(sb) > BTRFS_SYSTEM_CHUNK_ARRAY_SIZE) {
- btrfs_err(fs_info, "system chunk array too big %u > %u",
- btrfs_super_sys_array_size(sb),
- BTRFS_SYSTEM_CHUNK_ARRAY_SIZE);
- ret = -EINVAL;
- }
- if (btrfs_super_sys_array_size(sb) < sizeof(struct btrfs_disk_key)
- + sizeof(struct btrfs_chunk)) {
- btrfs_err(fs_info, "system chunk array too small %u < %zu",
- btrfs_super_sys_array_size(sb),
- sizeof(struct btrfs_disk_key)
- + sizeof(struct btrfs_chunk));
- ret = -EINVAL;
- }
-
- /*
- * The generation is a global counter, we'll trust it more than the others
- * but it's still possible that it's the one that's wrong.
- */
- if (btrfs_super_generation(sb) < btrfs_super_chunk_root_generation(sb))
- btrfs_warn(fs_info,
- "suspicious: generation < chunk_root_generation: %llu < %llu",
- btrfs_super_generation(sb),
- btrfs_super_chunk_root_generation(sb));
- if (btrfs_super_generation(sb) < btrfs_super_cache_generation(sb)
- && btrfs_super_cache_generation(sb) != (u64)-1)
- btrfs_warn(fs_info,
- "suspicious: generation < cache_generation: %llu < %llu",
- btrfs_super_generation(sb),
- btrfs_super_cache_generation(sb));
-
- return ret;
-}
-
static void btrfs_error_commit_super(struct btrfs_fs_info *fs_info)
{
+ /* cleanup FS via transaction */
+ btrfs_cleanup_transaction(fs_info);
+
mutex_lock(&fs_info->cleaner_mutex);
btrfs_run_delayed_iputs(fs_info);
mutex_unlock(&fs_info->cleaner_mutex);
down_write(&fs_info->cleanup_work_sem);
up_write(&fs_info->cleanup_work_sem);
-
- /* cleanup FS via transaction */
- btrfs_cleanup_transaction(fs_info);
}
static void btrfs_destroy_ordered_extents(struct btrfs_root *root)
@@ -4258,19 +4312,23 @@ static void btrfs_destroy_delalloc_inodes(struct btrfs_root *root)
list_splice_init(&root->delalloc_inodes, &splice);
while (!list_empty(&splice)) {
+ struct inode *inode = NULL;
btrfs_inode = list_first_entry(&splice, struct btrfs_inode,
delalloc_inodes);
-
- list_del_init(&btrfs_inode->delalloc_inodes);
- clear_bit(BTRFS_INODE_IN_DELALLOC_LIST,
- &btrfs_inode->runtime_flags);
+ __btrfs_del_delalloc_inode(root, btrfs_inode);
spin_unlock(&root->delalloc_lock);
- btrfs_invalidate_inodes(btrfs_inode->root);
-
+ /*
+ * Make sure we get a live inode and that it won't disappear in the
+ * meantime.
+ */
+ inode = igrab(&btrfs_inode->vfs_inode);
+ if (inode) {
+ invalidate_inode_pages2(inode->i_mapping);
+ iput(inode);
+ }
spin_lock(&root->delalloc_lock);
}
-
spin_unlock(&root->delalloc_lock);
}
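
The rewritten loop never sleeps under delalloc_lock: the inode is unhooked under the lock, the spinlock dropped, the inode pinned with igrab() (which fails for a dying inode) for the page-cache invalidation, and the lock retaken for the next entry. The drop-lock/pin/operate/re-lock shape as a compilable sketch, with a plain refcount standing in for the inode:

    #include <pthread.h>
    #include <stdio.h>

    struct obj { int refs; int live; };

    static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;

    static struct obj *grab(struct obj *o)  /* like igrab(): NULL if dying */
    {
            /* igrab() is internally locked in the kernel; elided here */
            if (!o->live)
                    return NULL;
            o->refs++;
            return o;
    }

    static void put(struct obj *o) { o->refs--; }   /* like iput() */

    static void process_one(struct obj *o)
    {
            pthread_mutex_lock(&list_lock);
            /* ... unhook o from the list under the lock ... */
            pthread_mutex_unlock(&list_lock);       /* never sleep under it */

            struct obj *held = grab(o);
            if (held) {
                    printf("invalidating, refs=%d\n", held->refs);
                    put(held);
            }

            pthread_mutex_lock(&list_lock);         /* retake for next entry */
            pthread_mutex_unlock(&list_lock);
    }

    int main(void)
    {
            struct obj o = { .refs = 1, .live = 1 };
            process_one(&o);
            return 0;
    }
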
@@ -4286,7 +4344,6 @@ static void btrfs_destroy_all_delalloc_inodes(struct btrfs_fs_info *fs_info)
while (!list_empty(&splice)) {
root = list_first_entry(&splice, struct btrfs_root,
delalloc_root);
- list_del_init(&root->delalloc_root);
root = btrfs_grab_fs_root(root);
BUG_ON(!root);
spin_unlock(&fs_info->delalloc_root_lock);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index e2f16b68fcbf..3d9fe58c0080 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -66,10 +66,8 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
u64 flags, u64 owner, u64 offset,
struct btrfs_key *ins, int ref_mod);
static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
- u64 parent, u64 root_objectid,
- u64 flags, struct btrfs_disk_key *key,
- int level, struct btrfs_key *ins);
+ struct btrfs_delayed_ref_node *node,
+ struct btrfs_delayed_extent_op *extent_op);
static int do_chunk_alloc(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 flags,
int force);
@@ -256,7 +254,7 @@ static int exclude_super_stripes(struct btrfs_fs_info *fs_info,
for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
bytenr = btrfs_sb_offset(i);
ret = btrfs_rmap_block(fs_info, cache->key.objectid,
- bytenr, 0, &logical, &nr, &stripe_len);
+ bytenr, &logical, &nr, &stripe_len);
if (ret)
return ret;
@@ -343,8 +341,9 @@ static void fragment_free_space(struct btrfs_block_group_cache *block_group)
* since their free space will be released as soon as the transaction commits.
*/
u64 add_new_free_space(struct btrfs_block_group_cache *block_group,
- struct btrfs_fs_info *info, u64 start, u64 end)
+ u64 start, u64 end)
{
+ struct btrfs_fs_info *info = block_group->fs_info;
u64 extent_start, extent_end, size, total_added = 0;
int ret;
@@ -489,8 +488,7 @@ next:
if (key.type == BTRFS_EXTENT_ITEM_KEY ||
key.type == BTRFS_METADATA_ITEM_KEY) {
- total_found += add_new_free_space(block_group,
- fs_info, last,
+ total_found += add_new_free_space(block_group, last,
key.objectid);
if (key.type == BTRFS_METADATA_ITEM_KEY)
last = key.objectid +
@@ -508,7 +506,7 @@ next:
}
ret = 0;
- total_found += add_new_free_space(block_group, fs_info, last,
+ total_found += add_new_free_space(block_group, last,
block_group->key.objectid +
block_group->key.offset);
caching_ctl->progress = (u64)-1;
@@ -744,12 +742,12 @@ static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
}
static void add_pinned_bytes(struct btrfs_fs_info *fs_info, s64 num_bytes,
- u64 owner, u64 root_objectid)
+ bool metadata, u64 root_objectid)
{
struct btrfs_space_info *space_info;
u64 flags;
- if (owner < BTRFS_FIRST_FREE_OBJECTID) {
+ if (metadata) {
if (root_objectid == BTRFS_CHUNK_TREE_OBJECTID)
flags = BTRFS_BLOCK_GROUP_SYSTEM;
else
@@ -2200,8 +2198,11 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
&old_ref_mod, &new_ref_mod);
}
- if (ret == 0 && old_ref_mod < 0 && new_ref_mod >= 0)
- add_pinned_bytes(fs_info, -num_bytes, owner, root_objectid);
+ if (ret == 0 && old_ref_mod < 0 && new_ref_mod >= 0) {
+ bool metadata = owner < BTRFS_FIRST_FREE_OBJECTID;
+
+ add_pinned_bytes(fs_info, -num_bytes, metadata, root_objectid);
+ }
return ret;
}
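
The owner test here (and again in btrfs_free_extent() further down) works because extent owners below BTRFS_FIRST_FREE_OBJECTID (256) are tree-block owners; the level is what btrfs_free_tree_block() passes, while data extents are owned by inode numbers, which start at 256. Hoisting that comparison lets add_pinned_bytes() take an explicit flag; the new call contract is simply:

    /* metadata: tree level (< 256); data: inode number (>= 256) */
    bool metadata = owner < BTRFS_FIRST_FREE_OBJECTID;

    add_pinned_bytes(fs_info, -num_bytes, metadata, root_objectid);
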
@@ -2428,10 +2429,8 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
{
int ret = 0;
struct btrfs_delayed_tree_ref *ref;
- struct btrfs_key ins;
u64 parent = 0;
u64 ref_root = 0;
- bool skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
ref = btrfs_delayed_node_to_tree_ref(node);
trace_run_delayed_tree_ref(fs_info, node, ref, node->action);
@@ -2440,15 +2439,6 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
parent = ref->parent;
ref_root = ref->root;
- ins.objectid = node->bytenr;
- if (skinny_metadata) {
- ins.offset = ref->level;
- ins.type = BTRFS_METADATA_ITEM_KEY;
- } else {
- ins.offset = node->num_bytes;
- ins.type = BTRFS_EXTENT_ITEM_KEY;
- }
-
if (node->ref_mod != 1) {
btrfs_err(fs_info,
"btree block(%llu) has %d references rather than 1: action %d ref_root %llu parent %llu",
@@ -2458,11 +2448,7 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
}
if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) {
BUG_ON(!extent_op || !extent_op->update_flags);
- ret = alloc_reserved_tree_block(trans, fs_info,
- parent, ref_root,
- extent_op->flags_to_set,
- &extent_op->key,
- ref->level, &ins);
+ ret = alloc_reserved_tree_block(trans, node, extent_op);
} else if (node->action == BTRFS_ADD_DELAYED_REF) {
ret = __btrfs_inc_extent_ref(trans, fs_info, node,
parent, ref_root,
@@ -2594,8 +2580,8 @@ static int cleanup_ref_head(struct btrfs_trans_handle *trans,
delayed_refs->num_heads--;
rb_erase(&head->href_node, &delayed_refs->href_root);
RB_CLEAR_NODE(&head->href_node);
- spin_unlock(&delayed_refs->lock);
spin_unlock(&head->lock);
+ spin_unlock(&delayed_refs->lock);
atomic_dec(&delayed_refs->num_entries);
trace_run_delayed_ref_head(fs_info, head, 0);
@@ -2700,17 +2686,12 @@ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
* insert_inline_extent_backref()).
*/
spin_lock(&locked_ref->lock);
- btrfs_merge_delayed_refs(trans, fs_info, delayed_refs,
- locked_ref);
+ btrfs_merge_delayed_refs(trans, delayed_refs, locked_ref);
- /*
- * locked_ref is the head node, so we have to go one
- * node back for any delayed ref updates
- */
ref = select_delayed_ref(locked_ref);
if (ref && ref->seq &&
- btrfs_check_delayed_seq(fs_info, delayed_refs, ref->seq)) {
+ btrfs_check_delayed_seq(fs_info, ref->seq)) {
spin_unlock(&locked_ref->lock);
unselect_delayed_ref_head(delayed_refs, locked_ref);
locked_ref = NULL;
@@ -3142,7 +3123,11 @@ static noinline int check_delayed_ref(struct btrfs_root *root,
struct rb_node *node;
int ret = 0;
+ spin_lock(&root->fs_info->trans_lock);
cur_trans = root->fs_info->running_transaction;
+ if (cur_trans)
+ refcount_inc(&cur_trans->use_count);
+ spin_unlock(&root->fs_info->trans_lock);
if (!cur_trans)
return 0;
@@ -3151,6 +3136,7 @@ static noinline int check_delayed_ref(struct btrfs_root *root,
head = btrfs_find_delayed_ref_head(delayed_refs, bytenr);
if (!head) {
spin_unlock(&delayed_refs->lock);
+ btrfs_put_transaction(cur_trans);
return 0;
}
@@ -3167,6 +3153,7 @@ static noinline int check_delayed_ref(struct btrfs_root *root,
mutex_lock(&head->mutex);
mutex_unlock(&head->mutex);
btrfs_put_delayed_ref_head(head);
+ btrfs_put_transaction(cur_trans);
return -EAGAIN;
}
spin_unlock(&delayed_refs->lock);
@@ -3199,6 +3186,7 @@ static noinline int check_delayed_ref(struct btrfs_root *root,
}
spin_unlock(&head->lock);
mutex_unlock(&head->mutex);
+ btrfs_put_transaction(cur_trans);
return ret;
}
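
check_delayed_ref() used to read fs_info->running_transaction with no reference held, so the transaction could be freed under it. The fix pins it under trans_lock with a use_count bump and pairs every return, including the -EAGAIN retry path, with btrfs_put_transaction(). The lookup-and-pin shape as a compilable sketch:

    #include <pthread.h>
    #include <stdatomic.h>
    #include <stdlib.h>

    struct txn { atomic_int use_count; };

    static pthread_mutex_t trans_lock = PTHREAD_MUTEX_INITIALIZER;
    static struct txn *running_txn;         /* may be replaced by others */

    static struct txn *get_running_txn(void)
    {
            pthread_mutex_lock(&trans_lock);
            struct txn *t = running_txn;
            if (t)
                    atomic_fetch_add(&t->use_count, 1);  /* pin under lock */
            pthread_mutex_unlock(&trans_lock);
            return t;
    }

    static void put_txn(struct txn *t)
    {
            if (atomic_fetch_sub(&t->use_count, 1) == 1)
                    free(t);                /* last reference dropped */
    }

    int main(void)
    {
            running_txn = calloc(1, sizeof(*running_txn));
            atomic_store(&running_txn->use_count, 1);

            struct txn *t = get_running_txn();
            if (t)
                    put_txn(t);             /* every exit path drops the pin */

            put_txn(running_txn);
            return 0;
    }
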
@@ -3284,7 +3272,7 @@ int btrfs_cross_ref_exist(struct btrfs_root *root, u64 objectid, u64 offset,
path = btrfs_alloc_path();
if (!path)
- return -ENOENT;
+ return -ENOMEM;
do {
ret = check_committed_ref(root, path, objectid,
@@ -4019,8 +4007,7 @@ static const char *alloc_name(u64 flags)
};
}
-static int create_space_info(struct btrfs_fs_info *info, u64 flags,
- struct btrfs_space_info **new)
+static int create_space_info(struct btrfs_fs_info *info, u64 flags)
{
struct btrfs_space_info *space_info;
@@ -4058,7 +4045,6 @@ static int create_space_info(struct btrfs_fs_info *info, u64 flags,
return ret;
}
- *new = space_info;
list_add_rcu(&space_info->list, &info->space_info);
if (flags & BTRFS_BLOCK_GROUP_DATA)
info->data_sinfo = space_info;
@@ -4115,7 +4101,7 @@ static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
* returns target flags in extended format or 0 if restripe for this
* chunk_type is not in progress
*
- * should be called with either volume_mutex or balance_lock held
+ * should be called with balance_lock held
*/
static u64 get_restripe_target(struct btrfs_fs_info *fs_info, u64 flags)
{
@@ -4171,7 +4157,7 @@ static u64 btrfs_reduce_alloc_profile(struct btrfs_fs_info *fs_info, u64 flags)
/* First, mask out the RAID levels which aren't possible */
for (raid_type = 0; raid_type < BTRFS_NR_RAID_TYPES; raid_type++) {
if (num_devices >= btrfs_raid_array[raid_type].devs_min)
- allowed |= btrfs_raid_group[raid_type];
+ allowed |= btrfs_raid_array[raid_type].bg_flag;
}
allowed &= flags;
@@ -4334,7 +4320,7 @@ commit_trans:
need_commit--;
if (need_commit > 0) {
- btrfs_start_delalloc_roots(fs_info, 0, -1);
+ btrfs_start_delalloc_roots(fs_info, -1);
btrfs_wait_ordered_roots(fs_info, U64_MAX, 0,
(u64)-1);
}
@@ -4671,12 +4657,14 @@ again:
trans->allocating_chunk = false;
spin_lock(&space_info->lock);
- if (ret < 0 && ret != -ENOSPC)
- goto out;
- if (ret)
- space_info->full = 1;
- else
+ if (ret < 0) {
+ if (ret == -ENOSPC)
+ space_info->full = 1;
+ else
+ goto out;
+ } else {
ret = 1;
+ }
space_info->force_alloc = CHUNK_ALLOC_NO_FORCE;
out:
@@ -4785,7 +4773,7 @@ static void btrfs_writeback_inodes_sb_nr(struct btrfs_fs_info *fs_info,
* the filesystem is readonly(all dirty pages are written to
* the disk).
*/
- btrfs_start_delalloc_roots(fs_info, 0, nr_items);
+ btrfs_start_delalloc_roots(fs_info, nr_items);
if (!current->journal_info)
btrfs_wait_ordered_roots(fs_info, nr_items, 0, (u64)-1);
}
@@ -5942,44 +5930,6 @@ void btrfs_trans_release_chunk_metadata(struct btrfs_trans_handle *trans)
trans->chunk_bytes_reserved = 0;
}
-/* Can only return 0 or -ENOSPC */
-int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans,
- struct btrfs_inode *inode)
-{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
- struct btrfs_root *root = inode->root;
- /*
- * We always use trans->block_rsv here as we will have reserved space
- * for our orphan when starting the transaction, using get_block_rsv()
- * here will sometimes make us choose the wrong block rsv as we could be
- * doing a reloc inode for a non refcounted root.
- */
- struct btrfs_block_rsv *src_rsv = trans->block_rsv;
- struct btrfs_block_rsv *dst_rsv = root->orphan_block_rsv;
-
- /*
- * We need to hold space in order to delete our orphan item once we've
- * added it, so this takes the reservation so we can release it later
- * when we are truly done with the orphan item.
- */
- u64 num_bytes = btrfs_calc_trans_metadata_size(fs_info, 1);
-
- trace_btrfs_space_reservation(fs_info, "orphan", btrfs_ino(inode),
- num_bytes, 1);
- return btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes, 1);
-}
-
-void btrfs_orphan_release_metadata(struct btrfs_inode *inode)
-{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
- struct btrfs_root *root = inode->root;
- u64 num_bytes = btrfs_calc_trans_metadata_size(fs_info, 1);
-
- trace_btrfs_space_reservation(fs_info, "orphan", btrfs_ino(inode),
- num_bytes, 0);
- btrfs_block_rsv_release(fs_info, root->orphan_block_rsv, num_bytes);
-}
-
/*
* btrfs_subvolume_reserve_metadata() - reserve space for subvolume operation
* root: the root of the parent directory
@@ -5997,7 +5947,6 @@ void btrfs_orphan_release_metadata(struct btrfs_inode *inode)
int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
struct btrfs_block_rsv *rsv,
int items,
- u64 *qgroup_reserved,
bool use_global_rsv)
{
u64 num_bytes;
@@ -6015,8 +5964,6 @@ int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
num_bytes = 0;
}
- *qgroup_reserved = num_bytes;
-
num_bytes = btrfs_calc_trans_metadata_size(fs_info, items);
rsv->space_info = __find_space_info(fs_info,
BTRFS_BLOCK_GROUP_METADATA);
@@ -6026,8 +5973,8 @@ int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
if (ret == -ENOSPC && use_global_rsv)
ret = btrfs_block_rsv_migrate(global_rsv, rsv, num_bytes, 1);
- if (ret && *qgroup_reserved)
- btrfs_qgroup_free_meta_prealloc(root, *qgroup_reserved);
+ if (ret && num_bytes)
+ btrfs_qgroup_free_meta_prealloc(root, num_bytes);
return ret;
}
@@ -6347,6 +6294,7 @@ static int update_block_group(struct btrfs_trans_handle *trans,
spin_lock(&info->unused_bgs_lock);
if (list_empty(&cache->bg_list)) {
btrfs_get_block_group(cache);
+ trace_btrfs_add_unused_block_group(cache);
list_add_tail(&cache->bg_list,
&info->unused_bgs);
}
@@ -6504,6 +6452,7 @@ int btrfs_exclude_logged_extents(struct btrfs_fs_info *fs_info,
struct btrfs_key key;
int found_type;
int i;
+ int ret = 0;
if (!btrfs_fs_incompat(fs_info, MIXED_GROUPS))
return 0;
@@ -6520,10 +6469,12 @@ int btrfs_exclude_logged_extents(struct btrfs_fs_info *fs_info,
continue;
key.objectid = btrfs_file_extent_disk_bytenr(eb, item);
key.offset = btrfs_file_extent_disk_num_bytes(eb, item);
- __exclude_logged_extent(fs_info, key.objectid, key.offset);
+ ret = __exclude_logged_extent(fs_info, key.objectid, key.offset);
+ if (ret)
+ break;
}
- return 0;
+ return ret;
}
static void
@@ -7115,7 +7066,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
}
}
- ret = add_to_free_space_tree(trans, info, bytenr, num_bytes);
+ ret = add_to_free_space_tree(trans, bytenr, num_bytes);
if (ret) {
btrfs_abort_transaction(trans, ret);
goto out;
@@ -7259,7 +7210,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
}
out:
if (pin)
- add_pinned_bytes(fs_info, buf->len, btrfs_header_level(buf),
+ add_pinned_bytes(fs_info, buf->len, true,
root->root_key.objectid);
if (last_ref) {
@@ -7313,8 +7264,11 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans,
&old_ref_mod, &new_ref_mod);
}
- if (ret == 0 && old_ref_mod >= 0 && new_ref_mod < 0)
- add_pinned_bytes(fs_info, num_bytes, owner, root_objectid);
+ if (ret == 0 && old_ref_mod >= 0 && new_ref_mod < 0) {
+ bool metadata = owner < BTRFS_FIRST_FREE_OBJECTID;
+
+ add_pinned_bytes(fs_info, num_bytes, metadata, root_objectid);
+ }
return ret;
}
@@ -7366,24 +7320,6 @@ wait_block_group_cache_done(struct btrfs_block_group_cache *cache)
return ret;
}
-static const char *btrfs_raid_type_names[BTRFS_NR_RAID_TYPES] = {
- [BTRFS_RAID_RAID10] = "raid10",
- [BTRFS_RAID_RAID1] = "raid1",
- [BTRFS_RAID_DUP] = "dup",
- [BTRFS_RAID_RAID0] = "raid0",
- [BTRFS_RAID_SINGLE] = "single",
- [BTRFS_RAID_RAID5] = "raid5",
- [BTRFS_RAID_RAID6] = "raid6",
-};
-
-static const char *get_raid_name(enum btrfs_raid_types type)
-{
- if (type >= BTRFS_NR_RAID_TYPES)
- return NULL;
-
- return btrfs_raid_type_names[type];
-}
-
enum btrfs_loop_type {
LOOP_CACHING_NOWAIT = 0,
LOOP_CACHING_WAIT = 1,
@@ -7655,7 +7591,7 @@ have_block_group:
if (offset) {
/* we have a block, we're done */
spin_unlock(&last_ptr->refill_lock);
- trace_btrfs_reserve_extent_cluster(fs_info,
+ trace_btrfs_reserve_extent_cluster(
used_block_group,
search_start, num_bytes);
if (used_block_group != block_group) {
@@ -7728,7 +7664,7 @@ refill_cluster:
if (offset) {
/* we found one, proceed */
spin_unlock(&last_ptr->refill_lock);
- trace_btrfs_reserve_extent_cluster(fs_info,
+ trace_btrfs_reserve_extent_cluster(
block_group, search_start,
num_bytes);
goto checks;
@@ -7828,8 +7764,7 @@ checks:
ins->objectid = search_start;
ins->offset = num_bytes;
- trace_btrfs_reserve_extent(fs_info, block_group,
- search_start, num_bytes);
+ trace_btrfs_reserve_extent(block_group, search_start, num_bytes);
btrfs_release_block_group(block_group, delalloc);
break;
loop:
@@ -8177,8 +8112,7 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
btrfs_mark_buffer_dirty(path->nodes[0]);
btrfs_free_path(path);
- ret = remove_from_free_space_tree(trans, fs_info, ins->objectid,
- ins->offset);
+ ret = remove_from_free_space_tree(trans, ins->objectid, ins->offset);
if (ret)
return ret;
@@ -8193,37 +8127,52 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
}
static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
- u64 parent, u64 root_objectid,
- u64 flags, struct btrfs_disk_key *key,
- int level, struct btrfs_key *ins)
+ struct btrfs_delayed_ref_node *node,
+ struct btrfs_delayed_extent_op *extent_op)
{
+ struct btrfs_fs_info *fs_info = trans->fs_info;
int ret;
struct btrfs_extent_item *extent_item;
+ struct btrfs_key extent_key;
struct btrfs_tree_block_info *block_info;
struct btrfs_extent_inline_ref *iref;
struct btrfs_path *path;
struct extent_buffer *leaf;
+ struct btrfs_delayed_tree_ref *ref;
u32 size = sizeof(*extent_item) + sizeof(*iref);
- u64 num_bytes = ins->offset;
+ u64 num_bytes;
+ u64 flags = extent_op->flags_to_set;
bool skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
- if (!skinny_metadata)
+ ref = btrfs_delayed_node_to_tree_ref(node);
+
+ extent_key.objectid = node->bytenr;
+ if (skinny_metadata) {
+ extent_key.offset = ref->level;
+ extent_key.type = BTRFS_METADATA_ITEM_KEY;
+ num_bytes = fs_info->nodesize;
+ } else {
+ extent_key.offset = node->num_bytes;
+ extent_key.type = BTRFS_EXTENT_ITEM_KEY;
size += sizeof(*block_info);
+ num_bytes = node->num_bytes;
+ }
path = btrfs_alloc_path();
if (!path) {
- btrfs_free_and_pin_reserved_extent(fs_info, ins->objectid,
+ btrfs_free_and_pin_reserved_extent(fs_info,
+ extent_key.objectid,
fs_info->nodesize);
return -ENOMEM;
}
path->leave_spinning = 1;
ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path,
- ins, size);
+ &extent_key, size);
if (ret) {
btrfs_free_path(path);
- btrfs_free_and_pin_reserved_extent(fs_info, ins->objectid,
+ btrfs_free_and_pin_reserved_extent(fs_info,
+ extent_key.objectid,
fs_info->nodesize);
return ret;
}
@@ -8238,42 +8187,41 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
if (skinny_metadata) {
iref = (struct btrfs_extent_inline_ref *)(extent_item + 1);
- num_bytes = fs_info->nodesize;
} else {
block_info = (struct btrfs_tree_block_info *)(extent_item + 1);
- btrfs_set_tree_block_key(leaf, block_info, key);
- btrfs_set_tree_block_level(leaf, block_info, level);
+ btrfs_set_tree_block_key(leaf, block_info, &extent_op->key);
+ btrfs_set_tree_block_level(leaf, block_info, ref->level);
iref = (struct btrfs_extent_inline_ref *)(block_info + 1);
}
- if (parent > 0) {
+ if (node->type == BTRFS_SHARED_BLOCK_REF_KEY) {
BUG_ON(!(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF));
btrfs_set_extent_inline_ref_type(leaf, iref,
BTRFS_SHARED_BLOCK_REF_KEY);
- btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
+ btrfs_set_extent_inline_ref_offset(leaf, iref, ref->parent);
} else {
btrfs_set_extent_inline_ref_type(leaf, iref,
BTRFS_TREE_BLOCK_REF_KEY);
- btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid);
+ btrfs_set_extent_inline_ref_offset(leaf, iref, ref->root);
}
btrfs_mark_buffer_dirty(leaf);
btrfs_free_path(path);
- ret = remove_from_free_space_tree(trans, fs_info, ins->objectid,
+ ret = remove_from_free_space_tree(trans, extent_key.objectid,
num_bytes);
if (ret)
return ret;
- ret = update_block_group(trans, fs_info, ins->objectid,
+ ret = update_block_group(trans, fs_info, extent_key.objectid,
fs_info->nodesize, 1);
if (ret) { /* -ENOENT, logic error */
btrfs_err(fs_info, "update block group failed for %llu %llu",
- ins->objectid, ins->offset);
+ extent_key.objectid, extent_key.offset);
BUG();
}
- trace_btrfs_reserved_extent_alloc(fs_info, ins->objectid,
+ trace_btrfs_reserved_extent_alloc(fs_info, extent_key.objectid,
fs_info->nodesize);
return ret;
}
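
The key the reworked function builds differs by the SKINNY_METADATA incompat flag: the skinny form stores the tree level in the key offset and omits struct btrfs_tree_block_info from the item, while the classic form stores the byte count and carries the block info. Restating the two encodings from the hunk above:

    extent_key.objectid = node->bytenr;
    if (skinny_metadata) {
            extent_key.type   = BTRFS_METADATA_ITEM_KEY;
            extent_key.offset = ref->level;         /* level, not a size */
    } else {
            extent_key.type   = BTRFS_EXTENT_ITEM_KEY;
            extent_key.offset = node->num_bytes;    /* byte length */
            /* item additionally carries struct btrfs_tree_block_info */
    }
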
@@ -10166,8 +10114,7 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info)
} else if (btrfs_block_group_used(&cache->item) == 0) {
cache->last_byte_to_unpin = (u64)-1;
cache->cached = BTRFS_CACHE_FINISHED;
- add_new_free_space(cache, info,
- found_key.objectid,
+ add_new_free_space(cache, found_key.objectid,
found_key.objectid +
found_key.offset);
free_excluded_extents(info, cache);
@@ -10197,6 +10144,7 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info)
/* Should always be true but just in case. */
if (list_empty(&cache->bg_list)) {
btrfs_get_block_group(cache);
+ trace_btrfs_add_unused_block_group(cache);
list_add_tail(&cache->bg_list,
&info->unused_bgs);
}
@@ -10262,7 +10210,7 @@ void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans)
key.offset);
if (ret)
btrfs_abort_transaction(trans, ret);
- add_block_group_free_space(trans, fs_info, block_group);
+ add_block_group_free_space(trans, block_group);
/* already aborted the transaction if it failed. */
next:
list_del_init(&block_group->bg_list);
@@ -10303,7 +10251,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
return ret;
}
- add_new_free_space(cache, fs_info, chunk_offset, chunk_offset + size);
+ add_new_free_space(cache, chunk_offset, chunk_offset + size);
free_excluded_extents(fs_info, cache);
@@ -10384,6 +10332,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
BUG_ON(!block_group);
BUG_ON(!block_group->ro);
+ trace_btrfs_remove_block_group(block_group);
/*
* Free the reserved super bytes from this block group before
* remove it.
@@ -10641,7 +10590,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
mutex_unlock(&fs_info->chunk_mutex);
- ret = remove_block_group_free_space(trans, fs_info, block_group);
+ ret = remove_block_group_free_space(trans, block_group);
if (ret)
goto out;
@@ -10748,6 +10697,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
* the ro check in case balance is currently acting on
* this block group.
*/
+ trace_btrfs_skip_unused_block_group(block_group);
spin_unlock(&block_group->lock);
up_write(&space_info->groups_sem);
goto next;
@@ -10870,7 +10820,6 @@ next:
int btrfs_init_space_info(struct btrfs_fs_info *fs_info)
{
- struct btrfs_space_info *space_info;
struct btrfs_super_block *disk_super;
u64 features;
u64 flags;
@@ -10886,21 +10835,21 @@ int btrfs_init_space_info(struct btrfs_fs_info *fs_info)
mixed = 1;
flags = BTRFS_BLOCK_GROUP_SYSTEM;
- ret = create_space_info(fs_info, flags, &space_info);
+ ret = create_space_info(fs_info, flags);
if (ret)
goto out;
if (mixed) {
flags = BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA;
- ret = create_space_info(fs_info, flags, &space_info);
+ ret = create_space_info(fs_info, flags);
} else {
flags = BTRFS_BLOCK_GROUP_METADATA;
- ret = create_space_info(fs_info, flags, &space_info);
+ ret = create_space_info(fs_info, flags);
if (ret)
goto out;
flags = BTRFS_BLOCK_GROUP_DATA;
- ret = create_space_info(fs_info, flags, &space_info);
+ ret = create_space_info(fs_info, flags);
}
out:
return ret;
@@ -11085,12 +11034,7 @@ int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range)
void btrfs_end_write_no_snapshotting(struct btrfs_root *root)
{
percpu_counter_dec(&root->subv_writers->counter);
- /*
- * Make sure counter is updated before we wake up waiters.
- */
- smp_mb();
- if (waitqueue_active(&root->subv_writers->wait))
- wake_up(&root->subv_writers->wait);
+ cond_wake_up(&root->subv_writers->wait);
}
int btrfs_start_write_no_snapshotting(struct btrfs_root *root)
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index e99b329002cf..51fc015c7d2c 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -26,7 +26,7 @@
static struct kmem_cache *extent_state_cache;
static struct kmem_cache *extent_buffer_cache;
-static struct bio_set *btrfs_bioset;
+static struct bio_set btrfs_bioset;
static inline bool extent_state_in_tree(const struct extent_state *state)
{
@@ -162,20 +162,18 @@ int __init extent_io_init(void)
if (!extent_buffer_cache)
goto free_state_cache;
- btrfs_bioset = bioset_create(BIO_POOL_SIZE,
- offsetof(struct btrfs_io_bio, bio),
- BIOSET_NEED_BVECS);
- if (!btrfs_bioset)
+ if (bioset_init(&btrfs_bioset, BIO_POOL_SIZE,
+ offsetof(struct btrfs_io_bio, bio),
+ BIOSET_NEED_BVECS))
goto free_buffer_cache;
- if (bioset_integrity_create(btrfs_bioset, BIO_POOL_SIZE))
+ if (bioset_integrity_create(&btrfs_bioset, BIO_POOL_SIZE))
goto free_bioset;
return 0;
free_bioset:
- bioset_free(btrfs_bioset);
- btrfs_bioset = NULL;
+ bioset_exit(&btrfs_bioset);
free_buffer_cache:
kmem_cache_destroy(extent_buffer_cache);
@@ -198,8 +196,7 @@ void __cold extent_io_exit(void)
rcu_barrier();
kmem_cache_destroy(extent_state_cache);
kmem_cache_destroy(extent_buffer_cache);
- if (btrfs_bioset)
- bioset_free(btrfs_bioset);
+ bioset_exit(&btrfs_bioset);
}
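
This converts the file-global bioset from a heap pointer managed by bioset_create()/bioset_free() to an embedded object managed by bioset_init()/bioset_exit(); since bioset_exit() tolerates a set that was never (or only partially) initialized, the NULL guards go away. The resulting lifecycle, condensed from the hunks above (setup/teardown are stand-ins for the init/exit hooks):

    static struct bio_set btrfs_bioset;     /* embedded, zero-initialized */

    /* front_pad makes each bio the tail of a larger btrfs_io_bio */
    int setup(void)
    {
            return bioset_init(&btrfs_bioset, BIO_POOL_SIZE,
                               offsetof(struct btrfs_io_bio, bio),
                               BIOSET_NEED_BVECS);
    }

    void teardown(void)
    {
            bioset_exit(&btrfs_bioset);     /* unconditional, no NULL check */
    }
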
void extent_io_tree_init(struct extent_io_tree *tree,
@@ -2679,7 +2676,7 @@ struct bio *btrfs_bio_alloc(struct block_device *bdev, u64 first_byte)
{
struct bio *bio;
- bio = bio_alloc_bioset(GFP_NOFS, BIO_MAX_PAGES, btrfs_bioset);
+ bio = bio_alloc_bioset(GFP_NOFS, BIO_MAX_PAGES, &btrfs_bioset);
bio_set_dev(bio, bdev);
bio->bi_iter.bi_sector = first_byte >> 9;
btrfs_io_bio_init(btrfs_io_bio(bio));
@@ -2692,7 +2689,7 @@ struct bio *btrfs_bio_clone(struct bio *bio)
struct bio *new;
/* Bio allocation backed by a bioset does not fail */
- new = bio_clone_fast(bio, GFP_NOFS, btrfs_bioset);
+ new = bio_clone_fast(bio, GFP_NOFS, &btrfs_bioset);
btrfs_bio = btrfs_io_bio(new);
btrfs_io_bio_init(btrfs_bio);
btrfs_bio->iter = bio->bi_iter;
@@ -2704,7 +2701,7 @@ struct bio *btrfs_io_bio_alloc(unsigned int nr_iovecs)
struct bio *bio;
/* Bio allocation backed by a bioset does not fail */
- bio = bio_alloc_bioset(GFP_NOFS, nr_iovecs, btrfs_bioset);
+ bio = bio_alloc_bioset(GFP_NOFS, nr_iovecs, &btrfs_bioset);
btrfs_io_bio_init(btrfs_io_bio(bio));
return bio;
}
@@ -2715,7 +2712,7 @@ struct bio *btrfs_bio_clone_partial(struct bio *orig, int offset, int size)
struct btrfs_io_bio *btrfs_bio;
/* this will never fail when it's backed by a bioset */
- bio = bio_clone_fast(orig, GFP_NOFS, btrfs_bioset);
+ bio = bio_clone_fast(orig, GFP_NOFS, &btrfs_bioset);
ASSERT(bio);
btrfs_bio = btrfs_io_bio(bio);
@@ -4109,14 +4106,13 @@ int extent_write_locked_range(struct inode *inode, u64 start, u64 end,
return ret;
}
-int extent_writepages(struct extent_io_tree *tree,
- struct address_space *mapping,
+int extent_writepages(struct address_space *mapping,
struct writeback_control *wbc)
{
int ret = 0;
struct extent_page_data epd = {
.bio = NULL,
- .tree = tree,
+ .tree = &BTRFS_I(mapping->host)->io_tree,
.extent_locked = 0,
.sync_io = wbc->sync_mode == WB_SYNC_ALL,
};
@@ -4126,9 +4122,8 @@ int extent_writepages(struct extent_io_tree *tree,
return ret;
}
-int extent_readpages(struct extent_io_tree *tree,
- struct address_space *mapping,
- struct list_head *pages, unsigned nr_pages)
+int extent_readpages(struct address_space *mapping, struct list_head *pages,
+ unsigned nr_pages)
{
struct bio *bio = NULL;
unsigned page_idx;
@@ -4136,6 +4131,7 @@ int extent_readpages(struct extent_io_tree *tree,
struct page *pagepool[16];
struct page *page;
struct extent_map *em_cached = NULL;
+ struct extent_io_tree *tree = &BTRFS_I(mapping->host)->io_tree;
int nr = 0;
u64 prev_em_start = (u64)-1;
@@ -4202,8 +4198,7 @@ int extent_invalidatepage(struct extent_io_tree *tree,
* are locked or under IO and drops the related state bits if it is safe
* to drop the page.
*/
-static int try_release_extent_state(struct extent_map_tree *map,
- struct extent_io_tree *tree,
+static int try_release_extent_state(struct extent_io_tree *tree,
struct page *page, gfp_t mask)
{
u64 start = page_offset(page);
@@ -4238,13 +4233,13 @@ static int try_release_extent_state(struct extent_map_tree *map,
* in the range corresponding to the page, both state records and extent
* map records are removed
*/
-int try_release_extent_mapping(struct extent_map_tree *map,
- struct extent_io_tree *tree, struct page *page,
- gfp_t mask)
+int try_release_extent_mapping(struct page *page, gfp_t mask)
{
struct extent_map *em;
u64 start = page_offset(page);
u64 end = start + PAGE_SIZE - 1;
+ struct extent_io_tree *tree = &BTRFS_I(page->mapping->host)->io_tree;
+ struct extent_map_tree *map = &BTRFS_I(page->mapping->host)->extent_tree;
if (gfpflags_allow_blocking(mask) &&
page->mapping->host->i_size > SZ_16M) {
@@ -4278,7 +4273,7 @@ int try_release_extent_mapping(struct extent_map_tree *map,
free_extent_map(em);
}
}
- return try_release_extent_state(map, tree, page, mask);
+ return try_release_extent_state(tree, page, mask);
}
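
Dropping the map/tree parameters works because both live in struct btrfs_inode, reachable from the page via BTRFS_I(), the usual container_of() step from the embedded VFS inode. A compilable illustration of that conversion, with stub types:

    #include <stddef.h>
    #include <stdio.h>

    #define container_of(ptr, type, member) \
            ((type *)((char *)(ptr) - offsetof(type, member)))

    struct inode { long i_size; };
    struct btrfs_inode {
            int io_tree;            /* stand-ins for the real trees */
            int extent_tree;
            struct inode vfs_inode;
    };

    static struct btrfs_inode *BTRFS_I(struct inode *inode)
    {
            return container_of(inode, struct btrfs_inode, vfs_inode);
    }

    int main(void)
    {
            struct btrfs_inode bi = { .io_tree = 1, .extent_tree = 2 };
            struct inode *vfs = &bi.vfs_inode;      /* what the page gives us */
            printf("%d %d\n", BTRFS_I(vfs)->io_tree, BTRFS_I(vfs)->extent_tree);
            return 0;
    }
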
/*
@@ -5620,46 +5615,6 @@ void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
}
}
-void le_bitmap_set(u8 *map, unsigned int start, int len)
-{
- u8 *p = map + BIT_BYTE(start);
- const unsigned int size = start + len;
- int bits_to_set = BITS_PER_BYTE - (start % BITS_PER_BYTE);
- u8 mask_to_set = BITMAP_FIRST_BYTE_MASK(start);
-
- while (len - bits_to_set >= 0) {
- *p |= mask_to_set;
- len -= bits_to_set;
- bits_to_set = BITS_PER_BYTE;
- mask_to_set = ~0;
- p++;
- }
- if (len) {
- mask_to_set &= BITMAP_LAST_BYTE_MASK(size);
- *p |= mask_to_set;
- }
-}
-
-void le_bitmap_clear(u8 *map, unsigned int start, int len)
-{
- u8 *p = map + BIT_BYTE(start);
- const unsigned int size = start + len;
- int bits_to_clear = BITS_PER_BYTE - (start % BITS_PER_BYTE);
- u8 mask_to_clear = BITMAP_FIRST_BYTE_MASK(start);
-
- while (len - bits_to_clear >= 0) {
- *p &= ~mask_to_clear;
- len -= bits_to_clear;
- bits_to_clear = BITS_PER_BYTE;
- mask_to_clear = ~0;
- p++;
- }
- if (len) {
- mask_to_clear &= BITMAP_LAST_BYTE_MASK(size);
- *p &= ~mask_to_clear;
- }
-}
-
/*
* eb_bitmap_offset() - calculate the page and offset of the byte containing the
* given bit number
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index a53009694b16..0bfd4aeb822d 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -79,14 +79,6 @@
#define BITMAP_LAST_BYTE_MASK(nbits) \
(BYTE_MASK >> (-(nbits) & (BITS_PER_BYTE - 1)))
-static inline int le_test_bit(int nr, const u8 *addr)
-{
- return 1U & (addr[BIT_BYTE(nr)] >> (nr & (BITS_PER_BYTE-1)));
-}
-
-void le_bitmap_set(u8 *map, unsigned int start, int len);
-void le_bitmap_clear(u8 *map, unsigned int start, int len);
-
struct extent_state;
struct btrfs_root;
struct btrfs_inode;
@@ -278,9 +270,7 @@ typedef struct extent_map *(get_extent_t)(struct btrfs_inode *inode,
int create);
void extent_io_tree_init(struct extent_io_tree *tree, void *private_data);
-int try_release_extent_mapping(struct extent_map_tree *map,
- struct extent_io_tree *tree, struct page *page,
- gfp_t mask);
+int try_release_extent_mapping(struct page *page, gfp_t mask);
int try_release_extent_buffer(struct page *page);
int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
struct extent_state **cached);
@@ -421,14 +411,12 @@ int extent_invalidatepage(struct extent_io_tree *tree,
int extent_write_full_page(struct page *page, struct writeback_control *wbc);
int extent_write_locked_range(struct inode *inode, u64 start, u64 end,
int mode);
-int extent_writepages(struct extent_io_tree *tree,
- struct address_space *mapping,
+int extent_writepages(struct address_space *mapping,
struct writeback_control *wbc);
int btree_write_cache_pages(struct address_space *mapping,
struct writeback_control *wbc);
-int extent_readpages(struct extent_io_tree *tree,
- struct address_space *mapping,
- struct list_head *pages, unsigned nr_pages);
+int extent_readpages(struct address_space *mapping, struct list_head *pages,
+ unsigned nr_pages);
int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
__u64 start, __u64 len);
void set_page_extent_mapped(struct page *page);
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index 1b8a078f92eb..6648d55e5339 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -518,6 +518,7 @@ static noinline int merge_extent_mapping(struct extent_map_tree *em_tree,
/**
* btrfs_add_extent_mapping - add extent mapping into em_tree
+ * @fs_info - used for the tracepoint
* @em_tree - the extent tree into which we want to insert the extent mapping
* @em_in - extent we are inserting
* @start - start of the logical range btrfs_get_extent() is requesting
@@ -535,7 +536,8 @@ static noinline int merge_extent_mapping(struct extent_map_tree *em_tree,
* Return 0 on success, otherwise -EEXIST.
*
*/
-int btrfs_add_extent_mapping(struct extent_map_tree *em_tree,
+int btrfs_add_extent_mapping(struct btrfs_fs_info *fs_info,
+ struct extent_map_tree *em_tree,
struct extent_map **em_in, u64 start, u64 len)
{
int ret;
@@ -553,7 +555,7 @@ int btrfs_add_extent_mapping(struct extent_map_tree *em_tree,
existing = search_extent_mapping(em_tree, start, len);
- trace_btrfs_handle_em_exist(existing, em, start, len);
+ trace_btrfs_handle_em_exist(fs_info, existing, em, start, len);
/*
* existing will always be non-NULL, since there must be
diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h
index 5fcb80a6ce37..25d985e7532a 100644
--- a/fs/btrfs/extent_map.h
+++ b/fs/btrfs/extent_map.h
@@ -92,7 +92,8 @@ int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len, u64 gen
void clear_em_logging(struct extent_map_tree *tree, struct extent_map *em);
struct extent_map *search_extent_mapping(struct extent_map_tree *tree,
u64 start, u64 len);
-int btrfs_add_extent_mapping(struct extent_map_tree *em_tree,
+int btrfs_add_extent_mapping(struct btrfs_fs_info *fs_info,
+ struct extent_map_tree *em_tree,
struct extent_map **em_in, u64 start, u64 len);
#endif
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index e5b569bebc73..d5f80cb300be 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -253,10 +253,8 @@ int btrfs_truncate_free_space_cache(struct btrfs_trans_handle *trans,
truncate_pagecache(inode, 0);
/*
- * We don't need an orphan item because truncating the free space cache
- * will never be split across transactions.
- * We don't need to check for -EAGAIN because we're a free space
- * cache inode
+ * We skip the throttling logic for free space cache inodes, so we don't
+ * need to check for -EAGAIN.
*/
ret = btrfs_truncate_inode_items(trans, root, inode,
0, BTRFS_EXTENT_DATA_KEY);
diff --git a/fs/btrfs/free-space-tree.c b/fs/btrfs/free-space-tree.c
index 32a0f6cb5594..b5950aacd697 100644
--- a/fs/btrfs/free-space-tree.c
+++ b/fs/btrfs/free-space-tree.c
@@ -12,7 +12,6 @@
#include "transaction.h"
static int __add_block_group_free_space(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
struct btrfs_block_group_cache *block_group,
struct btrfs_path *path);
@@ -45,11 +44,10 @@ void set_free_space_tree_thresholds(struct btrfs_block_group_cache *cache)
}
static int add_new_free_space_info(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
struct btrfs_block_group_cache *block_group,
struct btrfs_path *path)
{
- struct btrfs_root *root = fs_info->free_space_root;
+ struct btrfs_root *root = trans->fs_info->free_space_root;
struct btrfs_free_space_info *info;
struct btrfs_key key;
struct extent_buffer *leaf;
@@ -138,10 +136,11 @@ static inline u32 free_space_bitmap_size(u64 size, u32 sectorsize)
return DIV_ROUND_UP((u32)div_u64(size, sectorsize), BITS_PER_BYTE);
}
-static u8 *alloc_bitmap(u32 bitmap_size)
+static unsigned long *alloc_bitmap(u32 bitmap_size)
{
- u8 *ret;
+ unsigned long *ret;
unsigned int nofs_flag;
+ u32 bitmap_rounded_size = round_up(bitmap_size, sizeof(unsigned long));
/*
* GFP_NOFS doesn't work with kvmalloc(), but we really can't recurse
@@ -152,21 +151,42 @@ static u8 *alloc_bitmap(u32 bitmap_size)
* know that recursion is unsafe.
*/
nofs_flag = memalloc_nofs_save();
- ret = kvzalloc(bitmap_size, GFP_KERNEL);
+ ret = kvzalloc(bitmap_rounded_size, GFP_KERNEL);
memalloc_nofs_restore(nofs_flag);
return ret;
}
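The memalloc_nofs_save()/memalloc_nofs_restore() pair above is the standard way to get NOFS semantics out of allocators such as kvmalloc() that cannot honor GFP_NOFS directly: the task flag makes every allocation in the scope behave as NOFS even though GFP_KERNEL is passed. A sketch of the general pattern (build_caches() and struct ctx are hypothetical):

/* All allocations between save and restore are implicitly NOFS,
 * so reclaim triggered by them cannot recurse into the filesystem. */
static int build_caches(struct ctx *ctx)
{
	unsigned int nofs_flag = memalloc_nofs_save();
	int ret = 0;

	ctx->a = kvzalloc(ctx->a_size, GFP_KERNEL);
	ctx->b = kvzalloc(ctx->b_size, GFP_KERNEL);
	if (!ctx->a || !ctx->b)
		ret = -ENOMEM;

	memalloc_nofs_restore(nofs_flag);
	return ret;
}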
+static void le_bitmap_set(unsigned long *map, unsigned int start, int len)
+{
+ u8 *p = ((u8 *)map) + BIT_BYTE(start);
+ const unsigned int size = start + len;
+ int bits_to_set = BITS_PER_BYTE - (start % BITS_PER_BYTE);
+ u8 mask_to_set = BITMAP_FIRST_BYTE_MASK(start);
+
+ while (len - bits_to_set >= 0) {
+ *p |= mask_to_set;
+ len -= bits_to_set;
+ bits_to_set = BITS_PER_BYTE;
+ mask_to_set = ~0;
+ p++;
+ }
+ if (len) {
+ mask_to_set &= BITMAP_LAST_BYTE_MASK(size);
+ *p |= mask_to_set;
+ }
+}
+
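le_bitmap_set() works at byte granularity on a little-endian bitmap, so the on-disk result is independent of host endianness. A userspace sketch of the expected behavior (the helper and its masks are copied/redefined locally for the demo):

#include <stdio.h>
#include <string.h>

#define BITS_PER_BYTE 8
#define BIT_BYTE(nr) ((nr) / BITS_PER_BYTE)
#define BITMAP_FIRST_BYTE_MASK(start) (0xffU << ((start) & (BITS_PER_BYTE - 1)))
#define BITMAP_LAST_BYTE_MASK(nbits)  (0xffU >> (-(nbits) & (BITS_PER_BYTE - 1)))

/* same algorithm as above, duplicated so the demo is self-contained */
static void le_bitmap_set(unsigned char *map, unsigned int start, int len)
{
	unsigned char *p = map + BIT_BYTE(start);
	const unsigned int size = start + len;
	int bits_to_set = BITS_PER_BYTE - (start % BITS_PER_BYTE);
	unsigned char mask_to_set = BITMAP_FIRST_BYTE_MASK(start);

	while (len - bits_to_set >= 0) {
		*p |= mask_to_set;
		len -= bits_to_set;
		bits_to_set = BITS_PER_BYTE;
		mask_to_set = 0xff;
		p++;
	}
	if (len) {
		mask_to_set &= BITMAP_LAST_BYTE_MASK(size);
		*p |= mask_to_set;
	}
}

int main(void)
{
	unsigned char map[3];

	memset(map, 0, sizeof(map));
	le_bitmap_set(map, 3, 10);	/* set bits 3..12 */
	printf("%02x %02x %02x\n", map[0], map[1], map[2]);	/* f8 1f 00 */
	return 0;
}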
int convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
struct btrfs_block_group_cache *block_group,
struct btrfs_path *path)
{
+ struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_root *root = fs_info->free_space_root;
struct btrfs_free_space_info *info;
struct btrfs_key key, found_key;
struct extent_buffer *leaf;
- u8 *bitmap, *bitmap_cursor;
+ unsigned long *bitmap;
+ char *bitmap_cursor;
u64 start, end;
u64 bitmap_range, i;
u32 bitmap_size, flags, expected_extent_count;
@@ -255,7 +275,7 @@ int convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans,
goto out;
}
- bitmap_cursor = bitmap;
+ bitmap_cursor = (char *)bitmap;
bitmap_range = fs_info->sectorsize * BTRFS_FREE_SPACE_BITMAP_BITS;
i = start;
while (i < end) {
@@ -296,21 +316,18 @@ out:
}
int convert_free_space_to_extents(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
struct btrfs_block_group_cache *block_group,
struct btrfs_path *path)
{
+ struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_root *root = fs_info->free_space_root;
struct btrfs_free_space_info *info;
struct btrfs_key key, found_key;
struct extent_buffer *leaf;
- u8 *bitmap;
+ unsigned long *bitmap;
u64 start, end;
- /* Initialize to silence GCC. */
- u64 extent_start = 0;
- u64 offset;
u32 bitmap_size, flags, expected_extent_count;
- int prev_bit = 0, bit, bitnr;
+ unsigned long nrbits, start_bit, end_bit;
u32 extent_count = 0;
int done = 0, nr;
int ret;
@@ -348,7 +365,7 @@ int convert_free_space_to_extents(struct btrfs_trans_handle *trans,
break;
} else if (found_key.type == BTRFS_FREE_SPACE_BITMAP_KEY) {
unsigned long ptr;
- u8 *bitmap_cursor;
+ char *bitmap_cursor;
u32 bitmap_pos, data_size;
ASSERT(found_key.objectid >= start);
@@ -358,7 +375,7 @@ int convert_free_space_to_extents(struct btrfs_trans_handle *trans,
bitmap_pos = div_u64(found_key.objectid - start,
fs_info->sectorsize *
BITS_PER_BYTE);
- bitmap_cursor = bitmap + bitmap_pos;
+ bitmap_cursor = ((char *)bitmap) + bitmap_pos;
data_size = free_space_bitmap_size(found_key.offset,
fs_info->sectorsize);
@@ -392,32 +409,16 @@ int convert_free_space_to_extents(struct btrfs_trans_handle *trans,
btrfs_mark_buffer_dirty(leaf);
btrfs_release_path(path);
- offset = start;
- bitnr = 0;
- while (offset < end) {
- bit = !!le_test_bit(bitnr, bitmap);
- if (prev_bit == 0 && bit == 1) {
- extent_start = offset;
- } else if (prev_bit == 1 && bit == 0) {
- key.objectid = extent_start;
- key.type = BTRFS_FREE_SPACE_EXTENT_KEY;
- key.offset = offset - extent_start;
-
- ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
- if (ret)
- goto out;
- btrfs_release_path(path);
+ nrbits = div_u64(block_group->key.offset, block_group->fs_info->sectorsize);
+ start_bit = find_next_bit_le(bitmap, nrbits, 0);
- extent_count++;
- }
- prev_bit = bit;
- offset += fs_info->sectorsize;
- bitnr++;
- }
- if (prev_bit == 1) {
- key.objectid = extent_start;
+ while (start_bit < nrbits) {
+ end_bit = find_next_zero_bit_le(bitmap, nrbits, start_bit);
+ ASSERT(start_bit < end_bit);
+
+ key.objectid = start + start_bit * block_group->fs_info->sectorsize;
key.type = BTRFS_FREE_SPACE_EXTENT_KEY;
- key.offset = end - extent_start;
+ key.offset = (end_bit - start_bit) * block_group->fs_info->sectorsize;
ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
if (ret)
@@ -425,6 +426,8 @@ int convert_free_space_to_extents(struct btrfs_trans_handle *trans,
btrfs_release_path(path);
extent_count++;
+
+ start_bit = find_next_bit_le(bitmap, nrbits, end_bit);
}
if (extent_count != expected_extent_count) {
@@ -446,7 +449,6 @@ out:
}
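The rewritten conversion above walks set-bit runs with find_next_bit_le()/find_next_zero_bit_le() instead of testing one bit per sector. A userspace analogue of the run-extraction loop (the linear find_next() here stands in for the kernel's word-at-a-time helpers):

#include <stdio.h>

#define BITS_PER_BYTE 8

static int test_bit_le(const unsigned char *map, unsigned long nr)
{
	return (map[nr / BITS_PER_BYTE] >> (nr % BITS_PER_BYTE)) & 1;
}

static unsigned long find_next(const unsigned char *map, unsigned long nbits,
			       unsigned long from, int want)
{
	while (from < nbits && test_bit_le(map, from) != want)
		from++;
	return from;	/* nbits means "not found" */
}

int main(void)
{
	/* bits 2..4 and 9..10 set: expect runs (2,3) and (9,2) */
	unsigned char map[2] = { 0x1c, 0x06 };
	unsigned long nbits = 16, start, end;

	start = find_next(map, nbits, 0, 1);
	while (start < nbits) {
		end = find_next(map, nbits, start, 0);
		printf("extent: start=%lu len=%lu\n", start, end - start);
		start = find_next(map, nbits, end, 1);
	}
	return 0;
}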
static int update_free_space_extent_count(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
struct btrfs_block_group_cache *block_group,
struct btrfs_path *path,
int new_extents)
@@ -459,7 +461,8 @@ static int update_free_space_extent_count(struct btrfs_trans_handle *trans,
if (new_extents == 0)
return 0;
- info = search_free_space_info(trans, fs_info, block_group, path, 1);
+ info = search_free_space_info(trans, trans->fs_info, block_group, path,
+ 1);
if (IS_ERR(info)) {
ret = PTR_ERR(info);
goto out;
@@ -474,12 +477,10 @@ static int update_free_space_extent_count(struct btrfs_trans_handle *trans,
if (!(flags & BTRFS_FREE_SPACE_USING_BITMAPS) &&
extent_count > block_group->bitmap_high_thresh) {
- ret = convert_free_space_to_bitmaps(trans, fs_info, block_group,
- path);
+ ret = convert_free_space_to_bitmaps(trans, block_group, path);
} else if ((flags & BTRFS_FREE_SPACE_USING_BITMAPS) &&
extent_count < block_group->bitmap_low_thresh) {
- ret = convert_free_space_to_extents(trans, fs_info, block_group,
- path);
+ ret = convert_free_space_to_extents(trans, block_group, path);
}
out:
@@ -576,12 +577,11 @@ static int free_space_next_bitmap(struct btrfs_trans_handle *trans,
* the bitmap.
*/
static int modify_free_space_bitmap(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
struct btrfs_block_group_cache *block_group,
struct btrfs_path *path,
u64 start, u64 size, int remove)
{
- struct btrfs_root *root = fs_info->free_space_root;
+ struct btrfs_root *root = block_group->fs_info->free_space_root;
struct btrfs_key key;
u64 end = start + size;
u64 cur_start, cur_size;
@@ -682,7 +682,7 @@ static int modify_free_space_bitmap(struct btrfs_trans_handle *trans,
}
btrfs_release_path(path);
- ret = update_free_space_extent_count(trans, fs_info, block_group, path,
+ ret = update_free_space_extent_count(trans, block_group, path,
new_extents);
out:
@@ -690,12 +690,11 @@ out:
}
static int remove_free_space_extent(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
struct btrfs_block_group_cache *block_group,
struct btrfs_path *path,
u64 start, u64 size)
{
- struct btrfs_root *root = fs_info->free_space_root;
+ struct btrfs_root *root = trans->fs_info->free_space_root;
struct btrfs_key key;
u64 found_start, found_end;
u64 end = start + size;
@@ -769,7 +768,7 @@ static int remove_free_space_extent(struct btrfs_trans_handle *trans,
}
btrfs_release_path(path);
- ret = update_free_space_extent_count(trans, fs_info, block_group, path,
+ ret = update_free_space_extent_count(trans, block_group, path,
new_extents);
out:
@@ -777,7 +776,6 @@ out:
}
int __remove_from_free_space_tree(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
struct btrfs_block_group_cache *block_group,
struct btrfs_path *path, u64 start, u64 size)
{
@@ -786,36 +784,35 @@ int __remove_from_free_space_tree(struct btrfs_trans_handle *trans,
int ret;
if (block_group->needs_free_space) {
- ret = __add_block_group_free_space(trans, fs_info, block_group,
- path);
+ ret = __add_block_group_free_space(trans, block_group, path);
if (ret)
return ret;
}
- info = search_free_space_info(NULL, fs_info, block_group, path, 0);
+ info = search_free_space_info(NULL, trans->fs_info, block_group, path,
+ 0);
if (IS_ERR(info))
return PTR_ERR(info);
flags = btrfs_free_space_flags(path->nodes[0], info);
btrfs_release_path(path);
if (flags & BTRFS_FREE_SPACE_USING_BITMAPS) {
- return modify_free_space_bitmap(trans, fs_info, block_group,
- path, start, size, 1);
+ return modify_free_space_bitmap(trans, block_group, path,
+ start, size, 1);
} else {
- return remove_free_space_extent(trans, fs_info, block_group,
- path, start, size);
+ return remove_free_space_extent(trans, block_group, path,
+ start, size);
}
}
int remove_from_free_space_tree(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
u64 start, u64 size)
{
struct btrfs_block_group_cache *block_group;
struct btrfs_path *path;
int ret;
- if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE))
+ if (!btrfs_fs_compat_ro(trans->fs_info, FREE_SPACE_TREE))
return 0;
path = btrfs_alloc_path();
@@ -824,7 +821,7 @@ int remove_from_free_space_tree(struct btrfs_trans_handle *trans,
goto out;
}
- block_group = btrfs_lookup_block_group(fs_info, start);
+ block_group = btrfs_lookup_block_group(trans->fs_info, start);
if (!block_group) {
ASSERT(0);
ret = -ENOENT;
@@ -832,8 +829,8 @@ int remove_from_free_space_tree(struct btrfs_trans_handle *trans,
}
mutex_lock(&block_group->free_space_lock);
- ret = __remove_from_free_space_tree(trans, fs_info, block_group, path,
- start, size);
+ ret = __remove_from_free_space_tree(trans, block_group, path, start,
+ size);
mutex_unlock(&block_group->free_space_lock);
btrfs_put_block_group(block_group);
@@ -845,12 +842,11 @@ out:
}
static int add_free_space_extent(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
struct btrfs_block_group_cache *block_group,
struct btrfs_path *path,
u64 start, u64 size)
{
- struct btrfs_root *root = fs_info->free_space_root;
+ struct btrfs_root *root = trans->fs_info->free_space_root;
struct btrfs_key key, new_key;
u64 found_start, found_end;
u64 end = start + size;
@@ -965,7 +961,7 @@ insert:
goto out;
btrfs_release_path(path);
- ret = update_free_space_extent_count(trans, fs_info, block_group, path,
+ ret = update_free_space_extent_count(trans, block_group, path,
new_extents);
out:
@@ -973,17 +969,16 @@ out:
}
int __add_to_free_space_tree(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
struct btrfs_block_group_cache *block_group,
struct btrfs_path *path, u64 start, u64 size)
{
+ struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_free_space_info *info;
u32 flags;
int ret;
if (block_group->needs_free_space) {
- ret = __add_block_group_free_space(trans, fs_info, block_group,
- path);
+ ret = __add_block_group_free_space(trans, block_group, path);
if (ret)
return ret;
}
@@ -995,23 +990,22 @@ int __add_to_free_space_tree(struct btrfs_trans_handle *trans,
btrfs_release_path(path);
if (flags & BTRFS_FREE_SPACE_USING_BITMAPS) {
- return modify_free_space_bitmap(trans, fs_info, block_group,
- path, start, size, 0);
+ return modify_free_space_bitmap(trans, block_group, path,
+ start, size, 0);
} else {
- return add_free_space_extent(trans, fs_info, block_group, path,
- start, size);
+ return add_free_space_extent(trans, block_group, path, start,
+ size);
}
}
int add_to_free_space_tree(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
u64 start, u64 size)
{
struct btrfs_block_group_cache *block_group;
struct btrfs_path *path;
int ret;
- if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE))
+ if (!btrfs_fs_compat_ro(trans->fs_info, FREE_SPACE_TREE))
return 0;
path = btrfs_alloc_path();
@@ -1020,7 +1014,7 @@ int add_to_free_space_tree(struct btrfs_trans_handle *trans,
goto out;
}
- block_group = btrfs_lookup_block_group(fs_info, start);
+ block_group = btrfs_lookup_block_group(trans->fs_info, start);
if (!block_group) {
ASSERT(0);
ret = -ENOENT;
@@ -1028,8 +1022,7 @@ int add_to_free_space_tree(struct btrfs_trans_handle *trans,
}
mutex_lock(&block_group->free_space_lock);
- ret = __add_to_free_space_tree(trans, fs_info, block_group, path, start,
- size);
+ ret = __add_to_free_space_tree(trans, block_group, path, start, size);
mutex_unlock(&block_group->free_space_lock);
btrfs_put_block_group(block_group);
@@ -1046,10 +1039,9 @@ out:
* through the normal add/remove hooks.
*/
static int populate_free_space_tree(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
struct btrfs_block_group_cache *block_group)
{
- struct btrfs_root *extent_root = fs_info->extent_root;
+ struct btrfs_root *extent_root = trans->fs_info->extent_root;
struct btrfs_path *path, *path2;
struct btrfs_key key;
u64 start, end;
@@ -1066,7 +1058,7 @@ static int populate_free_space_tree(struct btrfs_trans_handle *trans,
return -ENOMEM;
}
- ret = add_new_free_space_info(trans, fs_info, block_group, path2);
+ ret = add_new_free_space_info(trans, block_group, path2);
if (ret)
goto out;
@@ -1099,7 +1091,7 @@ static int populate_free_space_tree(struct btrfs_trans_handle *trans,
break;
if (start < key.objectid) {
- ret = __add_to_free_space_tree(trans, fs_info,
+ ret = __add_to_free_space_tree(trans,
block_group,
path2, start,
key.objectid -
@@ -1109,7 +1101,7 @@ static int populate_free_space_tree(struct btrfs_trans_handle *trans,
}
start = key.objectid;
if (key.type == BTRFS_METADATA_ITEM_KEY)
- start += fs_info->nodesize;
+ start += trans->fs_info->nodesize;
else
start += key.offset;
} else if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
@@ -1124,8 +1116,8 @@ static int populate_free_space_tree(struct btrfs_trans_handle *trans,
break;
}
if (start < end) {
- ret = __add_to_free_space_tree(trans, fs_info, block_group,
- path2, start, end - start);
+ ret = __add_to_free_space_tree(trans, block_group, path2,
+ start, end - start);
if (ret)
goto out_locked;
}
@@ -1165,7 +1157,7 @@ int btrfs_create_free_space_tree(struct btrfs_fs_info *fs_info)
while (node) {
block_group = rb_entry(node, struct btrfs_block_group_cache,
cache_node);
- ret = populate_free_space_tree(trans, fs_info, block_group);
+ ret = populate_free_space_tree(trans, block_group);
if (ret)
goto abort;
node = rb_next(node);
@@ -1269,7 +1261,6 @@ abort:
}
static int __add_block_group_free_space(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
struct btrfs_block_group_cache *block_group,
struct btrfs_path *path)
{
@@ -1277,19 +1268,19 @@ static int __add_block_group_free_space(struct btrfs_trans_handle *trans,
block_group->needs_free_space = 0;
- ret = add_new_free_space_info(trans, fs_info, block_group, path);
+ ret = add_new_free_space_info(trans, block_group, path);
if (ret)
return ret;
- return __add_to_free_space_tree(trans, fs_info, block_group, path,
+ return __add_to_free_space_tree(trans, block_group, path,
block_group->key.objectid,
block_group->key.offset);
}
int add_block_group_free_space(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
struct btrfs_block_group_cache *block_group)
{
+ struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_path *path = NULL;
int ret = 0;
@@ -1306,7 +1297,7 @@ int add_block_group_free_space(struct btrfs_trans_handle *trans,
goto out;
}
- ret = __add_block_group_free_space(trans, fs_info, block_group, path);
+ ret = __add_block_group_free_space(trans, block_group, path);
out:
btrfs_free_path(path);
@@ -1317,10 +1308,9 @@ out:
}
int remove_block_group_free_space(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
struct btrfs_block_group_cache *block_group)
{
- struct btrfs_root *root = fs_info->free_space_root;
+ struct btrfs_root *root = trans->fs_info->free_space_root;
struct btrfs_path *path;
struct btrfs_key key, found_key;
struct extent_buffer *leaf;
@@ -1328,7 +1318,7 @@ int remove_block_group_free_space(struct btrfs_trans_handle *trans,
int done = 0, nr;
int ret;
- if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE))
+ if (!btrfs_fs_compat_ro(trans->fs_info, FREE_SPACE_TREE))
return 0;
if (block_group->needs_free_space) {
@@ -1439,7 +1429,6 @@ static int load_free_space_bitmaps(struct btrfs_caching_control *caching_ctl,
extent_start = offset;
} else if (prev_bit == 1 && bit == 0) {
total_found += add_new_free_space(block_group,
- fs_info,
extent_start,
offset);
if (total_found > CACHING_CTL_WAKE_UP) {
@@ -1453,8 +1442,8 @@ static int load_free_space_bitmaps(struct btrfs_caching_control *caching_ctl,
}
}
if (prev_bit == 1) {
- total_found += add_new_free_space(block_group, fs_info,
- extent_start, end);
+ total_found += add_new_free_space(block_group, extent_start,
+ end);
extent_count++;
}
@@ -1511,8 +1500,7 @@ static int load_free_space_extents(struct btrfs_caching_control *caching_ctl,
caching_ctl->progress = key.objectid;
- total_found += add_new_free_space(block_group, fs_info,
- key.objectid,
+ total_found += add_new_free_space(block_group, key.objectid,
key.objectid + key.offset);
if (total_found > CACHING_CTL_WAKE_UP) {
total_found = 0;
diff --git a/fs/btrfs/free-space-tree.h b/fs/btrfs/free-space-tree.h
index 874b4feecad2..3133651d7d70 100644
--- a/fs/btrfs/free-space-tree.h
+++ b/fs/btrfs/free-space-tree.h
@@ -19,16 +19,12 @@ int btrfs_create_free_space_tree(struct btrfs_fs_info *fs_info);
int btrfs_clear_free_space_tree(struct btrfs_fs_info *fs_info);
int load_free_space_tree(struct btrfs_caching_control *caching_ctl);
int add_block_group_free_space(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
struct btrfs_block_group_cache *block_group);
int remove_block_group_free_space(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
struct btrfs_block_group_cache *block_group);
int add_to_free_space_tree(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
u64 start, u64 size);
int remove_from_free_space_tree(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
u64 start, u64 size);
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
@@ -38,19 +34,15 @@ search_free_space_info(struct btrfs_trans_handle *trans,
struct btrfs_block_group_cache *block_group,
struct btrfs_path *path, int cow);
int __add_to_free_space_tree(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
struct btrfs_block_group_cache *block_group,
struct btrfs_path *path, u64 start, u64 size);
int __remove_from_free_space_tree(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
struct btrfs_block_group_cache *block_group,
struct btrfs_path *path, u64 start, u64 size);
int convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
struct btrfs_block_group_cache *block_group,
struct btrfs_path *path);
int convert_free_space_to_extents(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
struct btrfs_block_group_cache *block_group,
struct btrfs_path *path);
int free_space_test_bit(struct btrfs_block_group_cache *block_group,
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index d241285a0d2a..89b208201783 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1018,8 +1018,10 @@ static noinline int cow_file_range(struct inode *inode,
ram_size, /* ram_bytes */
BTRFS_COMPRESS_NONE, /* compress_type */
BTRFS_ORDERED_REGULAR /* type */);
- if (IS_ERR(em))
+ if (IS_ERR(em)) {
+ ret = PTR_ERR(em);
goto out_reserve;
+ }
free_extent_map(em);
ret = btrfs_add_ordered_extent(inode, start, ins.objectid,
@@ -1156,13 +1158,10 @@ static noinline void async_cow_submit(struct btrfs_work *work)
nr_pages = (async_cow->end - async_cow->start + PAGE_SIZE) >>
PAGE_SHIFT;
- /*
- * atomic_sub_return implies a barrier for waitqueue_active
- */
+ /* atomic_sub_return implies a barrier */
if (atomic_sub_return(nr_pages, &fs_info->async_delalloc_pages) <
- 5 * SZ_1M &&
- waitqueue_active(&fs_info->async_submit_wait))
- wake_up(&fs_info->async_submit_wait);
+ 5 * SZ_1M)
+ cond_wake_up_nomb(&fs_info->async_submit_wait);
if (async_cow->inode)
submit_compressed_extents(async_cow->inode, async_cow);
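cond_wake_up_nomb() is the no-memory-barrier variant of the conditional wakeup helper: it relies on the caller already having issued a full barrier — here the atomic_sub_return() — to order the condition update before the waitqueue_active() check. Its body is presumably along these lines (sketch, assuming the helper is introduced elsewhere in this series):

/* Wake waiters only if any are queued; no barrier is issued because
 * the caller guarantees one (e.g. a value-returning atomic RMW). */
static inline void cond_wake_up_nomb(struct wait_queue_head *wq)
{
	if (waitqueue_active(wq))
		wake_up(wq);
}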
@@ -1373,6 +1372,13 @@ next_slot:
btrfs_file_extent_encryption(leaf, fi) ||
btrfs_file_extent_other_encoding(leaf, fi))
goto out_check;
+ /*
+ * Do the same check as in btrfs_cross_ref_exist but
+ * without the unnecessary search.
+ */
+ if (btrfs_file_extent_generation(leaf, fi) <=
+ btrfs_root_last_snapshot(&root->root_item))
+ goto out_check;
if (extent_type == BTRFS_FILE_EXTENT_REG && !force)
goto out_check;
if (btrfs_extent_readonly(fs_info, disk_bytenr))
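The generation check added above is a cheap snapshot-sharing filter: a snapshot can only reference extents that already existed when it was taken, so an extent whose generation is at or before the root's last snapshot may be shared, and NOCOW has to be abandoned without running the costly backref search. As a predicate (kernel-style sketch, illustrative only):

/* True when the most recent snapshot of the root provably cannot
 * reference this extent, because the extent was created later. */
static bool extent_newer_than_last_snapshot(u64 extent_gen, u64 last_snapshot)
{
	return extent_gen > last_snapshot;
}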
@@ -1742,24 +1748,32 @@ static void btrfs_add_delalloc_inodes(struct btrfs_root *root,
spin_unlock(&root->delalloc_lock);
}
-static void btrfs_del_delalloc_inode(struct btrfs_root *root,
- struct btrfs_inode *inode)
+
+void __btrfs_del_delalloc_inode(struct btrfs_root *root,
+ struct btrfs_inode *inode)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
- spin_lock(&root->delalloc_lock);
if (!list_empty(&inode->delalloc_inodes)) {
list_del_init(&inode->delalloc_inodes);
clear_bit(BTRFS_INODE_IN_DELALLOC_LIST,
&inode->runtime_flags);
root->nr_delalloc_inodes--;
if (!root->nr_delalloc_inodes) {
+ ASSERT(list_empty(&root->delalloc_inodes));
spin_lock(&fs_info->delalloc_root_lock);
BUG_ON(list_empty(&root->delalloc_root));
list_del_init(&root->delalloc_root);
spin_unlock(&fs_info->delalloc_root_lock);
}
}
+}
+
+static void btrfs_del_delalloc_inode(struct btrfs_root *root,
+ struct btrfs_inode *inode)
+{
+ spin_lock(&root->delalloc_lock);
+ __btrfs_del_delalloc_inode(root, inode);
spin_unlock(&root->delalloc_lock);
}
@@ -3151,6 +3165,9 @@ out:
/* once for the tree */
btrfs_put_ordered_extent(ordered_extent);
+ /* Try to release some metadata so we don't OOM, but don't block waiting */
+ btrfs_btree_balance_dirty_nodelay(fs_info);
+
return ret;
}
@@ -3293,177 +3310,31 @@ void btrfs_run_delayed_iputs(struct btrfs_fs_info *fs_info)
}
/*
- * This is called in transaction commit time. If there are no orphan
- * files in the subvolume, it removes orphan item and frees block_rsv
- * structure.
- */
-void btrfs_orphan_commit_root(struct btrfs_trans_handle *trans,
- struct btrfs_root *root)
-{
- struct btrfs_fs_info *fs_info = root->fs_info;
- struct btrfs_block_rsv *block_rsv;
- int ret;
-
- if (atomic_read(&root->orphan_inodes) ||
- root->orphan_cleanup_state != ORPHAN_CLEANUP_DONE)
- return;
-
- spin_lock(&root->orphan_lock);
- if (atomic_read(&root->orphan_inodes)) {
- spin_unlock(&root->orphan_lock);
- return;
- }
-
- if (root->orphan_cleanup_state != ORPHAN_CLEANUP_DONE) {
- spin_unlock(&root->orphan_lock);
- return;
- }
-
- block_rsv = root->orphan_block_rsv;
- root->orphan_block_rsv = NULL;
- spin_unlock(&root->orphan_lock);
-
- if (test_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, &root->state) &&
- btrfs_root_refs(&root->root_item) > 0) {
- ret = btrfs_del_orphan_item(trans, fs_info->tree_root,
- root->root_key.objectid);
- if (ret)
- btrfs_abort_transaction(trans, ret);
- else
- clear_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED,
- &root->state);
- }
-
- if (block_rsv) {
- WARN_ON(block_rsv->size > 0);
- btrfs_free_block_rsv(fs_info, block_rsv);
- }
-}
-
-/*
- * This creates an orphan entry for the given inode in case something goes
- * wrong in the middle of an unlink/truncate.
- *
- * NOTE: caller of this function should reserve 5 units of metadata for
- * this function.
+ * This creates an orphan entry for the given inode in case something goes wrong
+ * in the middle of an unlink.
*/
int btrfs_orphan_add(struct btrfs_trans_handle *trans,
- struct btrfs_inode *inode)
+ struct btrfs_inode *inode)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
- struct btrfs_root *root = inode->root;
- struct btrfs_block_rsv *block_rsv = NULL;
- int reserve = 0;
- bool insert = false;
int ret;
- if (!root->orphan_block_rsv) {
- block_rsv = btrfs_alloc_block_rsv(fs_info,
- BTRFS_BLOCK_RSV_TEMP);
- if (!block_rsv)
- return -ENOMEM;
- }
-
- if (!test_and_set_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
- &inode->runtime_flags))
- insert = true;
-
- if (!test_and_set_bit(BTRFS_INODE_ORPHAN_META_RESERVED,
- &inode->runtime_flags))
- reserve = 1;
-
- spin_lock(&root->orphan_lock);
- /* If someone has created ->orphan_block_rsv, be happy to use it. */
- if (!root->orphan_block_rsv) {
- root->orphan_block_rsv = block_rsv;
- } else if (block_rsv) {
- btrfs_free_block_rsv(fs_info, block_rsv);
- block_rsv = NULL;
- }
-
- if (insert)
- atomic_inc(&root->orphan_inodes);
- spin_unlock(&root->orphan_lock);
-
- /* grab metadata reservation from transaction handle */
- if (reserve) {
- ret = btrfs_orphan_reserve_metadata(trans, inode);
- ASSERT(!ret);
- if (ret) {
- /*
- * dec doesn't need spin_lock as ->orphan_block_rsv
- * would be released only if ->orphan_inodes is
- * zero.
- */
- atomic_dec(&root->orphan_inodes);
- clear_bit(BTRFS_INODE_ORPHAN_META_RESERVED,
- &inode->runtime_flags);
- if (insert)
- clear_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
- &inode->runtime_flags);
- return ret;
- }
- }
-
- /* insert an orphan item to track this unlinked/truncated file */
- if (insert) {
- ret = btrfs_insert_orphan_item(trans, root, btrfs_ino(inode));
- if (ret) {
- if (reserve) {
- clear_bit(BTRFS_INODE_ORPHAN_META_RESERVED,
- &inode->runtime_flags);
- btrfs_orphan_release_metadata(inode);
- }
- /*
- * btrfs_orphan_commit_root may race with us and set
- * ->orphan_block_rsv to zero, in order to avoid that,
- * decrease ->orphan_inodes after everything is done.
- */
- atomic_dec(&root->orphan_inodes);
- if (ret != -EEXIST) {
- clear_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
- &inode->runtime_flags);
- btrfs_abort_transaction(trans, ret);
- return ret;
- }
- }
- ret = 0;
+ ret = btrfs_insert_orphan_item(trans, inode->root, btrfs_ino(inode));
+ if (ret && ret != -EEXIST) {
+ btrfs_abort_transaction(trans, ret);
+ return ret;
}
return 0;
}
/*
- * We have done the truncate/delete so we can go ahead and remove the orphan
- * item for this particular inode.
+ * We have done the delete so we can go ahead and remove the orphan item for
+ * this particular inode.
*/
static int btrfs_orphan_del(struct btrfs_trans_handle *trans,
struct btrfs_inode *inode)
{
- struct btrfs_root *root = inode->root;
- int delete_item = 0;
- int ret = 0;
-
- if (test_and_clear_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
- &inode->runtime_flags))
- delete_item = 1;
-
- if (delete_item && trans)
- ret = btrfs_del_orphan_item(trans, root, btrfs_ino(inode));
-
- if (test_and_clear_bit(BTRFS_INODE_ORPHAN_META_RESERVED,
- &inode->runtime_flags))
- btrfs_orphan_release_metadata(inode);
-
- /*
- * btrfs_orphan_commit_root may race with us and set ->orphan_block_rsv
- * to zero, in order to avoid that, decrease ->orphan_inodes after
- * everything is done.
- */
- if (delete_item)
- atomic_dec(&root->orphan_inodes);
-
- return ret;
+ return btrfs_del_orphan_item(trans, inode->root, btrfs_ino(inode));
}
/*
@@ -3479,7 +3350,7 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
struct btrfs_trans_handle *trans;
struct inode *inode;
u64 last_objectid = 0;
- int ret = 0, nr_unlink = 0, nr_truncate = 0;
+ int ret = 0, nr_unlink = 0;
if (cmpxchg(&root->orphan_cleanup_state, 0, ORPHAN_CLEANUP_STARTED))
return 0;
@@ -3579,12 +3450,31 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
key.offset = found_key.objectid - 1;
continue;
}
+
}
+
/*
- * Inode is already gone but the orphan item is still there,
- * kill the orphan item.
+ * If we have an inode with links, there are a couple of
+ * possibilities. Old kernels (before v3.12) used to create an
+ * orphan item for truncate indicating that there were possibly
+ * extent items past i_size that needed to be deleted. In v3.12,
+ * truncate was changed to update i_size in sync with the extent
+ * items, but the (useless) orphan item was still created. Since
+ * v4.18, we don't create the orphan item for truncate at all.
+ *
+ * So, this item could mean that we need to do a truncate, but
+ * only if this filesystem was last used on a pre-v3.12 kernel
+ * and was not cleanly unmounted. The odds of that are quite
+ * slim, and it's a pain to do the truncate now, so just delete
+ * the orphan item.
+ *
+ * It's also possible that this orphan item was supposed to be
+ * deleted but wasn't. The inode number may have been reused,
+ * but either way, we can delete the orphan item.
*/
- if (ret == -ENOENT) {
+ if (ret == -ENOENT || inode->i_nlink) {
+ if (!ret)
+ iput(inode);
trans = btrfs_start_transaction(root, 1);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
@@ -3600,42 +3490,7 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
continue;
}
- /*
- * add this inode to the orphan list so btrfs_orphan_del does
- * the proper thing when we hit it
- */
- set_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
- &BTRFS_I(inode)->runtime_flags);
- atomic_inc(&root->orphan_inodes);
-
- /* if we have links, this was a truncate, lets do that */
- if (inode->i_nlink) {
- if (WARN_ON(!S_ISREG(inode->i_mode))) {
- iput(inode);
- continue;
- }
- nr_truncate++;
-
- /* 1 for the orphan item deletion. */
- trans = btrfs_start_transaction(root, 1);
- if (IS_ERR(trans)) {
- iput(inode);
- ret = PTR_ERR(trans);
- goto out;
- }
- ret = btrfs_orphan_add(trans, BTRFS_I(inode));
- btrfs_end_transaction(trans);
- if (ret) {
- iput(inode);
- goto out;
- }
-
- ret = btrfs_truncate(inode, false);
- if (ret)
- btrfs_orphan_del(NULL, BTRFS_I(inode));
- } else {
- nr_unlink++;
- }
+ nr_unlink++;
/* this will do delete_inode and everything for us */
iput(inode);
@@ -3647,12 +3502,7 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
root->orphan_cleanup_state = ORPHAN_CLEANUP_DONE;
- if (root->orphan_block_rsv)
- btrfs_block_rsv_release(fs_info, root->orphan_block_rsv,
- (u64)-1);
-
- if (root->orphan_block_rsv ||
- test_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, &root->state)) {
+ if (test_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, &root->state)) {
trans = btrfs_join_transaction(root);
if (!IS_ERR(trans))
btrfs_end_transaction(trans);
@@ -3660,8 +3510,6 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
if (nr_unlink)
btrfs_debug(fs_info, "unlinked %d orphans", nr_unlink);
- if (nr_truncate)
- btrfs_debug(fs_info, "truncated %d orphans", nr_truncate);
out:
if (ret)
@@ -3924,7 +3772,7 @@ cache_acl:
break;
}
- btrfs_update_iflags(inode);
+ btrfs_sync_inode_flags_to_i_flags(inode);
return 0;
make_bad:
@@ -4238,7 +4086,7 @@ out:
return ret;
}
-int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
+static int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct inode *dir, u64 objectid,
const char *name, int name_len)
@@ -4319,6 +4167,262 @@ out:
return ret;
}
+/*
+ * Helper to check if the subvolume references other subvolumes or if it's
+ * the default subvolume.
+ */
+static noinline int may_destroy_subvol(struct btrfs_root *root)
+{
+ struct btrfs_fs_info *fs_info = root->fs_info;
+ struct btrfs_path *path;
+ struct btrfs_dir_item *di;
+ struct btrfs_key key;
+ u64 dir_id;
+ int ret;
+
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+
+ /* Make sure this root isn't set as the default subvol */
+ dir_id = btrfs_super_root_dir(fs_info->super_copy);
+ di = btrfs_lookup_dir_item(NULL, fs_info->tree_root, path,
+ dir_id, "default", 7, 0);
+ if (di && !IS_ERR(di)) {
+ btrfs_dir_item_key_to_cpu(path->nodes[0], di, &key);
+ if (key.objectid == root->root_key.objectid) {
+ ret = -EPERM;
+ btrfs_err(fs_info,
+ "deleting default subvolume %llu is not allowed",
+ key.objectid);
+ goto out;
+ }
+ btrfs_release_path(path);
+ }
+
+ key.objectid = root->root_key.objectid;
+ key.type = BTRFS_ROOT_REF_KEY;
+ key.offset = (u64)-1;
+
+ ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, path, 0, 0);
+ if (ret < 0)
+ goto out;
+ BUG_ON(ret == 0);
+
+ ret = 0;
+ if (path->slots[0] > 0) {
+ path->slots[0]--;
+ btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
+ if (key.objectid == root->root_key.objectid &&
+ key.type == BTRFS_ROOT_REF_KEY)
+ ret = -ENOTEMPTY;
+ }
+out:
+ btrfs_free_path(path);
+ return ret;
+}
+
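The ROOT_REF lookup in may_destroy_subvol() uses a common B-tree idiom: search for the largest possible key of the (objectid, type) pair — offset (u64)-1, which can never match exactly — then step back one slot and check whether the previous item still belongs to that pair. A userspace analogue over a sorted array (names hypothetical; assumes real offsets stay below U64_MAX):

#include <stdbool.h>
#include <stddef.h>

struct key { unsigned long long objectid, type, offset; };

static int key_cmp(const struct key *a, const struct key *b)
{
	if (a->objectid != b->objectid)
		return a->objectid < b->objectid ? -1 : 1;
	if (a->type != b->type)
		return a->type < b->type ? -1 : 1;
	if (a->offset != b->offset)
		return a->offset < b->offset ? -1 : 1;
	return 0;
}

/* Return true if any (objectid, type, *) entry exists in the sorted array. */
static bool any_ref_exists(const struct key *items, size_t n,
			   unsigned long long objectid, unsigned long long type)
{
	struct key probe = { objectid, type, ~0ULL };
	size_t lo = 0, hi = n;	/* lower_bound for probe */

	while (lo < hi) {
		size_t mid = lo + (hi - lo) / 2;

		if (key_cmp(&items[mid], &probe) < 0)
			lo = mid + 1;
		else
			hi = mid;
	}
	/* step back one slot, as btrfs_search_slot() callers do */
	if (lo == 0)
		return false;
	return items[lo - 1].objectid == objectid && items[lo - 1].type == type;
}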
+/* Delete all dentries for inodes belonging to the root */
+static void btrfs_prune_dentries(struct btrfs_root *root)
+{
+ struct btrfs_fs_info *fs_info = root->fs_info;
+ struct rb_node *node;
+ struct rb_node *prev;
+ struct btrfs_inode *entry;
+ struct inode *inode;
+ u64 objectid = 0;
+
+ if (!test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state))
+ WARN_ON(btrfs_root_refs(&root->root_item) != 0);
+
+ spin_lock(&root->inode_lock);
+again:
+ node = root->inode_tree.rb_node;
+ prev = NULL;
+ while (node) {
+ prev = node;
+ entry = rb_entry(node, struct btrfs_inode, rb_node);
+
+ if (objectid < btrfs_ino(BTRFS_I(&entry->vfs_inode)))
+ node = node->rb_left;
+ else if (objectid > btrfs_ino(BTRFS_I(&entry->vfs_inode)))
+ node = node->rb_right;
+ else
+ break;
+ }
+ if (!node) {
+ while (prev) {
+ entry = rb_entry(prev, struct btrfs_inode, rb_node);
+ if (objectid <= btrfs_ino(BTRFS_I(&entry->vfs_inode))) {
+ node = prev;
+ break;
+ }
+ prev = rb_next(prev);
+ }
+ }
+ while (node) {
+ entry = rb_entry(node, struct btrfs_inode, rb_node);
+ objectid = btrfs_ino(BTRFS_I(&entry->vfs_inode)) + 1;
+ inode = igrab(&entry->vfs_inode);
+ if (inode) {
+ spin_unlock(&root->inode_lock);
+ if (atomic_read(&inode->i_count) > 1)
+ d_prune_aliases(inode);
+ /*
+ * btrfs_drop_inode will have it removed from the inode
+ * cache when its usage count hits zero.
+ */
+ iput(inode);
+ cond_resched();
+ spin_lock(&root->inode_lock);
+ goto again;
+ }
+
+ if (cond_resched_lock(&root->inode_lock))
+ goto again;
+
+ node = rb_next(node);
+ }
+ spin_unlock(&root->inode_lock);
+}
+
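btrfs_prune_dentries() is btrfs_invalidate_inodes() renamed and moved (the old copy is deleted from this file further down); it relies on the restartable-walk pattern: record the next inode number before dropping the spinlock, then re-find the position rather than trusting a stale rb_node. The skeleton, as a sketch with hypothetical helpers:

spin_lock(&lock);
again:
	node = first_node_at_or_above(objectid);	/* hypothetical lookup */
	while (node) {
		objectid = node_key(node) + 1;		/* remember progress first */
		if (must_drop_lock(node)) {
			spin_unlock(&lock);
			do_blocking_work(node);		/* e.g. iput() */
			spin_lock(&lock);
			goto again;			/* node may be gone; re-find */
		}
		node = next_node(node);
	}
	spin_unlock(&lock);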
+int btrfs_delete_subvolume(struct inode *dir, struct dentry *dentry)
+{
+ struct btrfs_fs_info *fs_info = btrfs_sb(dentry->d_sb);
+ struct btrfs_root *root = BTRFS_I(dir)->root;
+ struct inode *inode = d_inode(dentry);
+ struct btrfs_root *dest = BTRFS_I(inode)->root;
+ struct btrfs_trans_handle *trans;
+ struct btrfs_block_rsv block_rsv;
+ u64 root_flags;
+ int ret;
+ int err;
+
+ /*
+ * Don't allow deletion of a subvolume while a send is in progress. This
+ * runs under the inode lock, so the error handling that has to clear the
+ * flag again cannot run concurrently.
+ */
+ spin_lock(&dest->root_item_lock);
+ root_flags = btrfs_root_flags(&dest->root_item);
+ if (dest->send_in_progress == 0) {
+ btrfs_set_root_flags(&dest->root_item,
+ root_flags | BTRFS_ROOT_SUBVOL_DEAD);
+ spin_unlock(&dest->root_item_lock);
+ } else {
+ spin_unlock(&dest->root_item_lock);
+ btrfs_warn(fs_info,
+ "attempt to delete subvolume %llu during send",
+ dest->root_key.objectid);
+ return -EPERM;
+ }
+
+ down_write(&fs_info->subvol_sem);
+
+ err = may_destroy_subvol(dest);
+ if (err)
+ goto out_up_write;
+
+ btrfs_init_block_rsv(&block_rsv, BTRFS_BLOCK_RSV_TEMP);
+ /*
+ * One for dir inode,
+ * two for dir entries,
+ * two for root ref/backref.
+ */
+ err = btrfs_subvolume_reserve_metadata(root, &block_rsv, 5, true);
+ if (err)
+ goto out_up_write;
+
+ trans = btrfs_start_transaction(root, 0);
+ if (IS_ERR(trans)) {
+ err = PTR_ERR(trans);
+ goto out_release;
+ }
+ trans->block_rsv = &block_rsv;
+ trans->bytes_reserved = block_rsv.size;
+
+ btrfs_record_snapshot_destroy(trans, BTRFS_I(dir));
+
+ ret = btrfs_unlink_subvol(trans, root, dir,
+ dest->root_key.objectid,
+ dentry->d_name.name,
+ dentry->d_name.len);
+ if (ret) {
+ err = ret;
+ btrfs_abort_transaction(trans, ret);
+ goto out_end_trans;
+ }
+
+ btrfs_record_root_in_trans(trans, dest);
+
+ memset(&dest->root_item.drop_progress, 0,
+ sizeof(dest->root_item.drop_progress));
+ dest->root_item.drop_level = 0;
+ btrfs_set_root_refs(&dest->root_item, 0);
+
+ if (!test_and_set_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, &dest->state)) {
+ ret = btrfs_insert_orphan_item(trans,
+ fs_info->tree_root,
+ dest->root_key.objectid);
+ if (ret) {
+ btrfs_abort_transaction(trans, ret);
+ err = ret;
+ goto out_end_trans;
+ }
+ }
+
+ ret = btrfs_uuid_tree_remove(trans, dest->root_item.uuid,
+ BTRFS_UUID_KEY_SUBVOL,
+ dest->root_key.objectid);
+ if (ret && ret != -ENOENT) {
+ btrfs_abort_transaction(trans, ret);
+ err = ret;
+ goto out_end_trans;
+ }
+ if (!btrfs_is_empty_uuid(dest->root_item.received_uuid)) {
+ ret = btrfs_uuid_tree_remove(trans,
+ dest->root_item.received_uuid,
+ BTRFS_UUID_KEY_RECEIVED_SUBVOL,
+ dest->root_key.objectid);
+ if (ret && ret != -ENOENT) {
+ btrfs_abort_transaction(trans, ret);
+ err = ret;
+ goto out_end_trans;
+ }
+ }
+
+out_end_trans:
+ trans->block_rsv = NULL;
+ trans->bytes_reserved = 0;
+ ret = btrfs_end_transaction(trans);
+ if (ret && !err)
+ err = ret;
+ inode->i_flags |= S_DEAD;
+out_release:
+ btrfs_subvolume_release_metadata(fs_info, &block_rsv);
+out_up_write:
+ up_write(&fs_info->subvol_sem);
+ if (err) {
+ spin_lock(&dest->root_item_lock);
+ root_flags = btrfs_root_flags(&dest->root_item);
+ btrfs_set_root_flags(&dest->root_item,
+ root_flags & ~BTRFS_ROOT_SUBVOL_DEAD);
+ spin_unlock(&dest->root_item_lock);
+ } else {
+ d_invalidate(dentry);
+ btrfs_prune_dentries(dest);
+ ASSERT(dest->send_in_progress == 0);
+
+ /* the last ref */
+ if (dest->ino_cache_inode) {
+ iput(dest->ino_cache_inode);
+ dest->ino_cache_inode = NULL;
+ }
+ }
+
+ return err;
+}
+
static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
{
struct inode *inode = d_inode(dentry);
@@ -4330,7 +4434,7 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
if (inode->i_size > BTRFS_EMPTY_DIR_SIZE)
return -ENOTEMPTY;
if (btrfs_ino(BTRFS_I(inode)) == BTRFS_FIRST_FREE_OBJECTID)
- return -EPERM;
+ return btrfs_delete_subvolume(dir, dentry);
trans = __unlink_start_trans(dir);
if (IS_ERR(trans))
@@ -4442,7 +4546,6 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
int pending_del_slot = 0;
int extent_type = -1;
int ret;
- int err = 0;
u64 ino = btrfs_ino(BTRFS_I(inode));
u64 bytes_deleted = 0;
bool be_nice = false;
@@ -4494,22 +4597,19 @@ search_again:
* up a huge file in a single leaf. Most of the time that
* bytes_deleted is > 0, it will be huge by the time we get here
*/
- if (be_nice && bytes_deleted > SZ_32M) {
- if (btrfs_should_end_transaction(trans)) {
- err = -EAGAIN;
- goto error;
- }
+ if (be_nice && bytes_deleted > SZ_32M &&
+ btrfs_should_end_transaction(trans)) {
+ ret = -EAGAIN;
+ goto out;
}
-
path->leave_spinning = 1;
ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
- if (ret < 0) {
- err = ret;
+ if (ret < 0)
goto out;
- }
if (ret > 0) {
+ ret = 0;
/* there are no items in the tree for us to truncate, we're
* done
*/
@@ -4620,7 +4720,7 @@ search_again:
* We have to bail so the last_size is set to
* just before this extent.
*/
- err = NEED_TRUNCATE_BLOCK;
+ ret = NEED_TRUNCATE_BLOCK;
break;
}
@@ -4659,7 +4759,10 @@ delete:
extent_num_bytes, 0,
btrfs_header_owner(leaf),
ino, extent_offset);
- BUG_ON(ret);
+ if (ret) {
+ btrfs_abort_transaction(trans, ret);
+ break;
+ }
if (btrfs_should_throttle_delayed_refs(trans, fs_info))
btrfs_async_run_delayed_refs(fs_info,
trans->delayed_ref_updates * 2,
@@ -4687,7 +4790,7 @@ delete:
pending_del_nr);
if (ret) {
btrfs_abort_transaction(trans, ret);
- goto error;
+ break;
}
pending_del_nr = 0;
}
@@ -4698,8 +4801,8 @@ delete:
trans->delayed_ref_updates = 0;
ret = btrfs_run_delayed_refs(trans,
updates * 2);
- if (ret && !err)
- err = ret;
+ if (ret)
+ break;
}
}
/*
@@ -4707,8 +4810,8 @@ delete:
* and let the transaction restart
*/
if (should_end) {
- err = -EAGAIN;
- goto error;
+ ret = -EAGAIN;
+ break;
}
goto search_again;
} else {
@@ -4716,32 +4819,37 @@ delete:
}
}
out:
- if (pending_del_nr) {
- ret = btrfs_del_items(trans, root, path, pending_del_slot,
+ if (ret >= 0 && pending_del_nr) {
+ int err;
+
+ err = btrfs_del_items(trans, root, path, pending_del_slot,
pending_del_nr);
- if (ret)
- btrfs_abort_transaction(trans, ret);
+ if (err) {
+ btrfs_abort_transaction(trans, err);
+ ret = err;
+ }
}
-error:
if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
ASSERT(last_size >= new_size);
- if (!err && last_size > new_size)
+ if (!ret && last_size > new_size)
last_size = new_size;
btrfs_ordered_update_i_size(inode, last_size, NULL);
}
btrfs_free_path(path);
- if (be_nice && bytes_deleted > SZ_32M) {
+ if (be_nice && bytes_deleted > SZ_32M && (ret >= 0 || ret == -EAGAIN)) {
unsigned long updates = trans->delayed_ref_updates;
+ int err;
+
if (updates) {
trans->delayed_ref_updates = 0;
- ret = btrfs_run_delayed_refs(trans, updates * 2);
- if (ret && !err)
- err = ret;
+ err = btrfs_run_delayed_refs(trans, updates * 2);
+ if (err)
+ ret = err;
}
}
- return err;
+ return ret;
}
/*
@@ -5083,30 +5191,6 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr)
set_bit(BTRFS_INODE_ORDERED_DATA_CLOSE,
&BTRFS_I(inode)->runtime_flags);
- /*
- * 1 for the orphan item we're going to add
- * 1 for the orphan item deletion.
- */
- trans = btrfs_start_transaction(root, 2);
- if (IS_ERR(trans))
- return PTR_ERR(trans);
-
- /*
- * We need to do this in case we fail at _any_ point during the
- * actual truncate. Once we do the truncate_setsize we could
- * invalidate pages which forces any outstanding ordered io to
- * be instantly completed which will give us extents that need
- * to be truncated. If we fail to get an orphan inode down we
- * could have left over extents that were never meant to live,
- * so we need to guarantee from this point on that everything
- * will be consistent.
- */
- ret = btrfs_orphan_add(trans, BTRFS_I(inode));
- btrfs_end_transaction(trans);
- if (ret)
- return ret;
-
- /* we don't support swapfiles, so vmtruncate shouldn't fail */
truncate_setsize(inode, newsize);
/* Disable nonlocked read DIO to avoid the endless truncate */
@@ -5118,29 +5202,16 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr)
if (ret && inode->i_nlink) {
int err;
- /* To get a stable disk_i_size */
- err = btrfs_wait_ordered_range(inode, 0, (u64)-1);
- if (err) {
- btrfs_orphan_del(NULL, BTRFS_I(inode));
- return err;
- }
-
/*
- * failed to truncate, disk_i_size is only adjusted down
- * as we remove extents, so it should represent the true
- * size of the inode, so reset the in memory size and
- * delete our orphan entry.
+ * Truncate failed, so fix up the in-memory size. We
+ * adjusted disk_i_size down as we removed extents, so
+ * wait for disk_i_size to be stable and then update the
+ * in-memory size to match.
*/
- trans = btrfs_join_transaction(root);
- if (IS_ERR(trans)) {
- btrfs_orphan_del(NULL, BTRFS_I(inode));
- return ret;
- }
- i_size_write(inode, BTRFS_I(inode)->disk_i_size);
- err = btrfs_orphan_del(trans, BTRFS_I(inode));
+ err = btrfs_wait_ordered_range(inode, 0, (u64)-1);
if (err)
- btrfs_abort_transaction(trans, err);
- btrfs_end_transaction(trans);
+ return err;
+ i_size_write(inode, BTRFS_I(inode)->disk_i_size);
}
}
@@ -5270,13 +5341,52 @@ static void evict_inode_truncate_pages(struct inode *inode)
spin_unlock(&io_tree->lock);
}
+static struct btrfs_trans_handle *evict_refill_and_join(struct btrfs_root *root,
+ struct btrfs_block_rsv *rsv,
+ u64 min_size)
+{
+ struct btrfs_fs_info *fs_info = root->fs_info;
+ struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
+ int failures = 0;
+
+ for (;;) {
+ struct btrfs_trans_handle *trans;
+ int ret;
+
+ ret = btrfs_block_rsv_refill(root, rsv, min_size,
+ BTRFS_RESERVE_FLUSH_LIMIT);
+
+ if (ret && ++failures > 2) {
+ btrfs_warn(fs_info,
+ "could not allocate space for a delete; will truncate on mount");
+ return ERR_PTR(-ENOSPC);
+ }
+
+ trans = btrfs_join_transaction(root);
+ if (IS_ERR(trans) || !ret)
+ return trans;
+
+ /*
+ * Try to steal from the global reserve if there is space for
+ * it.
+ */
+ if (!btrfs_check_space_for_delayed_refs(trans, fs_info) &&
+ !btrfs_block_rsv_migrate(global_rsv, rsv, min_size, 0))
+ return trans;
+
+ /* If not, commit and try again. */
+ ret = btrfs_commit_transaction(trans);
+ if (ret)
+ return ERR_PTR(ret);
+ }
+}
+
void btrfs_evict_inode(struct inode *inode)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_trans_handle *trans;
struct btrfs_root *root = BTRFS_I(inode)->root;
- struct btrfs_block_rsv *rsv, *global_rsv;
- int steal_from_global = 0;
+ struct btrfs_block_rsv *rsv;
u64 min_size;
int ret;
@@ -5297,21 +5407,16 @@ void btrfs_evict_inode(struct inode *inode)
btrfs_is_free_space_inode(BTRFS_I(inode))))
goto no_delete;
- if (is_bad_inode(inode)) {
- btrfs_orphan_del(NULL, BTRFS_I(inode));
+ if (is_bad_inode(inode))
goto no_delete;
- }
/* do we really want it for ->i_nlink > 0 and zero btrfs_root_refs? */
if (!special_file(inode->i_mode))
btrfs_wait_ordered_range(inode, 0, (u64)-1);
btrfs_free_io_failure_record(BTRFS_I(inode), 0, (u64)-1);
- if (test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags)) {
- BUG_ON(test_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
- &BTRFS_I(inode)->runtime_flags));
+ if (test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags))
goto no_delete;
- }
if (inode->i_nlink > 0) {
BUG_ON(btrfs_root_refs(&root->root_item) != 0 &&
@@ -5320,130 +5425,63 @@ void btrfs_evict_inode(struct inode *inode)
}
ret = btrfs_commit_inode_delayed_inode(BTRFS_I(inode));
- if (ret) {
- btrfs_orphan_del(NULL, BTRFS_I(inode));
+ if (ret)
goto no_delete;
- }
rsv = btrfs_alloc_block_rsv(fs_info, BTRFS_BLOCK_RSV_TEMP);
- if (!rsv) {
- btrfs_orphan_del(NULL, BTRFS_I(inode));
+ if (!rsv)
goto no_delete;
- }
rsv->size = min_size;
rsv->failfast = 1;
- global_rsv = &fs_info->global_block_rsv;
btrfs_i_size_write(BTRFS_I(inode), 0);
- /*
- * This is a bit simpler than btrfs_truncate since we've already
- * reserved our space for our orphan item in the unlink, so we just
- * need to reserve some slack space in case we add bytes and update
- * inode item when doing the truncate.
- */
while (1) {
- ret = btrfs_block_rsv_refill(root, rsv, min_size,
- BTRFS_RESERVE_FLUSH_LIMIT);
-
- /*
- * Try and steal from the global reserve since we will
- * likely not use this space anyway, we want to try as
- * hard as possible to get this to work.
- */
- if (ret)
- steal_from_global++;
- else
- steal_from_global = 0;
- ret = 0;
-
- /*
- * steal_from_global == 0: we reserved stuff, hooray!
- * steal_from_global == 1: we didn't reserve stuff, boo!
- * steal_from_global == 2: we've committed, still not a lot of
- * room but maybe we'll have room in the global reserve this
- * time.
- * steal_from_global == 3: abandon all hope!
- */
- if (steal_from_global > 2) {
- btrfs_warn(fs_info,
- "Could not get space for a delete, will truncate on mount %d",
- ret);
- btrfs_orphan_del(NULL, BTRFS_I(inode));
- btrfs_free_block_rsv(fs_info, rsv);
- goto no_delete;
- }
-
- trans = btrfs_join_transaction(root);
- if (IS_ERR(trans)) {
- btrfs_orphan_del(NULL, BTRFS_I(inode));
- btrfs_free_block_rsv(fs_info, rsv);
- goto no_delete;
- }
-
- /*
- * We can't just steal from the global reserve, we need to make
- * sure there is room to do it, if not we need to commit and try
- * again.
- */
- if (steal_from_global) {
- if (!btrfs_check_space_for_delayed_refs(trans, fs_info))
- ret = btrfs_block_rsv_migrate(global_rsv, rsv,
- min_size, 0);
- else
- ret = -ENOSPC;
- }
-
- /*
- * Couldn't steal from the global reserve, we have too much
- * pending stuff built up, commit the transaction and try it
- * again.
- */
- if (ret) {
- ret = btrfs_commit_transaction(trans);
- if (ret) {
- btrfs_orphan_del(NULL, BTRFS_I(inode));
- btrfs_free_block_rsv(fs_info, rsv);
- goto no_delete;
- }
- continue;
- } else {
- steal_from_global = 0;
- }
+ trans = evict_refill_and_join(root, rsv, min_size);
+ if (IS_ERR(trans))
+ goto free_rsv;
trans->block_rsv = rsv;
ret = btrfs_truncate_inode_items(trans, root, inode, 0, 0);
- if (ret != -ENOSPC && ret != -EAGAIN)
- break;
-
trans->block_rsv = &fs_info->trans_block_rsv;
btrfs_end_transaction(trans);
- trans = NULL;
btrfs_btree_balance_dirty(fs_info);
+ if (ret && ret != -ENOSPC && ret != -EAGAIN)
+ goto free_rsv;
+ else if (!ret)
+ break;
}
- btrfs_free_block_rsv(fs_info, rsv);
-
/*
- * Errors here aren't a big deal, it just means we leave orphan items
- * in the tree. They will be cleaned up on the next mount.
+ * Errors here aren't a big deal; they just mean we leave orphan items in
+ * the tree. They will be cleaned up on the next mount. If the inode
+ * number gets reused, cleanup deletes the orphan item without doing
+ * anything, and unlink reuses the existing orphan item.
+ *
+ * If it turns out that we are dropping too many of these, we might want
+ * to add a mechanism for retrying these after a commit.
*/
- if (ret == 0) {
- trans->block_rsv = root->orphan_block_rsv;
+ trans = evict_refill_and_join(root, rsv, min_size);
+ if (!IS_ERR(trans)) {
+ trans->block_rsv = rsv;
btrfs_orphan_del(trans, BTRFS_I(inode));
- } else {
- btrfs_orphan_del(NULL, BTRFS_I(inode));
+ trans->block_rsv = &fs_info->trans_block_rsv;
+ btrfs_end_transaction(trans);
}
- trans->block_rsv = &fs_info->trans_block_rsv;
if (!(root == fs_info->tree_root ||
root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID))
btrfs_return_ino(root, btrfs_ino(BTRFS_I(inode)));
- btrfs_end_transaction(trans);
- btrfs_btree_balance_dirty(fs_info);
+free_rsv:
+ btrfs_free_block_rsv(fs_info, rsv);
no_delete:
+ /*
+ * If we didn't successfully delete, the orphan item will still be in
+ * the tree and we'll retry on the next mount. Again, we might also want
+ * to retry these periodically in the future.
+ */
btrfs_remove_delayed_node(BTRFS_I(inode));
clear_inode(inode);
}
@@ -5619,69 +5657,6 @@ static void inode_tree_del(struct inode *inode)
}
}
-void btrfs_invalidate_inodes(struct btrfs_root *root)
-{
- struct btrfs_fs_info *fs_info = root->fs_info;
- struct rb_node *node;
- struct rb_node *prev;
- struct btrfs_inode *entry;
- struct inode *inode;
- u64 objectid = 0;
-
- if (!test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state))
- WARN_ON(btrfs_root_refs(&root->root_item) != 0);
-
- spin_lock(&root->inode_lock);
-again:
- node = root->inode_tree.rb_node;
- prev = NULL;
- while (node) {
- prev = node;
- entry = rb_entry(node, struct btrfs_inode, rb_node);
-
- if (objectid < btrfs_ino(BTRFS_I(&entry->vfs_inode)))
- node = node->rb_left;
- else if (objectid > btrfs_ino(BTRFS_I(&entry->vfs_inode)))
- node = node->rb_right;
- else
- break;
- }
- if (!node) {
- while (prev) {
- entry = rb_entry(prev, struct btrfs_inode, rb_node);
- if (objectid <= btrfs_ino(BTRFS_I(&entry->vfs_inode))) {
- node = prev;
- break;
- }
- prev = rb_next(prev);
- }
- }
- while (node) {
- entry = rb_entry(node, struct btrfs_inode, rb_node);
- objectid = btrfs_ino(BTRFS_I(&entry->vfs_inode)) + 1;
- inode = igrab(&entry->vfs_inode);
- if (inode) {
- spin_unlock(&root->inode_lock);
- if (atomic_read(&inode->i_count) > 1)
- d_prune_aliases(inode);
- /*
- * btrfs_drop_inode will have it removed from
- * the inode cache when its usage count
- * hits zero.
- */
- iput(inode);
- cond_resched();
- spin_lock(&root->inode_lock);
- goto again;
- }
-
- if (cond_resched_lock(&root->inode_lock))
- goto again;
-
- node = rb_next(node);
- }
- spin_unlock(&root->inode_lock);
-}
static int btrfs_init_locked_inode(struct inode *inode, void *p)
{
@@ -5843,11 +5818,6 @@ static int btrfs_dentry_delete(const struct dentry *dentry)
return 0;
}
-static void btrfs_dentry_release(struct dentry *dentry)
-{
- kfree(dentry->d_fsdata);
-}
-
static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,
unsigned int flags)
{
@@ -6263,7 +6233,7 @@ static void btrfs_inherit_iflags(struct inode *inode, struct inode *dir)
BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM;
}
- btrfs_update_iflags(inode);
+ btrfs_sync_inode_flags_to_i_flags(inode);
}
static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
@@ -6579,8 +6549,7 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
goto out_unlock_inode;
} else {
btrfs_update_inode(trans, root, inode);
- unlock_new_inode(inode);
- d_instantiate(dentry, inode);
+ d_instantiate_new(dentry, inode);
}
out_unlock:
@@ -6656,8 +6625,7 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
goto out_unlock_inode;
BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
- unlock_new_inode(inode);
- d_instantiate(dentry, inode);
+ d_instantiate_new(dentry, inode);
out_unlock:
btrfs_end_transaction(trans);
@@ -6700,8 +6668,9 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
* 2 items for inode and inode ref
* 2 items for dir items
* 1 item for parent inode
+ * 1 item for orphan item deletion if O_TMPFILE
*/
- trans = btrfs_start_transaction(root, 5);
+ trans = btrfs_start_transaction(root, inode->i_nlink ? 5 : 6);
if (IS_ERR(trans)) {
err = PTR_ERR(trans);
trans = NULL;
@@ -6802,12 +6771,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
if (err)
goto out_fail_inode;
- d_instantiate(dentry, inode);
- /*
- * mkdir is special. We're unlocking after we call d_instantiate
- * to avoid a race with nfsd calling d_instantiate.
- */
- unlock_new_inode(inode);
+ d_instantiate_new(dentry, inode);
drop_on_err = 0;
out_fail:
@@ -7083,7 +7047,7 @@ insert:
err = 0;
write_lock(&em_tree->lock);
- err = btrfs_add_extent_mapping(em_tree, &em, start, len);
+ err = btrfs_add_extent_mapping(fs_info, em_tree, &em, start, len);
write_unlock(&em_tree->lock);
out:
@@ -7368,6 +7332,14 @@ noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,
btrfs_file_extent_other_encoding(leaf, fi))
goto out;
+ /*
+ * Do the same check as in btrfs_cross_ref_exist but without the
+ * unnecessary search.
+ */
+ if (btrfs_file_extent_generation(leaf, fi) <=
+ btrfs_root_last_snapshot(&root->root_item))
+ goto out;
+
backref_offset = btrfs_file_extent_offset(leaf, fi);
if (orig_start) {
@@ -7568,6 +7540,125 @@ static struct extent_map *create_io_em(struct inode *inode, u64 start, u64 len,
return em;
}
+
+static int btrfs_get_blocks_direct_read(struct extent_map *em,
+ struct buffer_head *bh_result,
+ struct inode *inode,
+ u64 start, u64 len)
+{
+ if (em->block_start == EXTENT_MAP_HOLE ||
+ test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
+ return -ENOENT;
+
+ len = min(len, em->len - (start - em->start));
+
+ bh_result->b_blocknr = (em->block_start + (start - em->start)) >>
+ inode->i_blkbits;
+ bh_result->b_size = len;
+ bh_result->b_bdev = em->bdev;
+ set_buffer_mapped(bh_result);
+
+ return 0;
+}
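
For reference, the read-side helper above is pure offset arithmetic: the extent map records where the extent begins on disk (block_start) and in the file (start), so any file offset inside it maps to disk byte block_start + (offset - start), shifted right by i_blkbits to yield the block number stored in the buffer_head. A minimal standalone sketch of that calculation (hypothetical names, assuming 4KiB blocks, i.e. blkbits == 12):

#include <stdint.h>
#include <stdio.h>

/* Hypothetical mirror of the mapping done in btrfs_get_blocks_direct_read() */
static uint64_t em_offset_to_blocknr(uint64_t em_block_start, uint64_t em_start,
				     uint64_t file_offset, unsigned int blkbits)
{
	/* disk byte backing this file offset within the extent */
	uint64_t disk_byte = em_block_start + (file_offset - em_start);

	return disk_byte >> blkbits;	/* byte address -> block number */
}

int main(void)
{
	/* extent at file offset 1MiB, backed by disk byte 128MiB */
	printf("block %llu\n", (unsigned long long)
	       em_offset_to_blocknr(128ULL << 20, 1ULL << 20, 1536ULL << 10, 12));
	return 0;
}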
+
+static int btrfs_get_blocks_direct_write(struct extent_map **map,
+ struct buffer_head *bh_result,
+ struct inode *inode,
+ struct btrfs_dio_data *dio_data,
+ u64 start, u64 len)
+{
+ struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct extent_map *em = *map;
+ int ret = 0;
+
+ /*
+	 * We don't allocate a new extent in the following cases:
+ *
+ * 1) The inode is marked as NODATACOW. In this case we'll just use the
+ * existing extent.
+ * 2) The extent is marked as PREALLOC. We're good to go here and can
+ * just use the extent.
+	 */
+ if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags) ||
+ ((BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW) &&
+ em->block_start != EXTENT_MAP_HOLE)) {
+ int type;
+ u64 block_start, orig_start, orig_block_len, ram_bytes;
+
+ if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
+ type = BTRFS_ORDERED_PREALLOC;
+ else
+ type = BTRFS_ORDERED_NOCOW;
+ len = min(len, em->len - (start - em->start));
+ block_start = em->block_start + (start - em->start);
+
+ if (can_nocow_extent(inode, start, &len, &orig_start,
+ &orig_block_len, &ram_bytes) == 1 &&
+ btrfs_inc_nocow_writers(fs_info, block_start)) {
+ struct extent_map *em2;
+
+ em2 = btrfs_create_dio_extent(inode, start, len,
+ orig_start, block_start,
+ len, orig_block_len,
+ ram_bytes, type);
+ btrfs_dec_nocow_writers(fs_info, block_start);
+ if (type == BTRFS_ORDERED_PREALLOC) {
+ free_extent_map(em);
+ *map = em = em2;
+ }
+
+ if (em2 && IS_ERR(em2)) {
+ ret = PTR_ERR(em2);
+ goto out;
+ }
+ /*
+			 * For an inode marked NODATACOW or an extent marked
+			 * PREALLOC, use the existing or preallocated extent, so
+			 * there is no need to adjust btrfs_space_info's
+			 * bytes_may_use.
+ */
+ btrfs_free_reserved_data_space_noquota(inode, start,
+ len);
+ goto skip_cow;
+ }
+ }
+
+ /* this will cow the extent */
+ len = bh_result->b_size;
+ free_extent_map(em);
+ *map = em = btrfs_new_extent_direct(inode, start, len);
+ if (IS_ERR(em)) {
+ ret = PTR_ERR(em);
+ goto out;
+ }
+
+ len = min(len, em->len - (start - em->start));
+
+skip_cow:
+ bh_result->b_blocknr = (em->block_start + (start - em->start)) >>
+ inode->i_blkbits;
+ bh_result->b_size = len;
+ bh_result->b_bdev = em->bdev;
+ set_buffer_mapped(bh_result);
+
+ if (!test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
+ set_buffer_new(bh_result);
+
+ /*
+ * Need to update the i_size under the extent lock so buffered
+ * readers will get the updated i_size when we unlock.
+ */
+ if (!dio_data->overwrite && start + len > i_size_read(inode))
+ i_size_write(inode, start + len);
+
+ WARN_ON(dio_data->reserve < len);
+ dio_data->reserve -= len;
+ dio_data->unsubmitted_oe_range_end = start + len;
+ current->journal_info = dio_data;
+out:
+ return ret;
+}
+
static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
struct buffer_head *bh_result, int create)
{
@@ -7636,116 +7727,36 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
goto unlock_err;
}
- /* Just a good old fashioned hole, return */
- if (!create && (em->block_start == EXTENT_MAP_HOLE ||
- test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) {
- free_extent_map(em);
- goto unlock_err;
- }
-
- /*
- * We don't allocate a new extent in the following cases
- *
- * 1) The inode is marked as NODATACOW. In this case we'll just use the
- * existing extent.
- * 2) The extent is marked as PREALLOC. We're good to go here and can
- * just use the extent.
- *
- */
- if (!create) {
- len = min(len, em->len - (start - em->start));
- lockstart = start + len;
- goto unlock;
- }
-
- if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags) ||
- ((BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW) &&
- em->block_start != EXTENT_MAP_HOLE)) {
- int type;
- u64 block_start, orig_start, orig_block_len, ram_bytes;
-
- if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
- type = BTRFS_ORDERED_PREALLOC;
- else
- type = BTRFS_ORDERED_NOCOW;
- len = min(len, em->len - (start - em->start));
- block_start = em->block_start + (start - em->start);
-
- if (can_nocow_extent(inode, start, &len, &orig_start,
- &orig_block_len, &ram_bytes) == 1 &&
- btrfs_inc_nocow_writers(fs_info, block_start)) {
- struct extent_map *em2;
-
- em2 = btrfs_create_dio_extent(inode, start, len,
- orig_start, block_start,
- len, orig_block_len,
- ram_bytes, type);
- btrfs_dec_nocow_writers(fs_info, block_start);
- if (type == BTRFS_ORDERED_PREALLOC) {
- free_extent_map(em);
- em = em2;
- }
- if (em2 && IS_ERR(em2)) {
- ret = PTR_ERR(em2);
- goto unlock_err;
- }
- /*
- * For inode marked NODATACOW or extent marked PREALLOC,
- * use the existing or preallocated extent, so does not
- * need to adjust btrfs_space_info's bytes_may_use.
- */
- btrfs_free_reserved_data_space_noquota(inode,
- start, len);
- goto unlock;
- }
- }
-
- /*
- * this will cow the extent, reset the len in case we changed
- * it above
- */
- len = bh_result->b_size;
- free_extent_map(em);
- em = btrfs_new_extent_direct(inode, start, len);
- if (IS_ERR(em)) {
- ret = PTR_ERR(em);
- goto unlock_err;
- }
- len = min(len, em->len - (start - em->start));
-unlock:
- bh_result->b_blocknr = (em->block_start + (start - em->start)) >>
- inode->i_blkbits;
- bh_result->b_size = len;
- bh_result->b_bdev = em->bdev;
- set_buffer_mapped(bh_result);
if (create) {
- if (!test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
- set_buffer_new(bh_result);
+ ret = btrfs_get_blocks_direct_write(&em, bh_result, inode,
+ dio_data, start, len);
+ if (ret < 0)
+ goto unlock_err;
+ /* clear and unlock the entire range */
+ clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend,
+ unlock_bits, 1, 0, &cached_state);
+ } else {
+ ret = btrfs_get_blocks_direct_read(em, bh_result, inode,
+ start, len);
+ /* Can be negative only if we read from a hole */
+ if (ret < 0) {
+ ret = 0;
+ free_extent_map(em);
+ goto unlock_err;
+ }
/*
- * Need to update the i_size under the extent lock so buffered
- * readers will get the updated i_size when we unlock.
+ * We need to unlock only the end area that we aren't using.
+ * The rest is going to be unlocked by the endio routine.
*/
- if (!dio_data->overwrite && start + len > i_size_read(inode))
- i_size_write(inode, start + len);
-
- WARN_ON(dio_data->reserve < len);
- dio_data->reserve -= len;
- dio_data->unsubmitted_oe_range_end = start + len;
- current->journal_info = dio_data;
- }
-
- /*
- * In the case of write we need to clear and unlock the entire range,
- * in the case of read we need to unlock only the end area that we
- * aren't using if there is any left over space.
- */
- if (lockstart < lockend) {
- clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
- lockend, unlock_bits, 1, 0,
- &cached_state);
- } else {
- free_extent_state(cached_state);
+ lockstart = start + bh_result->b_size;
+ if (lockstart < lockend) {
+ clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
+ lockend, unlock_bits, 1, 0,
+ &cached_state);
+ } else {
+ free_extent_state(cached_state);
+ }
}
free_extent_map(em);
@@ -8131,7 +8142,6 @@ static void __endio_write_update_ordered(struct inode *inode,
u64 ordered_offset = offset;
u64 ordered_bytes = bytes;
u64 last_offset;
- int ret;
if (btrfs_is_free_space_inode(BTRFS_I(inode))) {
wq = fs_info->endio_freespace_worker;
@@ -8141,32 +8151,31 @@ static void __endio_write_update_ordered(struct inode *inode,
func = btrfs_endio_write_helper;
}
-again:
- last_offset = ordered_offset;
- ret = btrfs_dec_test_first_ordered_pending(inode, &ordered,
- &ordered_offset,
- ordered_bytes,
- uptodate);
- if (!ret)
- goto out_test;
-
- btrfs_init_work(&ordered->work, func, finish_ordered_fn, NULL, NULL);
- btrfs_queue_work(wq, &ordered->work);
-out_test:
- /*
- * If btrfs_dec_test_ordered_pending does not find any ordered extent
- * in the range, we can exit.
- */
- if (ordered_offset == last_offset)
- return;
- /*
- * our bio might span multiple ordered extents. If we haven't
- * completed the accounting for the whole dio, go back and try again
- */
- if (ordered_offset < offset + bytes) {
- ordered_bytes = offset + bytes - ordered_offset;
- ordered = NULL;
- goto again;
+ while (ordered_offset < offset + bytes) {
+ last_offset = ordered_offset;
+ if (btrfs_dec_test_first_ordered_pending(inode, &ordered,
+ &ordered_offset,
+ ordered_bytes,
+ uptodate)) {
+ btrfs_init_work(&ordered->work, func,
+ finish_ordered_fn,
+ NULL, NULL);
+ btrfs_queue_work(wq, &ordered->work);
+ }
+ /*
+ * If btrfs_dec_test_ordered_pending does not find any ordered
+ * extent in the range, we can exit.
+ */
+ if (ordered_offset == last_offset)
+ return;
+ /*
+ * Our bio might span multiple ordered extents. In this case
+		 * we keep going until we have accounted for the whole dio.
+ */
+ if (ordered_offset < offset + bytes) {
+ ordered_bytes = offset + bytes - ordered_offset;
+ ordered = NULL;
+ }
}
}
@@ -8705,29 +8714,19 @@ static int btrfs_writepage(struct page *page, struct writeback_control *wbc)
static int btrfs_writepages(struct address_space *mapping,
struct writeback_control *wbc)
{
- struct extent_io_tree *tree;
-
- tree = &BTRFS_I(mapping->host)->io_tree;
- return extent_writepages(tree, mapping, wbc);
+ return extent_writepages(mapping, wbc);
}
static int
btrfs_readpages(struct file *file, struct address_space *mapping,
struct list_head *pages, unsigned nr_pages)
{
- struct extent_io_tree *tree;
- tree = &BTRFS_I(mapping->host)->io_tree;
- return extent_readpages(tree, mapping, pages, nr_pages);
+ return extent_readpages(mapping, pages, nr_pages);
}
+
static int __btrfs_releasepage(struct page *page, gfp_t gfp_flags)
{
- struct extent_io_tree *tree;
- struct extent_map_tree *map;
- int ret;
-
- tree = &BTRFS_I(page->mapping->host)->io_tree;
- map = &BTRFS_I(page->mapping->host)->extent_tree;
- ret = try_release_extent_mapping(map, tree, page, gfp_flags);
+ int ret = try_release_extent_mapping(page, gfp_flags);
if (ret == 1) {
ClearPagePrivate(page);
set_page_private(page, 0);
@@ -8868,8 +8867,8 @@ again:
*
* We are not allowed to take the i_mutex here so we have to play games to
* protect against truncate races as the page could now be beyond EOF. Because
- * vmtruncate() writes the inode size before removing pages, once we have the
- * page lock we can determine safely if the page is beyond EOF. If it is not
+ * truncate_setsize() writes the inode size before removing pages, once we have
+ * the page lock we can determine safely if the page is beyond EOF. If it is not
* beyond EOF, then the page is guaranteed safe against truncation until we
* unlock the page.
*/
@@ -9031,8 +9030,7 @@ static int btrfs_truncate(struct inode *inode, bool skip_writeback)
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_block_rsv *rsv;
- int ret = 0;
- int err = 0;
+ int ret;
struct btrfs_trans_handle *trans;
u64 mask = fs_info->sectorsize - 1;
u64 min_size = btrfs_calc_trunc_metadata_size(fs_info, 1);
@@ -9045,39 +9043,31 @@ static int btrfs_truncate(struct inode *inode, bool skip_writeback)
}
/*
- * Yes ladies and gentlemen, this is indeed ugly. The fact is we have
- * 3 things going on here
- *
- * 1) We need to reserve space for our orphan item and the space to
- * delete our orphan item. Lord knows we don't want to have a dangling
- * orphan item because we didn't reserve space to remove it.
+ * Yes ladies and gentlemen, this is indeed ugly. We have a couple of
+ * things going on here:
*
- * 2) We need to reserve space to update our inode.
+ * 1) We need to reserve space to update our inode.
*
- * 3) We need to have something to cache all the space that is going to
+ * 2) We need to have something to cache all the space that is going to
* be free'd up by the truncate operation, but also have some slack
* space reserved in case it uses space during the truncate (thank you
* very much snapshotting).
*
- * And we need these to all be separate. The fact is we can use a lot of
+ * And we need these to be separate. The fact is we can use a lot of
* space doing the truncate, and we have no earthly idea how much space
* we will use, so we need the truncate reservation to be separate so it
- * doesn't end up using space reserved for updating the inode or
- * removing the orphan item. We also need to be able to stop the
- * transaction and start a new one, which means we need to be able to
- * update the inode several times, and we have no idea of knowing how
- * many times that will be, so we can't just reserve 1 item for the
- * entirety of the operation, so that has to be done separately as well.
- * Then there is the orphan item, which does indeed need to be held on
- * to for the whole operation, and we need nobody to touch this reserved
- * space except the orphan code.
+ * doesn't end up using space reserved for updating the inode. We also
+ * need to be able to stop the transaction and start a new one, which
+ * means we need to be able to update the inode several times, and we
+	 * have no way of knowing how many times that will be, so we can't just
+ * reserve 1 item for the entirety of the operation, so that has to be
+ * done separately as well.
*
* So that leaves us with
*
- * 1) root->orphan_block_rsv - for the orphan deletion.
- * 2) rsv - for the truncate reservation, which we will steal from the
+ * 1) rsv - for the truncate reservation, which we will steal from the
* transaction reservation.
- * 3) fs_info->trans_block_rsv - this will have 1 items worth left for
+	 * 2) fs_info->trans_block_rsv - this will have 1 item's worth left for
* updating the inode.
*/
rsv = btrfs_alloc_block_rsv(fs_info, BTRFS_BLOCK_RSV_TEMP);
@@ -9092,7 +9082,7 @@ static int btrfs_truncate(struct inode *inode, bool skip_writeback)
*/
trans = btrfs_start_transaction(root, 2);
if (IS_ERR(trans)) {
- err = PTR_ERR(trans);
+ ret = PTR_ERR(trans);
goto out;
}
@@ -9116,23 +9106,19 @@ static int btrfs_truncate(struct inode *inode, bool skip_writeback)
inode->i_size,
BTRFS_EXTENT_DATA_KEY);
trans->block_rsv = &fs_info->trans_block_rsv;
- if (ret != -ENOSPC && ret != -EAGAIN) {
- err = ret;
+ if (ret != -ENOSPC && ret != -EAGAIN)
break;
- }
ret = btrfs_update_inode(trans, root, inode);
- if (ret) {
- err = ret;
+ if (ret)
break;
- }
btrfs_end_transaction(trans);
btrfs_btree_balance_dirty(fs_info);
trans = btrfs_start_transaction(root, 2);
if (IS_ERR(trans)) {
- ret = err = PTR_ERR(trans);
+ ret = PTR_ERR(trans);
trans = NULL;
break;
}
@@ -9165,29 +9151,23 @@ static int btrfs_truncate(struct inode *inode, bool skip_writeback)
btrfs_ordered_update_i_size(inode, inode->i_size, NULL);
}
- if (ret == 0 && inode->i_nlink > 0) {
- trans->block_rsv = root->orphan_block_rsv;
- ret = btrfs_orphan_del(trans, BTRFS_I(inode));
- if (ret)
- err = ret;
- }
-
if (trans) {
+ int ret2;
+
trans->block_rsv = &fs_info->trans_block_rsv;
- ret = btrfs_update_inode(trans, root, inode);
- if (ret && !err)
- err = ret;
+ ret2 = btrfs_update_inode(trans, root, inode);
+ if (ret2 && !ret)
+ ret = ret2;
- ret = btrfs_end_transaction(trans);
+ ret2 = btrfs_end_transaction(trans);
+ if (ret2 && !ret)
+ ret = ret2;
btrfs_btree_balance_dirty(fs_info);
}
out:
btrfs_free_block_rsv(fs_info, rsv);
- if (ret && !err)
- err = ret;
-
- return err;
+ return ret;
}
/*
@@ -9323,13 +9303,6 @@ void btrfs_destroy_inode(struct inode *inode)
if (!root)
goto free;
- if (test_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
- &BTRFS_I(inode)->runtime_flags)) {
- btrfs_info(fs_info, "inode %llu still on the orphan list",
- btrfs_ino(BTRFS_I(inode)));
- atomic_dec(&root->orphan_inodes);
- }
-
while (1) {
ordered = btrfs_lookup_first_ordered_extent(inode, (u64)-1);
if (!ordered)
@@ -9963,6 +9936,13 @@ static int btrfs_rename2(struct inode *old_dir, struct dentry *old_dentry,
return btrfs_rename(old_dir, old_dentry, new_dir, new_dentry, flags);
}
+struct btrfs_delalloc_work {
+ struct inode *inode;
+ struct completion completion;
+ struct list_head list;
+ struct btrfs_work work;
+};
+
static void btrfs_run_delalloc_work(struct btrfs_work *work)
{
struct btrfs_delalloc_work *delalloc_work;
@@ -9976,15 +9956,11 @@ static void btrfs_run_delalloc_work(struct btrfs_work *work)
&BTRFS_I(inode)->runtime_flags))
filemap_flush(inode->i_mapping);
- if (delalloc_work->delay_iput)
- btrfs_add_delayed_iput(inode);
- else
- iput(inode);
+ iput(inode);
complete(&delalloc_work->completion);
}
-struct btrfs_delalloc_work *btrfs_alloc_delalloc_work(struct inode *inode,
- int delay_iput)
+static struct btrfs_delalloc_work *btrfs_alloc_delalloc_work(struct inode *inode)
{
struct btrfs_delalloc_work *work;
@@ -9995,7 +9971,6 @@ struct btrfs_delalloc_work *btrfs_alloc_delalloc_work(struct inode *inode,
init_completion(&work->completion);
INIT_LIST_HEAD(&work->list);
work->inode = inode;
- work->delay_iput = delay_iput;
WARN_ON_ONCE(!inode);
btrfs_init_work(&work->work, btrfs_flush_delalloc_helper,
btrfs_run_delalloc_work, NULL, NULL);
@@ -10003,18 +9978,11 @@ struct btrfs_delalloc_work *btrfs_alloc_delalloc_work(struct inode *inode,
return work;
}
-void btrfs_wait_and_free_delalloc_work(struct btrfs_delalloc_work *work)
-{
- wait_for_completion(&work->completion);
- kfree(work);
-}
-
/*
* some fairly slow code that needs optimization. This walks the list
* of all the inodes with pending delalloc and forces them to disk.
*/
-static int __start_delalloc_inodes(struct btrfs_root *root, int delay_iput,
- int nr)
+static int start_delalloc_inodes(struct btrfs_root *root, int nr)
{
struct btrfs_inode *binode;
struct inode *inode;
@@ -10042,12 +10010,9 @@ static int __start_delalloc_inodes(struct btrfs_root *root, int delay_iput,
}
spin_unlock(&root->delalloc_lock);
- work = btrfs_alloc_delalloc_work(inode, delay_iput);
+ work = btrfs_alloc_delalloc_work(inode);
if (!work) {
- if (delay_iput)
- btrfs_add_delayed_iput(inode);
- else
- iput(inode);
+ iput(inode);
ret = -ENOMEM;
goto out;
}
@@ -10065,10 +10030,11 @@ static int __start_delalloc_inodes(struct btrfs_root *root, int delay_iput,
out:
list_for_each_entry_safe(work, next, &works, list) {
list_del_init(&work->list);
- btrfs_wait_and_free_delalloc_work(work);
+ wait_for_completion(&work->completion);
+ kfree(work);
}
- if (!list_empty_careful(&splice)) {
+ if (!list_empty(&splice)) {
spin_lock(&root->delalloc_lock);
list_splice_tail(&splice, &root->delalloc_inodes);
spin_unlock(&root->delalloc_lock);
@@ -10077,7 +10043,7 @@ out:
return ret;
}
-int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput)
+int btrfs_start_delalloc_inodes(struct btrfs_root *root)
{
struct btrfs_fs_info *fs_info = root->fs_info;
int ret;
@@ -10085,14 +10051,13 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput)
if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state))
return -EROFS;
- ret = __start_delalloc_inodes(root, delay_iput, -1);
+ ret = start_delalloc_inodes(root, -1);
if (ret > 0)
ret = 0;
return ret;
}
-int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, int delay_iput,
- int nr)
+int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, int nr)
{
struct btrfs_root *root;
struct list_head splice;
@@ -10115,7 +10080,7 @@ int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, int delay_iput,
&fs_info->delalloc_roots);
spin_unlock(&fs_info->delalloc_root_lock);
- ret = __start_delalloc_inodes(root, delay_iput, nr);
+ ret = start_delalloc_inodes(root, nr);
btrfs_put_fs_root(root);
if (ret < 0)
goto out;
@@ -10130,7 +10095,7 @@ int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, int delay_iput,
ret = 0;
out:
- if (!list_empty_careful(&splice)) {
+ if (!list_empty(&splice)) {
spin_lock(&fs_info->delalloc_root_lock);
list_splice_tail(&splice, &fs_info->delalloc_roots);
spin_unlock(&fs_info->delalloc_root_lock);
@@ -10250,8 +10215,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
goto out_unlock_inode;
}
- unlock_new_inode(inode);
- d_instantiate(dentry, inode);
+ d_instantiate_new(dentry, inode);
out_unlock:
btrfs_end_transaction(trans);
@@ -10669,5 +10633,4 @@ static const struct inode_operations btrfs_symlink_inode_operations = {
const struct dentry_operations btrfs_dentry_operations = {
.d_delete = btrfs_dentry_delete,
- .d_release = btrfs_dentry_release,
};
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 632e26d6f7ce..d29992f7dc63 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -93,20 +93,22 @@ static int btrfs_clone(struct inode *src, struct inode *inode,
int no_time_update);
/* Mask out flags that are inappropriate for the given type of inode. */
-static unsigned int btrfs_mask_flags(umode_t mode, unsigned int flags)
+static unsigned int btrfs_mask_fsflags_for_type(struct inode *inode,
+ unsigned int flags)
{
- if (S_ISDIR(mode))
+ if (S_ISDIR(inode->i_mode))
return flags;
- else if (S_ISREG(mode))
+ else if (S_ISREG(inode->i_mode))
return flags & ~FS_DIRSYNC_FL;
else
return flags & (FS_NODUMP_FL | FS_NOATIME_FL);
}
/*
- * Export inode flags to the format expected by the FS_IOC_GETFLAGS ioctl.
+ * Export internal inode flags to the format expected by the FS_IOC_GETFLAGS
+ * ioctl.
*/
-static unsigned int btrfs_flags_to_ioctl(unsigned int flags)
+static unsigned int btrfs_inode_flags_to_fsflags(unsigned int flags)
{
unsigned int iflags = 0;
@@ -136,20 +138,20 @@ static unsigned int btrfs_flags_to_ioctl(unsigned int flags)
/*
* Update inode->i_flags based on the btrfs internal flags.
*/
-void btrfs_update_iflags(struct inode *inode)
+void btrfs_sync_inode_flags_to_i_flags(struct inode *inode)
{
- struct btrfs_inode *ip = BTRFS_I(inode);
+ struct btrfs_inode *binode = BTRFS_I(inode);
unsigned int new_fl = 0;
- if (ip->flags & BTRFS_INODE_SYNC)
+ if (binode->flags & BTRFS_INODE_SYNC)
new_fl |= S_SYNC;
- if (ip->flags & BTRFS_INODE_IMMUTABLE)
+ if (binode->flags & BTRFS_INODE_IMMUTABLE)
new_fl |= S_IMMUTABLE;
- if (ip->flags & BTRFS_INODE_APPEND)
+ if (binode->flags & BTRFS_INODE_APPEND)
new_fl |= S_APPEND;
- if (ip->flags & BTRFS_INODE_NOATIME)
+ if (binode->flags & BTRFS_INODE_NOATIME)
new_fl |= S_NOATIME;
- if (ip->flags & BTRFS_INODE_DIRSYNC)
+ if (binode->flags & BTRFS_INODE_DIRSYNC)
new_fl |= S_DIRSYNC;
set_mask_bits(&inode->i_flags,
@@ -159,15 +161,16 @@ void btrfs_update_iflags(struct inode *inode)
static int btrfs_ioctl_getflags(struct file *file, void __user *arg)
{
- struct btrfs_inode *ip = BTRFS_I(file_inode(file));
- unsigned int flags = btrfs_flags_to_ioctl(ip->flags);
+ struct btrfs_inode *binode = BTRFS_I(file_inode(file));
+ unsigned int flags = btrfs_inode_flags_to_fsflags(binode->flags);
if (copy_to_user(arg, &flags, sizeof(flags)))
return -EFAULT;
return 0;
}
-static int check_flags(unsigned int flags)
+/* Check if @flags are a supported and valid set of FS_*_FL flags */
+static int check_fsflags(unsigned int flags)
{
if (flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL | \
FS_NOATIME_FL | FS_NODUMP_FL | \
@@ -186,13 +189,13 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
{
struct inode *inode = file_inode(file);
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
- struct btrfs_inode *ip = BTRFS_I(inode);
- struct btrfs_root *root = ip->root;
+ struct btrfs_inode *binode = BTRFS_I(inode);
+ struct btrfs_root *root = binode->root;
struct btrfs_trans_handle *trans;
- unsigned int flags, oldflags;
+ unsigned int fsflags, old_fsflags;
int ret;
- u64 ip_oldflags;
- unsigned int i_oldflags;
+ u64 old_flags;
+ unsigned int old_i_flags;
umode_t mode;
if (!inode_owner_or_capable(inode))
@@ -201,10 +204,10 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
if (btrfs_root_readonly(root))
return -EROFS;
- if (copy_from_user(&flags, arg, sizeof(flags)))
+ if (copy_from_user(&fsflags, arg, sizeof(fsflags)))
return -EFAULT;
- ret = check_flags(flags);
+ ret = check_fsflags(fsflags);
if (ret)
return ret;
@@ -214,44 +217,44 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
inode_lock(inode);
- ip_oldflags = ip->flags;
- i_oldflags = inode->i_flags;
+ old_flags = binode->flags;
+ old_i_flags = inode->i_flags;
mode = inode->i_mode;
- flags = btrfs_mask_flags(inode->i_mode, flags);
- oldflags = btrfs_flags_to_ioctl(ip->flags);
- if ((flags ^ oldflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) {
+ fsflags = btrfs_mask_fsflags_for_type(inode, fsflags);
+ old_fsflags = btrfs_inode_flags_to_fsflags(binode->flags);
+ if ((fsflags ^ old_fsflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) {
if (!capable(CAP_LINUX_IMMUTABLE)) {
ret = -EPERM;
goto out_unlock;
}
}
- if (flags & FS_SYNC_FL)
- ip->flags |= BTRFS_INODE_SYNC;
+ if (fsflags & FS_SYNC_FL)
+ binode->flags |= BTRFS_INODE_SYNC;
else
- ip->flags &= ~BTRFS_INODE_SYNC;
- if (flags & FS_IMMUTABLE_FL)
- ip->flags |= BTRFS_INODE_IMMUTABLE;
+ binode->flags &= ~BTRFS_INODE_SYNC;
+ if (fsflags & FS_IMMUTABLE_FL)
+ binode->flags |= BTRFS_INODE_IMMUTABLE;
else
- ip->flags &= ~BTRFS_INODE_IMMUTABLE;
- if (flags & FS_APPEND_FL)
- ip->flags |= BTRFS_INODE_APPEND;
+ binode->flags &= ~BTRFS_INODE_IMMUTABLE;
+ if (fsflags & FS_APPEND_FL)
+ binode->flags |= BTRFS_INODE_APPEND;
else
- ip->flags &= ~BTRFS_INODE_APPEND;
- if (flags & FS_NODUMP_FL)
- ip->flags |= BTRFS_INODE_NODUMP;
+ binode->flags &= ~BTRFS_INODE_APPEND;
+ if (fsflags & FS_NODUMP_FL)
+ binode->flags |= BTRFS_INODE_NODUMP;
else
- ip->flags &= ~BTRFS_INODE_NODUMP;
- if (flags & FS_NOATIME_FL)
- ip->flags |= BTRFS_INODE_NOATIME;
+ binode->flags &= ~BTRFS_INODE_NODUMP;
+ if (fsflags & FS_NOATIME_FL)
+ binode->flags |= BTRFS_INODE_NOATIME;
else
- ip->flags &= ~BTRFS_INODE_NOATIME;
- if (flags & FS_DIRSYNC_FL)
- ip->flags |= BTRFS_INODE_DIRSYNC;
+ binode->flags &= ~BTRFS_INODE_NOATIME;
+ if (fsflags & FS_DIRSYNC_FL)
+ binode->flags |= BTRFS_INODE_DIRSYNC;
else
- ip->flags &= ~BTRFS_INODE_DIRSYNC;
- if (flags & FS_NOCOW_FL) {
+ binode->flags &= ~BTRFS_INODE_DIRSYNC;
+ if (fsflags & FS_NOCOW_FL) {
if (S_ISREG(mode)) {
/*
* It's safe to turn csums off here, no extents exist.
@@ -259,10 +262,10 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
* status of the file and will not set it.
*/
if (inode->i_size == 0)
- ip->flags |= BTRFS_INODE_NODATACOW
- | BTRFS_INODE_NODATASUM;
+ binode->flags |= BTRFS_INODE_NODATACOW
+ | BTRFS_INODE_NODATASUM;
} else {
- ip->flags |= BTRFS_INODE_NODATACOW;
+ binode->flags |= BTRFS_INODE_NODATACOW;
}
} else {
/*
@@ -270,10 +273,10 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
*/
if (S_ISREG(mode)) {
if (inode->i_size == 0)
- ip->flags &= ~(BTRFS_INODE_NODATACOW
+ binode->flags &= ~(BTRFS_INODE_NODATACOW
| BTRFS_INODE_NODATASUM);
} else {
- ip->flags &= ~BTRFS_INODE_NODATACOW;
+ binode->flags &= ~BTRFS_INODE_NODATACOW;
}
}
@@ -282,18 +285,18 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
* flag may be changed automatically if compression code won't make
* things smaller.
*/
- if (flags & FS_NOCOMP_FL) {
- ip->flags &= ~BTRFS_INODE_COMPRESS;
- ip->flags |= BTRFS_INODE_NOCOMPRESS;
+ if (fsflags & FS_NOCOMP_FL) {
+ binode->flags &= ~BTRFS_INODE_COMPRESS;
+ binode->flags |= BTRFS_INODE_NOCOMPRESS;
ret = btrfs_set_prop(inode, "btrfs.compression", NULL, 0, 0);
if (ret && ret != -ENODATA)
goto out_drop;
- } else if (flags & FS_COMPR_FL) {
+ } else if (fsflags & FS_COMPR_FL) {
const char *comp;
- ip->flags |= BTRFS_INODE_COMPRESS;
- ip->flags &= ~BTRFS_INODE_NOCOMPRESS;
+ binode->flags |= BTRFS_INODE_COMPRESS;
+ binode->flags &= ~BTRFS_INODE_NOCOMPRESS;
comp = btrfs_compress_type2str(fs_info->compress_type);
if (!comp || comp[0] == 0)
@@ -308,7 +311,7 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
ret = btrfs_set_prop(inode, "btrfs.compression", NULL, 0, 0);
if (ret && ret != -ENODATA)
goto out_drop;
- ip->flags &= ~(BTRFS_INODE_COMPRESS | BTRFS_INODE_NOCOMPRESS);
+ binode->flags &= ~(BTRFS_INODE_COMPRESS | BTRFS_INODE_NOCOMPRESS);
}
trans = btrfs_start_transaction(root, 1);
@@ -317,7 +320,7 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
goto out_drop;
}
- btrfs_update_iflags(inode);
+ btrfs_sync_inode_flags_to_i_flags(inode);
inode_inc_iversion(inode);
inode->i_ctime = current_time(inode);
ret = btrfs_update_inode(trans, root, inode);
@@ -325,8 +328,8 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
btrfs_end_transaction(trans);
out_drop:
if (ret) {
- ip->flags = ip_oldflags;
- inode->i_flags = i_oldflags;
+ binode->flags = old_flags;
+ inode->i_flags = old_i_flags;
}
out_unlock:
@@ -335,6 +338,148 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
return ret;
}
+/*
+ * Translate btrfs internal inode flags to xflags as expected by the
+ * FS_IOC_FSGETXATTR ioctl. Filter only the supported ones; unknown flags are
+ * silently dropped.
+ */
+static unsigned int btrfs_inode_flags_to_xflags(unsigned int flags)
+{
+ unsigned int xflags = 0;
+
+ if (flags & BTRFS_INODE_APPEND)
+ xflags |= FS_XFLAG_APPEND;
+ if (flags & BTRFS_INODE_IMMUTABLE)
+ xflags |= FS_XFLAG_IMMUTABLE;
+ if (flags & BTRFS_INODE_NOATIME)
+ xflags |= FS_XFLAG_NOATIME;
+ if (flags & BTRFS_INODE_NODUMP)
+ xflags |= FS_XFLAG_NODUMP;
+ if (flags & BTRFS_INODE_SYNC)
+ xflags |= FS_XFLAG_SYNC;
+
+ return xflags;
+}
+
+/* Check if @flags are a supported and valid set of FS_XFLAG_* flags */
+static int check_xflags(unsigned int flags)
+{
+ if (flags & ~(FS_XFLAG_APPEND | FS_XFLAG_IMMUTABLE | FS_XFLAG_NOATIME |
+ FS_XFLAG_NODUMP | FS_XFLAG_SYNC))
+ return -EOPNOTSUPP;
+ return 0;
+}
+
+/*
+ * Set the xflags from the internal inode flags. The remaining items of fsxattr
+ * are zeroed.
+ */
+static int btrfs_ioctl_fsgetxattr(struct file *file, void __user *arg)
+{
+ struct btrfs_inode *binode = BTRFS_I(file_inode(file));
+ struct fsxattr fa;
+
+ memset(&fa, 0, sizeof(fa));
+ fa.fsx_xflags = btrfs_inode_flags_to_xflags(binode->flags);
+
+ if (copy_to_user(arg, &fa, sizeof(fa)))
+ return -EFAULT;
+
+ return 0;
+}
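
The pair above hooks btrfs into the generic FS_IOC_FSGETXATTR interface. A userspace sketch of reading the flags back, assuming only the standard <linux/fs.h> definitions (struct fsxattr, FS_XFLAG_*):

#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/fs.h>

int main(int argc, char **argv)
{
	struct fsxattr fa;
	int fd;

	if (argc < 2)
		return 1;
	fd = open(argv[1], O_RDONLY);
	if (fd < 0 || ioctl(fd, FS_IOC_FSGETXATTR, &fa) < 0) {
		perror("FS_IOC_FSGETXATTR");
		return 1;
	}
	/* only APPEND/IMMUTABLE/NOATIME/NODUMP/SYNC are supported by this patch */
	printf("xflags 0x%x, append-only: %s\n", fa.fsx_xflags,
	       (fa.fsx_xflags & FS_XFLAG_APPEND) ? "yes" : "no");
	close(fd);
	return 0;
}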
+
+static int btrfs_ioctl_fssetxattr(struct file *file, void __user *arg)
+{
+ struct inode *inode = file_inode(file);
+ struct btrfs_inode *binode = BTRFS_I(inode);
+ struct btrfs_root *root = binode->root;
+ struct btrfs_trans_handle *trans;
+ struct fsxattr fa;
+ unsigned old_flags;
+ unsigned old_i_flags;
+ int ret = 0;
+
+ if (!inode_owner_or_capable(inode))
+ return -EPERM;
+
+ if (btrfs_root_readonly(root))
+ return -EROFS;
+
+ memset(&fa, 0, sizeof(fa));
+ if (copy_from_user(&fa, arg, sizeof(fa)))
+ return -EFAULT;
+
+ ret = check_xflags(fa.fsx_xflags);
+ if (ret)
+ return ret;
+
+ if (fa.fsx_extsize != 0 || fa.fsx_projid != 0 || fa.fsx_cowextsize != 0)
+ return -EOPNOTSUPP;
+
+ ret = mnt_want_write_file(file);
+ if (ret)
+ return ret;
+
+ inode_lock(inode);
+
+ old_flags = binode->flags;
+ old_i_flags = inode->i_flags;
+
+	/* We need the capabilities to change append-only or immutable inodes */
+ if (((old_flags & (BTRFS_INODE_APPEND | BTRFS_INODE_IMMUTABLE)) ||
+ (fa.fsx_xflags & (FS_XFLAG_APPEND | FS_XFLAG_IMMUTABLE))) &&
+ !capable(CAP_LINUX_IMMUTABLE)) {
+ ret = -EPERM;
+ goto out_unlock;
+ }
+
+ if (fa.fsx_xflags & FS_XFLAG_SYNC)
+ binode->flags |= BTRFS_INODE_SYNC;
+ else
+ binode->flags &= ~BTRFS_INODE_SYNC;
+ if (fa.fsx_xflags & FS_XFLAG_IMMUTABLE)
+ binode->flags |= BTRFS_INODE_IMMUTABLE;
+ else
+ binode->flags &= ~BTRFS_INODE_IMMUTABLE;
+ if (fa.fsx_xflags & FS_XFLAG_APPEND)
+ binode->flags |= BTRFS_INODE_APPEND;
+ else
+ binode->flags &= ~BTRFS_INODE_APPEND;
+ if (fa.fsx_xflags & FS_XFLAG_NODUMP)
+ binode->flags |= BTRFS_INODE_NODUMP;
+ else
+ binode->flags &= ~BTRFS_INODE_NODUMP;
+ if (fa.fsx_xflags & FS_XFLAG_NOATIME)
+ binode->flags |= BTRFS_INODE_NOATIME;
+ else
+ binode->flags &= ~BTRFS_INODE_NOATIME;
+
+ /* 1 item for the inode */
+ trans = btrfs_start_transaction(root, 1);
+ if (IS_ERR(trans)) {
+ ret = PTR_ERR(trans);
+ goto out_unlock;
+ }
+
+ btrfs_sync_inode_flags_to_i_flags(inode);
+ inode_inc_iversion(inode);
+ inode->i_ctime = current_time(inode);
+ ret = btrfs_update_inode(trans, root, inode);
+
+ btrfs_end_transaction(trans);
+
+out_unlock:
+ if (ret) {
+ binode->flags = old_flags;
+ inode->i_flags = old_i_flags;
+ }
+
+ inode_unlock(inode);
+ mnt_drop_write_file(file);
+
+ return ret;
+}
+
static int btrfs_ioctl_getversion(struct file *file, int __user *arg)
{
struct inode *inode = file_inode(file);
@@ -424,7 +569,6 @@ static noinline int create_subvol(struct inode *dir,
u64 objectid;
u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID;
u64 index = 0;
- u64 qgroup_reserved;
uuid_le new_uuid;
root_item = kzalloc(sizeof(*root_item), GFP_KERNEL);
@@ -449,8 +593,7 @@ static noinline int create_subvol(struct inode *dir,
* The same as the snapshot creation, please see the comment
* of create_snapshot().
*/
- ret = btrfs_subvolume_reserve_metadata(root, &block_rsv,
- 8, &qgroup_reserved, false);
+ ret = btrfs_subvolume_reserve_metadata(root, &block_rsv, 8, false);
if (ret)
goto fail_free;
@@ -573,7 +716,7 @@ static noinline int create_subvol(struct inode *dir,
btrfs_ino(BTRFS_I(dir)), index, name, namelen);
BUG_ON(ret);
- ret = btrfs_uuid_tree_add(trans, fs_info, root_item->uuid,
+ ret = btrfs_uuid_tree_add(trans, root_item->uuid,
BTRFS_UUID_KEY_SUBVOL, objectid);
if (ret)
btrfs_abort_transaction(trans, ret);
@@ -640,7 +783,7 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
wait_event(root->subv_writers->wait,
percpu_counter_sum(&root->subv_writers->counter) == 0);
- ret = btrfs_start_delalloc_inodes(root, 0);
+ ret = btrfs_start_delalloc_inodes(root);
if (ret)
goto dec_and_free;
@@ -658,7 +801,6 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
*/
ret = btrfs_subvolume_reserve_metadata(BTRFS_I(dir)->root,
&pending_snapshot->block_rsv, 8,
- &pending_snapshot->qgroup_reserved,
false);
if (ret)
goto dec_and_free;
@@ -1457,7 +1599,6 @@ static noinline int btrfs_ioctl_resize(struct file *file,
return BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
}
- mutex_lock(&fs_info->volume_mutex);
vol_args = memdup_user(arg, sizeof(*vol_args));
if (IS_ERR(vol_args)) {
ret = PTR_ERR(vol_args);
@@ -1565,7 +1706,6 @@ static noinline int btrfs_ioctl_resize(struct file *file,
out_free:
kfree(vol_args);
out:
- mutex_unlock(&fs_info->volume_mutex);
clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
mnt_drop_write_file(file);
return ret;
@@ -1832,60 +1972,6 @@ out:
return ret;
}
-/*
- * helper to check if the subvolume references other subvolumes
- */
-static noinline int may_destroy_subvol(struct btrfs_root *root)
-{
- struct btrfs_fs_info *fs_info = root->fs_info;
- struct btrfs_path *path;
- struct btrfs_dir_item *di;
- struct btrfs_key key;
- u64 dir_id;
- int ret;
-
- path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
-
- /* Make sure this root isn't set as the default subvol */
- dir_id = btrfs_super_root_dir(fs_info->super_copy);
- di = btrfs_lookup_dir_item(NULL, fs_info->tree_root, path,
- dir_id, "default", 7, 0);
- if (di && !IS_ERR(di)) {
- btrfs_dir_item_key_to_cpu(path->nodes[0], di, &key);
- if (key.objectid == root->root_key.objectid) {
- ret = -EPERM;
- btrfs_err(fs_info,
- "deleting default subvolume %llu is not allowed",
- key.objectid);
- goto out;
- }
- btrfs_release_path(path);
- }
-
- key.objectid = root->root_key.objectid;
- key.type = BTRFS_ROOT_REF_KEY;
- key.offset = (u64)-1;
-
- ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, path, 0, 0);
- if (ret < 0)
- goto out;
- BUG_ON(ret == 0);
-
- ret = 0;
- if (path->slots[0] > 0) {
- path->slots[0]--;
- btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
- if (key.objectid == root->root_key.objectid &&
- key.type == BTRFS_ROOT_REF_KEY)
- ret = -ENOTEMPTY;
- }
-out:
- btrfs_free_path(path);
- return ret;
-}
-
static noinline int key_in_sk(struct btrfs_key *key,
struct btrfs_ioctl_search_key *sk)
{
@@ -2066,7 +2152,7 @@ static noinline int search_ioctl(struct inode *inode,
root = btrfs_read_fs_root_no_name(info, &key);
if (IS_ERR(root)) {
btrfs_free_path(path);
- return -ENOENT;
+ return PTR_ERR(root);
}
}
@@ -2200,8 +2286,7 @@ static noinline int btrfs_search_path_in_tree(struct btrfs_fs_info *info,
key.offset = (u64)-1;
root = btrfs_read_fs_root_no_name(info, &key);
if (IS_ERR(root)) {
- btrfs_err(info, "could not find root %llu", tree_id);
- ret = -ENOENT;
+ ret = PTR_ERR(root);
goto out;
}
@@ -2256,6 +2341,165 @@ out:
return ret;
}
+static int btrfs_search_path_in_tree_user(struct inode *inode,
+ struct btrfs_ioctl_ino_lookup_user_args *args)
+{
+ struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
+ struct super_block *sb = inode->i_sb;
+ struct btrfs_key upper_limit = BTRFS_I(inode)->location;
+ u64 treeid = BTRFS_I(inode)->root->root_key.objectid;
+ u64 dirid = args->dirid;
+ unsigned long item_off;
+ unsigned long item_len;
+ struct btrfs_inode_ref *iref;
+ struct btrfs_root_ref *rref;
+ struct btrfs_root *root;
+ struct btrfs_path *path;
+ struct btrfs_key key, key2;
+ struct extent_buffer *leaf;
+ struct inode *temp_inode;
+ char *ptr;
+ int slot;
+ int len;
+ int total_len = 0;
+ int ret;
+
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+
+ /*
+ * If the bottom subvolume does not exist directly under upper_limit,
+	 * construct the path from the bottom up.
+ */
+ if (dirid != upper_limit.objectid) {
+ ptr = &args->path[BTRFS_INO_LOOKUP_USER_PATH_MAX - 1];
+
+ key.objectid = treeid;
+ key.type = BTRFS_ROOT_ITEM_KEY;
+ key.offset = (u64)-1;
+ root = btrfs_read_fs_root_no_name(fs_info, &key);
+ if (IS_ERR(root)) {
+ ret = PTR_ERR(root);
+ goto out;
+ }
+
+ key.objectid = dirid;
+ key.type = BTRFS_INODE_REF_KEY;
+ key.offset = (u64)-1;
+ while (1) {
+ ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+ if (ret < 0) {
+ goto out;
+ } else if (ret > 0) {
+ ret = btrfs_previous_item(root, path, dirid,
+ BTRFS_INODE_REF_KEY);
+ if (ret < 0) {
+ goto out;
+ } else if (ret > 0) {
+ ret = -ENOENT;
+ goto out;
+ }
+ }
+
+ leaf = path->nodes[0];
+ slot = path->slots[0];
+ btrfs_item_key_to_cpu(leaf, &key, slot);
+
+ iref = btrfs_item_ptr(leaf, slot, struct btrfs_inode_ref);
+ len = btrfs_inode_ref_name_len(leaf, iref);
+ ptr -= len + 1;
+ total_len += len + 1;
+ if (ptr < args->path) {
+ ret = -ENAMETOOLONG;
+ goto out;
+ }
+
+ *(ptr + len) = '/';
+ read_extent_buffer(leaf, ptr,
+ (unsigned long)(iref + 1), len);
+
+ /* Check the read+exec permission of this directory */
+ ret = btrfs_previous_item(root, path, dirid,
+ BTRFS_INODE_ITEM_KEY);
+ if (ret < 0) {
+ goto out;
+ } else if (ret > 0) {
+ ret = -ENOENT;
+ goto out;
+ }
+
+ leaf = path->nodes[0];
+ slot = path->slots[0];
+ btrfs_item_key_to_cpu(leaf, &key2, slot);
+ if (key2.objectid != dirid) {
+ ret = -ENOENT;
+ goto out;
+ }
+
+			temp_inode = btrfs_iget(sb, &key2, root, NULL);
+			if (IS_ERR(temp_inode)) {
+				ret = PTR_ERR(temp_inode);
+				goto out;
+			}
+			ret = inode_permission(temp_inode, MAY_READ | MAY_EXEC);
+ iput(temp_inode);
+ if (ret) {
+ ret = -EACCES;
+ goto out;
+ }
+
+ if (key.offset == upper_limit.objectid)
+ break;
+ if (key.objectid == BTRFS_FIRST_FREE_OBJECTID) {
+ ret = -EACCES;
+ goto out;
+ }
+
+ btrfs_release_path(path);
+ key.objectid = key.offset;
+ key.offset = (u64)-1;
+ dirid = key.objectid;
+ }
+
+ memmove(args->path, ptr, total_len);
+ args->path[total_len] = '\0';
+ btrfs_release_path(path);
+ }
+
+ /* Get the bottom subvolume's name from ROOT_REF */
+ root = fs_info->tree_root;
+ key.objectid = treeid;
+ key.type = BTRFS_ROOT_REF_KEY;
+ key.offset = args->treeid;
+ ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+ if (ret < 0) {
+ goto out;
+ } else if (ret > 0) {
+ ret = -ENOENT;
+ goto out;
+ }
+
+ leaf = path->nodes[0];
+ slot = path->slots[0];
+ btrfs_item_key_to_cpu(leaf, &key, slot);
+
+ item_off = btrfs_item_ptr_offset(leaf, slot);
+ item_len = btrfs_item_size_nr(leaf, slot);
+ /* Check if dirid in ROOT_REF corresponds to passed dirid */
+ rref = btrfs_item_ptr(leaf, slot, struct btrfs_root_ref);
+ if (args->dirid != btrfs_root_ref_dirid(leaf, rref)) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ /* Copy subvolume's name */
+ item_off += sizeof(struct btrfs_root_ref);
+ item_len -= sizeof(struct btrfs_root_ref);
+ read_extent_buffer(leaf, args->name, item_off, item_len);
+ args->name[item_len] = 0;
+
+out:
+ btrfs_free_path(path);
+ return ret;
+}
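
The path assembly in btrfs_search_path_in_tree_user() is worth calling out: each INODE_REF name is prepended into the tail of a fixed buffer while walking toward the subvolume root, and the finished string is finally moved to the front. A standalone sketch of the same buffer technique (hypothetical helper, with the names supplied as an array):

#include <string.h>

/* Prepend names[0..count-1] right-to-left into buf; mirrors the kernel loop */
static int build_path_backwards(char *buf, size_t bufsize,
				const char *const names[], int count)
{
	char *ptr = buf + bufsize - 1;
	size_t total = 0;
	int i;

	for (i = 0; i < count; i++) {	/* leaf directory first, root last */
		size_t len = strlen(names[i]);

		ptr -= len + 1;
		total += len + 1;
		if (ptr < buf)
			return -1;	/* the kernel returns -ENAMETOOLONG */
		memcpy(ptr, names[i], len);
		ptr[len] = '/';	/* each component ends with a slash */
	}
	memmove(buf, ptr, total);	/* slide the result to the front */
	buf[total] = '\0';
	return 0;
}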
+
static noinline int btrfs_ioctl_ino_lookup(struct file *file,
void __user *argp)
{
@@ -2298,6 +2542,265 @@ out:
return ret;
}
+/*
+ * Unprivileged version of the ino_lookup ioctl.
+ *
+ * The main differences from the ino_lookup ioctl are:
+ *
+ * 1. Read + Exec permission will be checked using inode_permission() during
+ * path construction. -EACCES will be returned in case of failure.
+ * 2. Path construction will be stopped at the inode number which corresponds
+ * to the fd with which this ioctl is called. If constructed path does not
+ * exist under fd's inode, -EACCES will be returned.
+ * 3. The name of bottom subvolume is also searched and filled.
+ */
+static int btrfs_ioctl_ino_lookup_user(struct file *file, void __user *argp)
+{
+ struct btrfs_ioctl_ino_lookup_user_args *args;
+ struct inode *inode;
+ int ret;
+
+ args = memdup_user(argp, sizeof(*args));
+ if (IS_ERR(args))
+ return PTR_ERR(args);
+
+ inode = file_inode(file);
+
+ if (args->dirid == BTRFS_FIRST_FREE_OBJECTID &&
+ BTRFS_I(inode)->location.objectid != BTRFS_FIRST_FREE_OBJECTID) {
+ /*
+		 * The subvolume does not exist under the fd with which this
+		 * ioctl was called.
+ */
+ kfree(args);
+ return -EACCES;
+ }
+
+ ret = btrfs_search_path_in_tree_user(inode, args);
+
+ if (ret == 0 && copy_to_user(argp, args, sizeof(*args)))
+ ret = -EFAULT;
+
+ kfree(args);
+ return ret;
+}
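
A userspace sketch of driving the new ioctl. The request/response layout (BTRFS_IOC_INO_LOOKUP_USER and struct btrfs_ioctl_ino_lookup_user_args with dirid, treeid, name and path fields) is assumed from the uapi header added by this series; treeid and dirid would typically come from a prior ROOT_REF listing:

#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/btrfs.h>

/* fd: any O_RDONLY fd inside the filesystem; no privileges required */
static int print_subvol_path(int fd, __u64 treeid, __u64 dirid)
{
	struct btrfs_ioctl_ino_lookup_user_args args;

	memset(&args, 0, sizeof(args));
	args.treeid = treeid;	/* subvolume to resolve */
	args.dirid = dirid;	/* directory inode to start walking up from */

	if (ioctl(fd, BTRFS_IOC_INO_LOOKUP_USER, &args) < 0) {
		perror("BTRFS_IOC_INO_LOOKUP_USER");	/* e.g. EACCES */
		return -1;
	}
	printf("path: %s, subvolume name: %s\n", args.path, args.name);
	return 0;
}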
+
+/* Get the subvolume information in BTRFS_ROOT_ITEM and BTRFS_ROOT_BACKREF */
+static int btrfs_ioctl_get_subvol_info(struct file *file, void __user *argp)
+{
+ struct btrfs_ioctl_get_subvol_info_args *subvol_info;
+ struct btrfs_fs_info *fs_info;
+ struct btrfs_root *root;
+ struct btrfs_path *path;
+ struct btrfs_key key;
+ struct btrfs_root_item *root_item;
+ struct btrfs_root_ref *rref;
+ struct extent_buffer *leaf;
+ unsigned long item_off;
+ unsigned long item_len;
+ struct inode *inode;
+ int slot;
+ int ret = 0;
+
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+
+ subvol_info = kzalloc(sizeof(*subvol_info), GFP_KERNEL);
+ if (!subvol_info) {
+ btrfs_free_path(path);
+ return -ENOMEM;
+ }
+
+ inode = file_inode(file);
+ fs_info = BTRFS_I(inode)->root->fs_info;
+
+ /* Get root_item of inode's subvolume */
+ key.objectid = BTRFS_I(inode)->root->root_key.objectid;
+ key.type = BTRFS_ROOT_ITEM_KEY;
+ key.offset = (u64)-1;
+ root = btrfs_read_fs_root_no_name(fs_info, &key);
+ if (IS_ERR(root)) {
+ ret = PTR_ERR(root);
+ goto out;
+ }
+ root_item = &root->root_item;
+
+ subvol_info->treeid = key.objectid;
+
+ subvol_info->generation = btrfs_root_generation(root_item);
+ subvol_info->flags = btrfs_root_flags(root_item);
+
+ memcpy(subvol_info->uuid, root_item->uuid, BTRFS_UUID_SIZE);
+ memcpy(subvol_info->parent_uuid, root_item->parent_uuid,
+ BTRFS_UUID_SIZE);
+ memcpy(subvol_info->received_uuid, root_item->received_uuid,
+ BTRFS_UUID_SIZE);
+
+ subvol_info->ctransid = btrfs_root_ctransid(root_item);
+ subvol_info->ctime.sec = btrfs_stack_timespec_sec(&root_item->ctime);
+ subvol_info->ctime.nsec = btrfs_stack_timespec_nsec(&root_item->ctime);
+
+ subvol_info->otransid = btrfs_root_otransid(root_item);
+ subvol_info->otime.sec = btrfs_stack_timespec_sec(&root_item->otime);
+ subvol_info->otime.nsec = btrfs_stack_timespec_nsec(&root_item->otime);
+
+ subvol_info->stransid = btrfs_root_stransid(root_item);
+ subvol_info->stime.sec = btrfs_stack_timespec_sec(&root_item->stime);
+ subvol_info->stime.nsec = btrfs_stack_timespec_nsec(&root_item->stime);
+
+ subvol_info->rtransid = btrfs_root_rtransid(root_item);
+ subvol_info->rtime.sec = btrfs_stack_timespec_sec(&root_item->rtime);
+ subvol_info->rtime.nsec = btrfs_stack_timespec_nsec(&root_item->rtime);
+
+ if (key.objectid != BTRFS_FS_TREE_OBJECTID) {
+ /* Search root tree for ROOT_BACKREF of this subvolume */
+ root = fs_info->tree_root;
+
+ key.type = BTRFS_ROOT_BACKREF_KEY;
+ key.offset = 0;
+ ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+ if (ret < 0) {
+ goto out;
+ } else if (path->slots[0] >=
+ btrfs_header_nritems(path->nodes[0])) {
+ ret = btrfs_next_leaf(root, path);
+ if (ret < 0) {
+ goto out;
+ } else if (ret > 0) {
+ ret = -EUCLEAN;
+ goto out;
+ }
+ }
+
+ leaf = path->nodes[0];
+ slot = path->slots[0];
+ btrfs_item_key_to_cpu(leaf, &key, slot);
+ if (key.objectid == subvol_info->treeid &&
+ key.type == BTRFS_ROOT_BACKREF_KEY) {
+ subvol_info->parent_id = key.offset;
+
+ rref = btrfs_item_ptr(leaf, slot, struct btrfs_root_ref);
+ subvol_info->dirid = btrfs_root_ref_dirid(leaf, rref);
+
+ item_off = btrfs_item_ptr_offset(leaf, slot)
+ + sizeof(struct btrfs_root_ref);
+ item_len = btrfs_item_size_nr(leaf, slot)
+ - sizeof(struct btrfs_root_ref);
+ read_extent_buffer(leaf, subvol_info->name,
+ item_off, item_len);
+ } else {
+ ret = -ENOENT;
+ goto out;
+ }
+ }
+
+ if (copy_to_user(argp, subvol_info, sizeof(*subvol_info)))
+ ret = -EFAULT;
+
+out:
+ btrfs_free_path(path);
+ kzfree(subvol_info);
+ return ret;
+}
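
And a matching sketch for the subvolume-info ioctl, again assuming the uapi struct introduced by this series (treeid, name, parent_id and generation are a subset of its fields):

#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/btrfs.h>

static int print_subvol_info(int fd)
{
	struct btrfs_ioctl_get_subvol_info_args info;

	memset(&info, 0, sizeof(info));
	if (ioctl(fd, BTRFS_IOC_GET_SUBVOL_INFO, &info) < 0) {
		perror("BTRFS_IOC_GET_SUBVOL_INFO");
		return -1;
	}
	printf("subvol %llu (%s), parent %llu, generation %llu\n",
	       (unsigned long long)info.treeid, info.name,
	       (unsigned long long)info.parent_id,
	       (unsigned long long)info.generation);
	return 0;
}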
+
+/*
+ * Return ROOT_REF information of the subvolume containing this inode
+ * except the subvolume name.
+ */
+static int btrfs_ioctl_get_subvol_rootref(struct file *file, void __user *argp)
+{
+ struct btrfs_ioctl_get_subvol_rootref_args *rootrefs;
+ struct btrfs_root_ref *rref;
+ struct btrfs_root *root;
+ struct btrfs_path *path;
+ struct btrfs_key key;
+ struct extent_buffer *leaf;
+ struct inode *inode;
+ u64 objectid;
+ int slot;
+ int ret;
+ u8 found;
+
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+
+ rootrefs = memdup_user(argp, sizeof(*rootrefs));
+ if (IS_ERR(rootrefs)) {
+ btrfs_free_path(path);
+ return PTR_ERR(rootrefs);
+ }
+
+ inode = file_inode(file);
+ root = BTRFS_I(inode)->root->fs_info->tree_root;
+ objectid = BTRFS_I(inode)->root->root_key.objectid;
+
+ key.objectid = objectid;
+ key.type = BTRFS_ROOT_REF_KEY;
+ key.offset = rootrefs->min_treeid;
+ found = 0;
+
+ ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+ if (ret < 0) {
+ goto out;
+ } else if (path->slots[0] >=
+ btrfs_header_nritems(path->nodes[0])) {
+ ret = btrfs_next_leaf(root, path);
+ if (ret < 0) {
+ goto out;
+ } else if (ret > 0) {
+ ret = -EUCLEAN;
+ goto out;
+ }
+ }
+ while (1) {
+ leaf = path->nodes[0];
+ slot = path->slots[0];
+
+ btrfs_item_key_to_cpu(leaf, &key, slot);
+ if (key.objectid != objectid || key.type != BTRFS_ROOT_REF_KEY) {
+ ret = 0;
+ goto out;
+ }
+
+ if (found == BTRFS_MAX_ROOTREF_BUFFER_NUM) {
+ ret = -EOVERFLOW;
+ goto out;
+ }
+
+ rref = btrfs_item_ptr(leaf, slot, struct btrfs_root_ref);
+ rootrefs->rootref[found].treeid = key.offset;
+ rootrefs->rootref[found].dirid =
+ btrfs_root_ref_dirid(leaf, rref);
+ found++;
+
+ ret = btrfs_next_item(root, path);
+ if (ret < 0) {
+ goto out;
+ } else if (ret > 0) {
+ ret = -EUCLEAN;
+ goto out;
+ }
+ }
+
+out:
+ if (!ret || ret == -EOVERFLOW) {
+ rootrefs->num_items = found;
+ /* update min_treeid for next search */
+ if (found)
+ rootrefs->min_treeid =
+ rootrefs->rootref[found - 1].treeid + 1;
+ if (copy_to_user(argp, rootrefs, sizeof(*rootrefs)))
+ ret = -EFAULT;
+ }
+
+ kfree(rootrefs);
+ btrfs_free_path(path);
+
+ return ret;
+}
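
Because the in-kernel buffer holds at most BTRFS_MAX_ROOTREF_BUFFER_NUM entries, callers are expected to loop: on -EOVERFLOW the args are still copied back with num_items filled and min_treeid advanced past the last returned entry. A paging sketch under the same uapi assumptions:

#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/btrfs.h>

static int list_child_subvols(int fd)
{
	struct btrfs_ioctl_get_subvol_rootref_args args;
	unsigned int i;
	int ret;

	memset(&args, 0, sizeof(args));
	do {
		ret = ioctl(fd, BTRFS_IOC_GET_SUBVOL_ROOTREF, &args);
		if (ret < 0 && errno != EOVERFLOW) {
			perror("BTRFS_IOC_GET_SUBVOL_ROOTREF");
			return -1;
		}
		for (i = 0; i < args.num_items; i++)
			printf("treeid %llu dirid %llu\n",
			       (unsigned long long)args.rootref[i].treeid,
			       (unsigned long long)args.rootref[i].dirid);
		/* min_treeid was advanced by the kernel; just retry */
	} while (ret < 0 && errno == EOVERFLOW);
	return 0;
}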
+
static noinline int btrfs_ioctl_snap_destroy(struct file *file,
void __user *arg)
{
@@ -2309,12 +2812,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
struct btrfs_root *root = BTRFS_I(dir)->root;
struct btrfs_root *dest = NULL;
struct btrfs_ioctl_vol_args *vol_args;
- struct btrfs_trans_handle *trans;
- struct btrfs_block_rsv block_rsv;
- u64 root_flags;
- u64 qgroup_reserved;
int namelen;
- int ret;
int err = 0;
if (!S_ISDIR(dir->i_mode))
@@ -2398,133 +2896,11 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
}
inode_lock(inode);
-
- /*
- * Don't allow to delete a subvolume with send in progress. This is
- * inside the i_mutex so the error handling that has to drop the bit
- * again is not run concurrently.
- */
- spin_lock(&dest->root_item_lock);
- root_flags = btrfs_root_flags(&dest->root_item);
- if (dest->send_in_progress == 0) {
- btrfs_set_root_flags(&dest->root_item,
- root_flags | BTRFS_ROOT_SUBVOL_DEAD);
- spin_unlock(&dest->root_item_lock);
- } else {
- spin_unlock(&dest->root_item_lock);
- btrfs_warn(fs_info,
- "Attempt to delete subvolume %llu during send",
- dest->root_key.objectid);
- err = -EPERM;
- goto out_unlock_inode;
- }
-
- down_write(&fs_info->subvol_sem);
-
- err = may_destroy_subvol(dest);
- if (err)
- goto out_up_write;
-
- btrfs_init_block_rsv(&block_rsv, BTRFS_BLOCK_RSV_TEMP);
- /*
- * One for dir inode, two for dir entries, two for root
- * ref/backref.
- */
- err = btrfs_subvolume_reserve_metadata(root, &block_rsv,
- 5, &qgroup_reserved, true);
- if (err)
- goto out_up_write;
-
- trans = btrfs_start_transaction(root, 0);
- if (IS_ERR(trans)) {
- err = PTR_ERR(trans);
- goto out_release;
- }
- trans->block_rsv = &block_rsv;
- trans->bytes_reserved = block_rsv.size;
-
- btrfs_record_snapshot_destroy(trans, BTRFS_I(dir));
-
- ret = btrfs_unlink_subvol(trans, root, dir,
- dest->root_key.objectid,
- dentry->d_name.name,
- dentry->d_name.len);
- if (ret) {
- err = ret;
- btrfs_abort_transaction(trans, ret);
- goto out_end_trans;
- }
-
- btrfs_record_root_in_trans(trans, dest);
-
- memset(&dest->root_item.drop_progress, 0,
- sizeof(dest->root_item.drop_progress));
- dest->root_item.drop_level = 0;
- btrfs_set_root_refs(&dest->root_item, 0);
-
- if (!test_and_set_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, &dest->state)) {
- ret = btrfs_insert_orphan_item(trans,
- fs_info->tree_root,
- dest->root_key.objectid);
- if (ret) {
- btrfs_abort_transaction(trans, ret);
- err = ret;
- goto out_end_trans;
- }
- }
-
- ret = btrfs_uuid_tree_rem(trans, fs_info, dest->root_item.uuid,
- BTRFS_UUID_KEY_SUBVOL,
- dest->root_key.objectid);
- if (ret && ret != -ENOENT) {
- btrfs_abort_transaction(trans, ret);
- err = ret;
- goto out_end_trans;
- }
- if (!btrfs_is_empty_uuid(dest->root_item.received_uuid)) {
- ret = btrfs_uuid_tree_rem(trans, fs_info,
- dest->root_item.received_uuid,
- BTRFS_UUID_KEY_RECEIVED_SUBVOL,
- dest->root_key.objectid);
- if (ret && ret != -ENOENT) {
- btrfs_abort_transaction(trans, ret);
- err = ret;
- goto out_end_trans;
- }
- }
-
-out_end_trans:
- trans->block_rsv = NULL;
- trans->bytes_reserved = 0;
- ret = btrfs_end_transaction(trans);
- if (ret && !err)
- err = ret;
- inode->i_flags |= S_DEAD;
-out_release:
- btrfs_subvolume_release_metadata(fs_info, &block_rsv);
-out_up_write:
- up_write(&fs_info->subvol_sem);
- if (err) {
- spin_lock(&dest->root_item_lock);
- root_flags = btrfs_root_flags(&dest->root_item);
- btrfs_set_root_flags(&dest->root_item,
- root_flags & ~BTRFS_ROOT_SUBVOL_DEAD);
- spin_unlock(&dest->root_item_lock);
- }
-out_unlock_inode:
+ err = btrfs_delete_subvolume(dir, dentry);
inode_unlock(inode);
- if (!err) {
- d_invalidate(dentry);
- btrfs_invalidate_inodes(dest);
+ if (!err)
d_delete(dentry);
- ASSERT(dest->send_in_progress == 0);
- /* the last ref */
- if (dest->ino_cache_inode) {
- iput(dest->ino_cache_inode);
- dest->ino_cache_inode = NULL;
- }
- }
out_dput:
dput(dentry);
out_unlock_dir:
@@ -2613,7 +2989,6 @@ static long btrfs_ioctl_add_dev(struct btrfs_fs_info *fs_info, void __user *arg)
if (test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags))
return BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
- mutex_lock(&fs_info->volume_mutex);
vol_args = memdup_user(arg, sizeof(*vol_args));
if (IS_ERR(vol_args)) {
ret = PTR_ERR(vol_args);
@@ -2628,7 +3003,6 @@ static long btrfs_ioctl_add_dev(struct btrfs_fs_info *fs_info, void __user *arg)
kfree(vol_args);
out:
- mutex_unlock(&fs_info->volume_mutex);
clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
return ret;
}
@@ -2654,8 +3028,10 @@ static long btrfs_ioctl_rm_dev_v2(struct file *file, void __user *arg)
}
/* Check for compatibility reject unknown flags */
- if (vol_args->flags & ~BTRFS_VOL_ARG_V2_FLAGS_SUPPORTED)
- return -EOPNOTSUPP;
+ if (vol_args->flags & ~BTRFS_VOL_ARG_V2_FLAGS_SUPPORTED) {
+ ret = -EOPNOTSUPP;
+ goto out;
+ }
if (test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags)) {
ret = BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
@@ -2954,8 +3330,6 @@ static void btrfs_cmp_data_free(struct cmp_pages *cmp)
put_page(pg);
}
}
- kfree(cmp->src_pages);
- kfree(cmp->dst_pages);
}
static int btrfs_cmp_data_prepare(struct inode *src, u64 loff,
@@ -2964,40 +3338,14 @@ static int btrfs_cmp_data_prepare(struct inode *src, u64 loff,
{
int ret;
int num_pages = PAGE_ALIGN(len) >> PAGE_SHIFT;
- struct page **src_pgarr, **dst_pgarr;
- /*
- * We must gather up all the pages before we initiate our
- * extent locking. We use an array for the page pointers. Size
- * of the array is bounded by len, which is in turn bounded by
- * BTRFS_MAX_DEDUPE_LEN.
- */
- src_pgarr = kcalloc(num_pages, sizeof(struct page *), GFP_KERNEL);
- dst_pgarr = kcalloc(num_pages, sizeof(struct page *), GFP_KERNEL);
- if (!src_pgarr || !dst_pgarr) {
- kfree(src_pgarr);
- kfree(dst_pgarr);
- return -ENOMEM;
- }
cmp->num_pages = num_pages;
- cmp->src_pages = src_pgarr;
- cmp->dst_pages = dst_pgarr;
-
- /*
- * If deduping ranges in the same inode, locking rules make it mandatory
- * to always lock pages in ascending order to avoid deadlocks with
- * concurrent tasks (such as starting writeback/delalloc).
- */
- if (src == dst && dst_loff < loff) {
- swap(src_pgarr, dst_pgarr);
- swap(loff, dst_loff);
- }
- ret = gather_extent_pages(src, src_pgarr, cmp->num_pages, loff);
+ ret = gather_extent_pages(src, cmp->src_pages, num_pages, loff);
if (ret)
goto out;
- ret = gather_extent_pages(dst, dst_pgarr, cmp->num_pages, dst_loff);
+ ret = gather_extent_pages(dst, cmp->dst_pages, num_pages, dst_loff);
out:
if (ret)
@@ -3067,31 +3415,23 @@ static int extent_same_check_offsets(struct inode *inode, u64 off, u64 *plen,
return 0;
}
-static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen,
- struct inode *dst, u64 dst_loff)
+static int btrfs_extent_same_range(struct inode *src, u64 loff, u64 olen,
+ struct inode *dst, u64 dst_loff,
+ struct cmp_pages *cmp)
{
int ret;
u64 len = olen;
- struct cmp_pages cmp;
bool same_inode = (src == dst);
u64 same_lock_start = 0;
u64 same_lock_len = 0;
- if (len == 0)
- return 0;
-
- if (same_inode)
- inode_lock(src);
- else
- btrfs_double_inode_lock(src, dst);
-
ret = extent_same_check_offsets(src, loff, &len, olen);
if (ret)
- goto out_unlock;
+ return ret;
ret = extent_same_check_offsets(dst, dst_loff, &len, olen);
if (ret)
- goto out_unlock;
+ return ret;
if (same_inode) {
/*
@@ -3108,32 +3448,21 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen,
* allow an unaligned length so long as it ends at
* i_size.
*/
- if (len != olen) {
- ret = -EINVAL;
- goto out_unlock;
- }
+ if (len != olen)
+ return -EINVAL;
/* Check for overlapping ranges */
- if (dst_loff + len > loff && dst_loff < loff + len) {
- ret = -EINVAL;
- goto out_unlock;
- }
+ if (dst_loff + len > loff && dst_loff < loff + len)
+ return -EINVAL;
same_lock_start = min_t(u64, loff, dst_loff);
same_lock_len = max_t(u64, loff, dst_loff) + len - same_lock_start;
}
- /* don't make the dst file partly checksummed */
- if ((BTRFS_I(src)->flags & BTRFS_INODE_NODATASUM) !=
- (BTRFS_I(dst)->flags & BTRFS_INODE_NODATASUM)) {
- ret = -EINVAL;
- goto out_unlock;
- }
-
again:
- ret = btrfs_cmp_data_prepare(src, loff, dst, dst_loff, olen, &cmp);
+ ret = btrfs_cmp_data_prepare(src, loff, dst, dst_loff, olen, cmp);
if (ret)
- goto out_unlock;
+ return ret;
if (same_inode)
ret = lock_extent_range(src, same_lock_start, same_lock_len,
@@ -3154,7 +3483,7 @@ again:
* Ranges in the io trees already unlocked. Now unlock all
* pages before waiting for all IO to complete.
*/
- btrfs_cmp_data_free(&cmp);
+ btrfs_cmp_data_free(cmp);
if (same_inode) {
btrfs_wait_ordered_range(src, same_lock_start,
same_lock_len);
@@ -3167,12 +3496,12 @@ again:
ASSERT(ret == 0);
if (WARN_ON(ret)) {
/* ranges in the io trees already unlocked */
- btrfs_cmp_data_free(&cmp);
+ btrfs_cmp_data_free(cmp);
return ret;
}
/* pass original length for comparison so we stay within i_size */
- ret = btrfs_cmp_data(olen, &cmp);
+ ret = btrfs_cmp_data(olen, cmp);
if (ret == 0)
ret = btrfs_clone(src, dst, loff, olen, len, dst_loff, 1);
@@ -3182,18 +3511,91 @@ again:
else
btrfs_double_extent_unlock(src, loff, dst, dst_loff, len);
- btrfs_cmp_data_free(&cmp);
+ btrfs_cmp_data_free(cmp);
+
+ return ret;
+}
+
+#define BTRFS_MAX_DEDUPE_LEN SZ_16M
+
+static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen,
+ struct inode *dst, u64 dst_loff)
+{
+ int ret;
+ struct cmp_pages cmp;
+ int num_pages = PAGE_ALIGN(BTRFS_MAX_DEDUPE_LEN) >> PAGE_SHIFT;
+ bool same_inode = (src == dst);
+ u64 i, tail_len, chunk_count;
+
+ if (olen == 0)
+ return 0;
+
+ if (same_inode)
+ inode_lock(src);
+ else
+ btrfs_double_inode_lock(src, dst);
+
+ /* don't make the dst file partly checksummed */
+ if ((BTRFS_I(src)->flags & BTRFS_INODE_NODATASUM) !=
+ (BTRFS_I(dst)->flags & BTRFS_INODE_NODATASUM)) {
+ ret = -EINVAL;
+ goto out_unlock;
+ }
+
+ tail_len = olen % BTRFS_MAX_DEDUPE_LEN;
+ chunk_count = div_u64(olen, BTRFS_MAX_DEDUPE_LEN);
+ if (chunk_count == 0)
+ num_pages = PAGE_ALIGN(tail_len) >> PAGE_SHIFT;
+
+ /*
+ * If deduping ranges in the same inode, locking rules make it
+ * mandatory to always lock pages in ascending order to avoid deadlocks
+ * with concurrent tasks (such as starting writeback/delalloc).
+ */
+ if (same_inode && dst_loff < loff)
+ swap(loff, dst_loff);
+
+ /*
+ * We must gather up all the pages before we initiate our extent
+ * locking. We use an array for the page pointers. The size of the array
+ * is bounded by len, which is in turn bounded by BTRFS_MAX_DEDUPE_LEN.
+ */
+ cmp.src_pages = kvmalloc_array(num_pages, sizeof(struct page *),
+ GFP_KERNEL | __GFP_ZERO);
+ cmp.dst_pages = kvmalloc_array(num_pages, sizeof(struct page *),
+ GFP_KERNEL | __GFP_ZERO);
+ if (!cmp.src_pages || !cmp.dst_pages) {
+ ret = -ENOMEM;
+ goto out_free;
+ }
+
+ for (i = 0; i < chunk_count; i++) {
+ ret = btrfs_extent_same_range(src, loff, BTRFS_MAX_DEDUPE_LEN,
+ dst, dst_loff, &cmp);
+ if (ret)
+ goto out_unlock;
+
+ loff += BTRFS_MAX_DEDUPE_LEN;
+ dst_loff += BTRFS_MAX_DEDUPE_LEN;
+ }
+
+ if (tail_len > 0)
+ ret = btrfs_extent_same_range(src, loff, tail_len, dst,
+ dst_loff, &cmp);
+
out_unlock:
if (same_inode)
inode_unlock(src);
else
btrfs_double_inode_unlock(src, dst);
+out_free:
+ kvfree(cmp.src_pages);
+ kvfree(cmp.dst_pages);
+
return ret;
}
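
The chunk split introduced above is plain modular arithmetic: olen falls into chunk_count full BTRFS_MAX_DEDUPE_LEN pieces plus a tail, and the page arrays only ever need to cover one chunk. A minimal userspace sketch of the same computation, assuming nothing beyond the constants visible in the hunk:

#include <stdio.h>
#include <stdint.h>

#define SZ_16M               (16 * 1024 * 1024ULL)
#define BTRFS_MAX_DEDUPE_LEN SZ_16M

/* Stand-alone rendering of the chunk split in btrfs_extent_same(). */
static void show_chunks(uint64_t olen)
{
	uint64_t tail_len = olen % BTRFS_MAX_DEDUPE_LEN;
	uint64_t chunk_count = olen / BTRFS_MAX_DEDUPE_LEN;

	printf("olen=%llu -> %llu full chunks + %llu byte tail\n",
	       (unsigned long long)olen,
	       (unsigned long long)chunk_count,
	       (unsigned long long)tail_len);
}

int main(void)
{
	show_chunks(40 * 1024 * 1024ULL);	/* 2 chunks + 8 MiB tail */
	show_chunks(4096);			/* 0 chunks, tail only */
	return 0;
}

The kernel code uses div_u64() for the division because a plain 64-bit '/' is not available on all 32-bit targets.
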
-#define BTRFS_MAX_DEDUPE_LEN SZ_16M
-
ssize_t btrfs_dedupe_file_range(struct file *src_file, u64 loff, u64 olen,
struct file *dst_file, u64 dst_loff)
{
@@ -3202,9 +3604,6 @@ ssize_t btrfs_dedupe_file_range(struct file *src_file, u64 loff, u64 olen,
u64 bs = BTRFS_I(src)->root->fs_info->sb->s_blocksize;
ssize_t res;
- if (olen > BTRFS_MAX_DEDUPE_LEN)
- olen = BTRFS_MAX_DEDUPE_LEN;
-
if (WARN_ON_ONCE(bs < PAGE_SIZE)) {
/*
* Btrfs does not support blocksize < page_size. As a
@@ -3826,11 +4225,6 @@ static noinline int btrfs_clone_files(struct file *file, struct file *file_src,
src->i_sb != inode->i_sb)
return -EXDEV;
- /* don't make the dst file partly checksummed */
- if ((BTRFS_I(src)->flags & BTRFS_INODE_NODATASUM) !=
- (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM))
- return -EINVAL;
-
if (S_ISDIR(src->i_mode) || S_ISDIR(inode->i_mode))
return -EISDIR;
@@ -3840,6 +4234,13 @@ static noinline int btrfs_clone_files(struct file *file, struct file *file_src,
inode_lock(src);
}
+ /* don't make the dst file partly checksummed */
+ if ((BTRFS_I(src)->flags & BTRFS_INODE_NODATASUM) !=
+ (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) {
+ ret = -EINVAL;
+ goto out_unlock;
+ }
+
/* determine range to clone */
ret = -EINVAL;
if (off + len > src->i_size || off + len < off)
@@ -4007,8 +4408,8 @@ out:
return ret;
}
-void btrfs_get_block_group_info(struct list_head *groups_list,
- struct btrfs_ioctl_space_info *space)
+static void get_block_group_info(struct list_head *groups_list,
+ struct btrfs_ioctl_space_info *space)
{
struct btrfs_block_group_cache *block_group;
@@ -4124,8 +4525,8 @@ static long btrfs_ioctl_space_info(struct btrfs_fs_info *fs_info,
down_read(&info->groups_sem);
for (c = 0; c < BTRFS_NR_RAID_TYPES; c++) {
if (!list_empty(&info->block_groups[c])) {
- btrfs_get_block_group_info(
- &info->block_groups[c], &space);
+ get_block_group_info(&info->block_groups[c],
+ &space);
memcpy(dest, &space, sizeof(space));
dest++;
space_args.total_spaces++;
@@ -4490,14 +4891,14 @@ out_loi:
return ret;
}
-void update_ioctl_balance_args(struct btrfs_fs_info *fs_info, int lock,
+void btrfs_update_ioctl_balance_args(struct btrfs_fs_info *fs_info,
struct btrfs_ioctl_balance_args *bargs)
{
struct btrfs_balance_control *bctl = fs_info->balance_ctl;
bargs->flags = bctl->flags;
- if (atomic_read(&fs_info->balance_running))
+ if (test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags))
bargs->state |= BTRFS_BALANCE_STATE_RUNNING;
if (atomic_read(&fs_info->balance_pause_req))
bargs->state |= BTRFS_BALANCE_STATE_PAUSE_REQ;
@@ -4508,13 +4909,9 @@ void update_ioctl_balance_args(struct btrfs_fs_info *fs_info, int lock,
memcpy(&bargs->meta, &bctl->meta, sizeof(bargs->meta));
memcpy(&bargs->sys, &bctl->sys, sizeof(bargs->sys));
- if (lock) {
- spin_lock(&fs_info->balance_lock);
- memcpy(&bargs->stat, &bctl->stat, sizeof(bargs->stat));
- spin_unlock(&fs_info->balance_lock);
- } else {
- memcpy(&bargs->stat, &bctl->stat, sizeof(bargs->stat));
- }
+ spin_lock(&fs_info->balance_lock);
+ memcpy(&bargs->stat, &bctl->stat, sizeof(bargs->stat));
+ spin_unlock(&fs_info->balance_lock);
}
static long btrfs_ioctl_balance(struct file *file, void __user *arg)
@@ -4535,7 +4932,6 @@ static long btrfs_ioctl_balance(struct file *file, void __user *arg)
again:
if (!test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags)) {
- mutex_lock(&fs_info->volume_mutex);
mutex_lock(&fs_info->balance_mutex);
need_unlock = true;
goto locked;
@@ -4550,21 +4946,22 @@ again:
mutex_lock(&fs_info->balance_mutex);
if (fs_info->balance_ctl) {
/* this is either (2) or (3) */
- if (!atomic_read(&fs_info->balance_running)) {
+ if (!test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags)) {
mutex_unlock(&fs_info->balance_mutex);
- if (!mutex_trylock(&fs_info->volume_mutex))
- goto again;
+ /*
+ * Lock released to allow other waiters to continue,
+ * we'll reexamine the status.
+ */
mutex_lock(&fs_info->balance_mutex);
if (fs_info->balance_ctl &&
- !atomic_read(&fs_info->balance_running)) {
+ !test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags)) {
/* this is (3) */
need_unlock = false;
goto locked;
}
mutex_unlock(&fs_info->balance_mutex);
- mutex_unlock(&fs_info->volume_mutex);
goto again;
} else {
/* this is (2) */
@@ -4617,7 +5014,6 @@ locked:
goto out_bargs;
}
- bctl->fs_info = fs_info;
if (arg) {
memcpy(&bctl->data, &bargs->data, sizeof(bctl->data));
memcpy(&bctl->meta, &bargs->meta, sizeof(bctl->meta));
@@ -4636,14 +5032,14 @@ locked:
do_balance:
/*
- * Ownership of bctl and filesystem flag BTRFS_FS_EXCL_OP
- * goes to to btrfs_balance. bctl is freed in __cancel_balance,
- * or, if restriper was paused all the way until unmount, in
- * free_fs_info. The flag is cleared in __cancel_balance.
+ * Ownership of bctl and filesystem flag BTRFS_FS_EXCL_OP goes to
+ * btrfs_balance. bctl is freed in reset_balance_state, or, if the
+ * restriper was paused all the way until unmount, in free_fs_info.
+ * The flag should be cleared after reset_balance_state.
*/
need_unlock = false;
- ret = btrfs_balance(bctl, bargs);
+ ret = btrfs_balance(fs_info, bctl, bargs);
bctl = NULL;
if (arg) {
@@ -4657,7 +5053,6 @@ out_bargs:
kfree(bargs);
out_unlock:
mutex_unlock(&fs_info->balance_mutex);
- mutex_unlock(&fs_info->volume_mutex);
if (need_unlock)
clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
out:
@@ -4701,7 +5096,7 @@ static long btrfs_ioctl_balance_progress(struct btrfs_fs_info *fs_info,
goto out;
}
- update_ioctl_balance_args(fs_info, 1, bargs);
+ btrfs_update_ioctl_balance_args(fs_info, bargs);
if (copy_to_user(arg, bargs, sizeof(*bargs)))
ret = -EFAULT;
@@ -5038,8 +5433,7 @@ static long _btrfs_ioctl_set_received_subvol(struct file *file,
BTRFS_UUID_SIZE);
if (received_uuid_changed &&
!btrfs_is_empty_uuid(root_item->received_uuid)) {
- ret = btrfs_uuid_tree_rem(trans, fs_info,
- root_item->received_uuid,
+ ret = btrfs_uuid_tree_remove(trans, root_item->received_uuid,
BTRFS_UUID_KEY_RECEIVED_SUBVOL,
root->root_key.objectid);
if (ret && ret != -ENOENT) {
@@ -5063,7 +5457,7 @@ static long _btrfs_ioctl_set_received_subvol(struct file *file,
goto out;
}
if (received_uuid_changed && !btrfs_is_empty_uuid(sa->uuid)) {
- ret = btrfs_uuid_tree_add(trans, fs_info, sa->uuid,
+ ret = btrfs_uuid_tree_add(trans, sa->uuid,
BTRFS_UUID_KEY_RECEIVED_SUBVOL,
root->root_key.objectid);
if (ret < 0 && ret != -EEXIST) {
@@ -5497,7 +5891,7 @@ long btrfs_ioctl(struct file *file, unsigned int
case BTRFS_IOC_SYNC: {
int ret;
- ret = btrfs_start_delalloc_roots(fs_info, 0, -1);
+ ret = btrfs_start_delalloc_roots(fs_info, -1);
if (ret)
return ret;
ret = btrfs_sync_fs(inode->i_sb, 1);
@@ -5565,6 +5959,16 @@ long btrfs_ioctl(struct file *file, unsigned int
return btrfs_ioctl_get_features(file, argp);
case BTRFS_IOC_SET_FEATURES:
return btrfs_ioctl_set_features(file, argp);
+ case FS_IOC_FSGETXATTR:
+ return btrfs_ioctl_fsgetxattr(file, argp);
+ case FS_IOC_FSSETXATTR:
+ return btrfs_ioctl_fssetxattr(file, argp);
+ case BTRFS_IOC_GET_SUBVOL_INFO:
+ return btrfs_ioctl_get_subvol_info(file, argp);
+ case BTRFS_IOC_GET_SUBVOL_ROOTREF:
+ return btrfs_ioctl_get_subvol_rootref(file, argp);
+ case BTRFS_IOC_INO_LOOKUP_USER:
+ return btrfs_ioctl_ino_lookup_user(file, argp);
}
return -ENOTTY;
diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c
index e4faefac9d16..1da768e5ef75 100644
--- a/fs/btrfs/locking.c
+++ b/fs/btrfs/locking.c
@@ -66,22 +66,16 @@ void btrfs_clear_lock_blocking_rw(struct extent_buffer *eb, int rw)
write_lock(&eb->lock);
WARN_ON(atomic_read(&eb->spinning_writers));
atomic_inc(&eb->spinning_writers);
- /*
- * atomic_dec_and_test implies a barrier for waitqueue_active
- */
- if (atomic_dec_and_test(&eb->blocking_writers) &&
- waitqueue_active(&eb->write_lock_wq))
- wake_up(&eb->write_lock_wq);
+ /* atomic_dec_and_test implies a barrier */
+ if (atomic_dec_and_test(&eb->blocking_writers))
+ cond_wake_up_nomb(&eb->write_lock_wq);
} else if (rw == BTRFS_READ_LOCK_BLOCKING) {
BUG_ON(atomic_read(&eb->blocking_readers) == 0);
read_lock(&eb->lock);
atomic_inc(&eb->spinning_readers);
- /*
- * atomic_dec_and_test implies a barrier for waitqueue_active
- */
- if (atomic_dec_and_test(&eb->blocking_readers) &&
- waitqueue_active(&eb->read_lock_wq))
- wake_up(&eb->read_lock_wq);
+ /* atomic_dec_and_test implies a barrier */
+ if (atomic_dec_and_test(&eb->blocking_readers))
+ cond_wake_up_nomb(&eb->read_lock_wq);
}
}
@@ -221,12 +215,9 @@ void btrfs_tree_read_unlock_blocking(struct extent_buffer *eb)
}
btrfs_assert_tree_read_locked(eb);
WARN_ON(atomic_read(&eb->blocking_readers) == 0);
- /*
- * atomic_dec_and_test implies a barrier for waitqueue_active
- */
- if (atomic_dec_and_test(&eb->blocking_readers) &&
- waitqueue_active(&eb->read_lock_wq))
- wake_up(&eb->read_lock_wq);
+ /* atomic_dec_and_test implies a barrier */
+ if (atomic_dec_and_test(&eb->blocking_readers))
+ cond_wake_up_nomb(&eb->read_lock_wq);
atomic_dec(&eb->read_locks);
}
@@ -275,12 +266,9 @@ void btrfs_tree_unlock(struct extent_buffer *eb)
if (blockers) {
WARN_ON(atomic_read(&eb->spinning_writers));
atomic_dec(&eb->blocking_writers);
- /*
- * Make sure counter is updated before we wake up waiters.
- */
+ /* Use the lighter barrier after atomic */
smp_mb__after_atomic();
- if (waitqueue_active(&eb->write_lock_wq))
- wake_up(&eb->write_lock_wq);
+ cond_wake_up_nomb(&eb->write_lock_wq);
} else {
WARN_ON(atomic_read(&eb->spinning_writers) != 1);
atomic_dec(&eb->spinning_writers);
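
All three hunks above collapse an open-coded waitqueue_active()/wake_up() pair into cond_wake_up_nomb(). A sketch of what such a helper is assumed to look like ("nomb" meaning the caller has already implied the required memory barrier, e.g. via atomic_dec_and_test() or an explicit smp_mb__after_atomic()):

#include <linux/wait.h>

/* Assumed shape of the helper used above, not the authoritative version. */
static inline void cond_wake_up_nomb(struct wait_queue_head *wq)
{
	/*
	 * Only take the locked wake_up() path when somebody is actually
	 * waiting; waitqueue_active() is the unlocked fast-path check and
	 * depends on the barrier the caller already provided.
	 */
	if (waitqueue_active(wq))
		wake_up(wq);
}
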
diff --git a/fs/btrfs/lzo.c b/fs/btrfs/lzo.c
index 0667ea07f766..b6a4cc178bee 100644
--- a/fs/btrfs/lzo.c
+++ b/fs/btrfs/lzo.c
@@ -17,6 +17,43 @@
#define LZO_LEN 4
+/*
+ * Btrfs LZO compression format
+ *
+ * Regular and inlined LZO compressed data extents consist of:
+ *
+ * 1. Header
+ * Fixed size. LZO_LEN (4) bytes long, LE32.
+ * Records the total size (including the header) of compressed data.
+ *
+ * 2. Segment(s)
+ * Variable size. Each segment includes one segment header, followed by data
+ * payload.
+ * One regular LZO compressed extent can have one or more segments.
+ * For an inlined LZO compressed extent, only one segment is allowed.
+ * One segment represents at most one page of uncompressed data.
+ *
+ * 2.1 Segment header
+ * Fixed size. LZO_LEN (4) bytes long, LE32.
+ * Records the total size of the segment (not including the header).
+ * A segment header never crosses a page boundary, so there can be
+ * at most 3 padding zeros at the end of a page.
+ *
+ * 2.2 Data Payload
+ * Variable size. The upper size limit is lzo1x_worst_compress(PAGE_SIZE),
+ * which is 4419 for a 4KiB page.
+ *
+ * Example:
+ * Page 1:
+ * 0 0x2 0x4 0x6 0x8 0xa 0xc 0xe 0x10
+ * 0x0000 | Header | SegHdr 01 | Data payload 01 ... |
+ * ...
+ * 0x0ff0 | SegHdr N | Data payload N ... |00|
+ * ^^ padding zeros
+ * Page 2:
+ * 0x1000 | SegHdr N+1| Data payload N+1 ... |
+ */
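
To make the layout concrete, here is a hypothetical userspace walker over such an extent; it relies only on the rules stated in the comment (LE32 total-length header, per-segment LE32 length, segment headers never straddling a page):

#include <stdint.h>
#include <stddef.h>

#define LZO_LEN 4
#define PAGE_SZ 4096

/* Little-endian u32 read, mirroring what read_compress_length() does. */
static uint32_t le32_read(const uint8_t *p)
{
	return (uint32_t)p[0] | ((uint32_t)p[1] << 8) |
	       ((uint32_t)p[2] << 16) | ((uint32_t)p[3] << 24);
}

/* Returns the number of segments, or -1 if a header is inconsistent. */
static int count_segments(const uint8_t *buf, size_t buf_len)
{
	uint32_t tot_len;
	size_t off = LZO_LEN;
	int segs = 0;

	if (buf_len < LZO_LEN)
		return -1;
	tot_len = le32_read(buf);	/* header: total size incl. itself */
	if (tot_len < LZO_LEN || tot_len > buf_len)
		return -1;

	while (off < tot_len) {
		size_t in_page = PAGE_SZ - (off % PAGE_SZ);
		uint32_t seg_len;

		/* A segment header never crosses a page boundary, ... */
		if (in_page < LZO_LEN) {
			off += in_page;		/* ...skip padding zeros */
			continue;
		}
		if (off + LZO_LEN > tot_len)
			return -1;
		seg_len = le32_read(buf + off);	/* segment header */
		off += LZO_LEN + seg_len;	/* header plus payload */
		segs++;
	}
	return segs;
}
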
+
struct workspace {
void *mem;
void *buf; /* where decompressed data goes */
@@ -258,6 +295,7 @@ static int lzo_decompress_bio(struct list_head *ws, struct compressed_bio *cb)
unsigned long working_bytes;
size_t in_len;
size_t out_len;
+ const size_t max_segment_len = lzo1x_worst_compress(PAGE_SIZE);
unsigned long in_offset;
unsigned long in_page_bytes_left;
unsigned long tot_in;
@@ -271,10 +309,22 @@ static int lzo_decompress_bio(struct list_head *ws, struct compressed_bio *cb)
data_in = kmap(pages_in[0]);
tot_len = read_compress_length(data_in);
+ /*
+ * Compressed data header check.
+ *
+ * The real compressed size can't exceed the maximum extent length, and
+ * all pages should be used (a whole unused page with just the segment
+ * header is not possible). If either check fails, the compressed
+ * extent is corrupted.
+ */
+ if (tot_len > min_t(size_t, BTRFS_MAX_COMPRESSED, srclen) ||
+ tot_len < srclen - PAGE_SIZE) {
+ ret = -EUCLEAN;
+ goto done;
+ }
tot_in = LZO_LEN;
in_offset = LZO_LEN;
- tot_len = min_t(size_t, srclen, tot_len);
in_page_bytes_left = PAGE_SIZE - LZO_LEN;
tot_out = 0;
@@ -285,6 +335,17 @@ static int lzo_decompress_bio(struct list_head *ws, struct compressed_bio *cb)
in_offset += LZO_LEN;
tot_in += LZO_LEN;
+ /*
+ * Segment header check.
+ *
+ * The segment length must not exceed the maximum LZO
+ * compression size, nor the total compressed size.
+ */
+ if (in_len > max_segment_len || tot_in + in_len > tot_len) {
+ ret = -EUCLEAN;
+ goto done;
+ }
+
tot_in += in_len;
working_bytes = in_len;
may_late_unmap = need_unmap = false;
@@ -335,7 +396,7 @@ cont:
}
}
- out_len = lzo1x_worst_compress(PAGE_SIZE);
+ out_len = max_segment_len;
ret = lzo1x_decompress_safe(buf, in_len, workspace->buf,
&out_len);
if (need_unmap)
@@ -369,15 +430,24 @@ static int lzo_decompress(struct list_head *ws, unsigned char *data_in,
struct workspace *workspace = list_entry(ws, struct workspace, list);
size_t in_len;
size_t out_len;
+ size_t max_segment_len = lzo1x_worst_compress(PAGE_SIZE);
int ret = 0;
char *kaddr;
unsigned long bytes;
- BUG_ON(srclen < LZO_LEN);
+ if (srclen < LZO_LEN || srclen > max_segment_len + LZO_LEN * 2)
+ return -EUCLEAN;
+ in_len = read_compress_length(data_in);
+ if (in_len != srclen)
+ return -EUCLEAN;
data_in += LZO_LEN;
in_len = read_compress_length(data_in);
+ if (in_len != srclen - LZO_LEN * 2) {
+ ret = -EUCLEAN;
+ goto out;
+ }
data_in += LZO_LEN;
out_len = PAGE_SIZE;
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 6db8bb2f2c28..2e1a1694a33d 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -343,11 +343,8 @@ int btrfs_dec_test_first_ordered_pending(struct inode *inode,
if (entry->bytes_left == 0) {
ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags);
- /*
- * Implicit memory barrier after test_and_set_bit
- */
- if (waitqueue_active(&entry->wait))
- wake_up(&entry->wait);
+ /* test_and_set_bit implies a barrier */
+ cond_wake_up_nomb(&entry->wait);
} else {
ret = 1;
}
@@ -410,11 +407,8 @@ have_entry:
if (entry->bytes_left == 0) {
ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags);
- /*
- * Implicit memory barrier after test_and_set_bit
- */
- if (waitqueue_active(&entry->wait))
- wake_up(&entry->wait);
+ /* test_and_set_bit implies a barrier */
+ cond_wake_up_nomb(&entry->wait);
} else {
ret = 1;
}
diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c
index 21a831d3d087..a4e11cf04671 100644
--- a/fs/btrfs/print-tree.c
+++ b/fs/btrfs/print-tree.c
@@ -166,6 +166,25 @@ static void print_uuid_item(struct extent_buffer *l, unsigned long offset,
}
}
+/*
+ * Helper to output the refs and locking status of an extent buffer. Useful
+ * for debugging problems related to race conditions.
+ */
+static void print_eb_refs_lock(struct extent_buffer *eb)
+{
+#ifdef CONFIG_BTRFS_DEBUG
+ btrfs_info(eb->fs_info,
+"refs %u lock (w:%d r:%d bw:%d br:%d sw:%d sr:%d) lock_owner %u current %u",
+ atomic_read(&eb->refs), atomic_read(&eb->write_locks),
+ atomic_read(&eb->read_locks),
+ atomic_read(&eb->blocking_writers),
+ atomic_read(&eb->blocking_readers),
+ atomic_read(&eb->spinning_writers),
+ atomic_read(&eb->spinning_readers),
+ eb->lock_owner, current->pid);
+#endif
+}
+
void btrfs_print_leaf(struct extent_buffer *l)
{
struct btrfs_fs_info *fs_info;
@@ -193,6 +212,7 @@ void btrfs_print_leaf(struct extent_buffer *l)
"leaf %llu gen %llu total ptrs %d free space %d owner %llu",
btrfs_header_bytenr(l), btrfs_header_generation(l), nr,
btrfs_leaf_free_space(fs_info, l), btrfs_header_owner(l));
+ print_eb_refs_lock(l);
for (i = 0 ; i < nr ; i++) {
item = btrfs_item_nr(i);
btrfs_item_key_to_cpu(l, &key, i);
@@ -347,6 +367,7 @@ void btrfs_print_tree(struct extent_buffer *c, bool follow)
btrfs_header_bytenr(c), level, btrfs_header_generation(c),
nr, (u32)BTRFS_NODEPTRS_PER_BLOCK(fs_info) - nr,
btrfs_header_owner(c));
+ print_eb_refs_lock(c);
for (i = 0; i < nr; i++) {
btrfs_node_key_to_cpu(c, &key, i);
pr_info("\tkey %d (%llu %u %llu) block %llu gen %llu\n",
diff --git a/fs/btrfs/props.c b/fs/btrfs/props.c
index 53a8c95828e3..dc6140013ae8 100644
--- a/fs/btrfs/props.c
+++ b/fs/btrfs/props.c
@@ -380,6 +380,7 @@ static int prop_compression_apply(struct inode *inode,
const char *value,
size_t len)
{
+ struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
int type;
if (len == 0) {
@@ -390,14 +391,17 @@ static int prop_compression_apply(struct inode *inode,
return 0;
}
- if (!strncmp("lzo", value, 3))
+ if (!strncmp("lzo", value, 3)) {
type = BTRFS_COMPRESS_LZO;
- else if (!strncmp("zlib", value, 4))
+ btrfs_set_fs_incompat(fs_info, COMPRESS_LZO);
+ } else if (!strncmp("zlib", value, 4)) {
type = BTRFS_COMPRESS_ZLIB;
- else if (!strncmp("zstd", value, len))
+ } else if (!strncmp("zstd", value, len)) {
type = BTRFS_COMPRESS_ZSTD;
- else
+ btrfs_set_fs_incompat(fs_info, COMPRESS_ZSTD);
+ } else {
return -EINVAL;
+ }
BTRFS_I(inode)->flags &= ~BTRFS_INODE_NOCOMPRESS;
BTRFS_I(inode)->flags |= BTRFS_INODE_COMPRESS;
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 9fb758d5077a..1874a6d2e6f5 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1882,8 +1882,8 @@ static int qgroup_update_counters(struct btrfs_fs_info *fs_info,
cur_old_count = btrfs_qgroup_get_old_refcnt(qg, seq);
cur_new_count = btrfs_qgroup_get_new_refcnt(qg, seq);
- trace_qgroup_update_counters(fs_info, qg->qgroupid,
- cur_old_count, cur_new_count);
+ trace_qgroup_update_counters(fs_info, qg, cur_old_count,
+ cur_new_count);
/* Rfer update part */
if (cur_old_count == 0 && cur_new_count > 0) {
@@ -2014,8 +2014,8 @@ btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans,
BUG_ON(!fs_info->quota_root);
- trace_btrfs_qgroup_account_extent(fs_info, bytenr, num_bytes,
- nr_old_roots, nr_new_roots);
+ trace_btrfs_qgroup_account_extent(fs_info, trans->transid, bytenr,
+ num_bytes, nr_old_roots, nr_new_roots);
qgroups = ulist_alloc(GFP_NOFS);
if (!qgroups) {
@@ -2580,6 +2580,21 @@ out:
}
/*
+ * Check if the leaf is the last leaf, which means all node pointers
+ * are at their last position.
+ */
+static bool is_last_leaf(struct btrfs_path *path)
+{
+ int i;
+
+ for (i = 1; i < BTRFS_MAX_LEVEL && path->nodes[i]; i++) {
+ if (path->slots[i] != btrfs_header_nritems(path->nodes[i]) - 1)
+ return false;
+ }
+ return true;
+}
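
Spelled out with a hypothetical miniature of btrfs_path, the check reads as: the current leaf is the last one iff every populated upper level sits on its final slot, so no node to the right remains to descend into.

#include <stdbool.h>

#define DEMO_MAX_LEVEL 8

/* Hypothetical stand-in for btrfs_path, for illustration only. */
struct demo_path {
	int slots[DEMO_MAX_LEVEL];	/* slot taken at each level */
	int nritems[DEMO_MAX_LEVEL];	/* items per node; 0 = level unused */
};

static bool demo_is_last_leaf(const struct demo_path *p)
{
	for (int i = 1; i < DEMO_MAX_LEVEL && p->nritems[i]; i++) {
		if (p->slots[i] != p->nritems[i] - 1)
			return false;
	}
	return true;
}
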
+
+/*
 * returns < 0 on error, 0 when more leaves are to be scanned.
* returns 1 when done.
*/
@@ -2590,8 +2605,8 @@ qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
struct btrfs_key found;
struct extent_buffer *scratch_leaf = NULL;
struct ulist *roots = NULL;
- struct seq_list tree_mod_seq_elem = SEQ_LIST_INIT(tree_mod_seq_elem);
u64 num_bytes;
+ bool done;
int slot;
int ret;
@@ -2620,12 +2635,12 @@ qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
mutex_unlock(&fs_info->qgroup_rescan_lock);
return ret;
}
+ done = is_last_leaf(path);
btrfs_item_key_to_cpu(path->nodes[0], &found,
btrfs_header_nritems(path->nodes[0]) - 1);
fs_info->qgroup_rescan_progress.objectid = found.objectid + 1;
- btrfs_get_tree_mod_seq(fs_info, &tree_mod_seq_elem);
scratch_leaf = btrfs_clone_extent_buffer(path->nodes[0]);
if (!scratch_leaf) {
ret = -ENOMEM;
@@ -2664,8 +2679,9 @@ out:
btrfs_tree_read_unlock_blocking(scratch_leaf);
free_extent_buffer(scratch_leaf);
}
- btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem);
+ if (done && !ret)
+ ret = 1;
return ret;
}
@@ -2681,6 +2697,12 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
path = btrfs_alloc_path();
if (!path)
goto out;
+ /*
+ * Rescan should only search the commit root, and any later difference
+ * should be recorded by qgroup.
+ */
+ path->search_commit_root = 1;
+ path->skip_locking = 1;
err = 0;
while (!err && !btrfs_fs_closing(fs_info)) {
@@ -2760,26 +2782,36 @@ qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid,
{
int ret = 0;
- if (!init_flags &&
- (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) ||
- !(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON))) {
- ret = -EINVAL;
- goto err;
+ if (!init_flags) {
+ /* we're resuming qgroup rescan at mount time */
+ if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN))
+ btrfs_warn(fs_info,
+ "qgroup rescan init failed, qgroup rescan is not queued");
+ else if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON))
+ btrfs_warn(fs_info,
+ "qgroup rescan init failed, qgroup is not enabled");
+ return -EINVAL;
}
mutex_lock(&fs_info->qgroup_rescan_lock);
spin_lock(&fs_info->qgroup_lock);
if (init_flags) {
- if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN)
+ if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
+ btrfs_warn(fs_info,
+ "qgroup rescan is already in progress");
ret = -EINPROGRESS;
- else if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON))
+ } else if (!(fs_info->qgroup_flags &
+ BTRFS_QGROUP_STATUS_FLAG_ON)) {
+ btrfs_warn(fs_info,
+ "qgroup rescan init failed, qgroup is not enabled");
ret = -EINVAL;
+ }
if (ret) {
spin_unlock(&fs_info->qgroup_lock);
mutex_unlock(&fs_info->qgroup_rescan_lock);
- goto err;
+ return ret;
}
fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_RESCAN;
}
@@ -2798,13 +2830,6 @@ qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid,
btrfs_init_work(&fs_info->qgroup_rescan_work,
btrfs_qgroup_rescan_helper,
btrfs_qgroup_rescan_worker, NULL, NULL);
-
- if (ret) {
-err:
- btrfs_info(fs_info, "qgroup_rescan_init failed with %d", ret);
- return ret;
- }
-
return 0;
}
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index 9abd950e7f78..5e4ad134b9ad 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -163,6 +163,12 @@ struct btrfs_raid_bio {
* bitmap to record which horizontal stripe has data
*/
unsigned long *dbitmap;
+
+ /* allocated with real_stripes-many pointers for finish_*() calls */
+ void **finish_pointers;
+
+ /* allocated with stripe_npages-many bits for finish_*() calls */
+ unsigned long *finish_pbitmap;
};
static int __raid56_parity_recover(struct btrfs_raid_bio *rbio);
@@ -981,9 +987,14 @@ static struct btrfs_raid_bio *alloc_rbio(struct btrfs_fs_info *fs_info,
int stripe_npages = DIV_ROUND_UP(stripe_len, PAGE_SIZE);
void *p;
- rbio = kzalloc(sizeof(*rbio) + num_pages * sizeof(struct page *) * 2 +
- DIV_ROUND_UP(stripe_npages, BITS_PER_LONG) *
- sizeof(long), GFP_NOFS);
+ rbio = kzalloc(sizeof(*rbio) +
+ sizeof(*rbio->stripe_pages) * num_pages +
+ sizeof(*rbio->bio_pages) * num_pages +
+ sizeof(*rbio->finish_pointers) * real_stripes +
+ sizeof(*rbio->dbitmap) * BITS_TO_LONGS(stripe_npages) +
+ sizeof(*rbio->finish_pbitmap) *
+ BITS_TO_LONGS(stripe_npages),
+ GFP_NOFS);
if (!rbio)
return ERR_PTR(-ENOMEM);
@@ -1005,13 +1016,20 @@ static struct btrfs_raid_bio *alloc_rbio(struct btrfs_fs_info *fs_info,
atomic_set(&rbio->stripes_pending, 0);
/*
- * the stripe_pages and bio_pages array point to the extra
+ * the stripe_pages, bio_pages, etc. arrays point to the extra
* memory we allocated past the end of the rbio
*/
p = rbio + 1;
- rbio->stripe_pages = p;
- rbio->bio_pages = p + sizeof(struct page *) * num_pages;
- rbio->dbitmap = p + sizeof(struct page *) * num_pages * 2;
+#define CONSUME_ALLOC(ptr, count) do { \
+ ptr = p; \
+ p = (unsigned char *)p + sizeof(*(ptr)) * (count); \
+ } while (0)
+ CONSUME_ALLOC(rbio->stripe_pages, num_pages);
+ CONSUME_ALLOC(rbio->bio_pages, num_pages);
+ CONSUME_ALLOC(rbio->finish_pointers, real_stripes);
+ CONSUME_ALLOC(rbio->dbitmap, BITS_TO_LONGS(stripe_npages));
+ CONSUME_ALLOC(rbio->finish_pbitmap, BITS_TO_LONGS(stripe_npages));
+#undef CONSUME_ALLOC
if (bbio->map_type & BTRFS_BLOCK_GROUP_RAID5)
nr_data = real_stripes - 1;
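
CONSUME_ALLOC replaces the hand-computed offsets of the old code with a cursor that carves consecutive arrays out of the single allocation. A self-contained userspace rendering of the same idea (the demo types are hypothetical):

#include <stdlib.h>

struct demo {
	long *longs;	/* most strictly aligned region carved first */
	char *bytes;
};

static struct demo *demo_alloc(size_t nlongs, size_t nbytes)
{
	struct demo *d;
	void *p;

	d = calloc(1, sizeof(*d) + sizeof(*d->longs) * nlongs +
		      sizeof(*d->bytes) * nbytes);
	if (!d)
		return NULL;

	p = d + 1;			/* first byte past the struct */
#define CONSUME_ALLOC(ptr, count) do {					\
		(ptr) = p;						\
		p = (unsigned char *)p + sizeof(*(ptr)) * (count);	\
	} while (0)
	CONSUME_ALLOC(d->longs, nlongs);
	CONSUME_ALLOC(d->bytes, nbytes);
#undef CONSUME_ALLOC
	return d;
}

Carving the pointer-sized regions before the bitmap longs, as the rbio code does, keeps each region naturally aligned (pointers and longs share alignment on the usual targets).
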
@@ -1180,7 +1198,7 @@ static void index_rbio_pages(struct btrfs_raid_bio *rbio)
static noinline void finish_rmw(struct btrfs_raid_bio *rbio)
{
struct btrfs_bio *bbio = rbio->bbio;
- void *pointers[rbio->real_stripes];
+ void **pointers = rbio->finish_pointers;
int nr_data = rbio->nr_data;
int stripe;
int pagenr;
@@ -2350,8 +2368,8 @@ static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio,
int need_check)
{
struct btrfs_bio *bbio = rbio->bbio;
- void *pointers[rbio->real_stripes];
- DECLARE_BITMAP(pbitmap, rbio->stripe_npages);
+ void **pointers = rbio->finish_pointers;
+ unsigned long *pbitmap = rbio->finish_pbitmap;
int nr_data = rbio->nr_data;
int stripe;
int pagenr;
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 00b7d3231821..879b76fa881a 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -1841,7 +1841,7 @@ again:
old_bytenr = btrfs_node_blockptr(parent, slot);
blocksize = fs_info->nodesize;
old_ptr_gen = btrfs_node_ptr_generation(parent, slot);
- btrfs_node_key_to_cpu(parent, &key, slot);
+ btrfs_node_key_to_cpu(parent, &first_key, slot);
if (level <= max_level) {
eb = path->nodes[level];
@@ -4299,7 +4299,7 @@ out:
return inode;
}
-static struct reloc_control *alloc_reloc_control(struct btrfs_fs_info *fs_info)
+static struct reloc_control *alloc_reloc_control(void)
{
struct reloc_control *rc;
@@ -4344,7 +4344,7 @@ static void describe_relocation(struct btrfs_fs_info *fs_info,
DESCRIBE_FLAG(RAID5, "raid5");
DESCRIBE_FLAG(RAID6, "raid6");
if (flags)
- snprintf(buf, buf - bp + sizeof(buf), "|0x%llx", flags);
+ snprintf(bp, buf - bp + sizeof(buf), "|0x%llx", flags);
#undef DESCRIBE_FLAG
}
@@ -4366,7 +4366,7 @@ int btrfs_relocate_block_group(struct btrfs_fs_info *fs_info, u64 group_start)
int rw = 0;
int err = 0;
- rc = alloc_reloc_control(fs_info);
+ rc = alloc_reloc_control();
if (!rc)
return -ENOMEM;
@@ -4562,7 +4562,7 @@ int btrfs_recover_relocation(struct btrfs_root *root)
if (list_empty(&reloc_roots))
goto out;
- rc = alloc_reloc_control(fs_info);
+ rc = alloc_reloc_control();
if (!rc) {
err = -ENOMEM;
goto out;
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 52b39a0924e9..a59005862010 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -3984,6 +3984,7 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
spin_lock(&fs_info->unused_bgs_lock);
if (list_empty(&cache->bg_list)) {
btrfs_get_block_group(cache);
+ trace_btrfs_add_unused_block_group(cache);
list_add_tail(&cache->bg_list,
&fs_info->unused_bgs);
}
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 221e5cdb060b..c47f62b19226 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -235,6 +235,7 @@ struct orphan_dir_info {
struct rb_node node;
u64 ino;
u64 gen;
+ u64 last_dir_index_offset;
};
struct name_cache_entry {
@@ -2844,12 +2845,6 @@ add_orphan_dir_info(struct send_ctx *sctx, u64 dir_ino)
struct rb_node *parent = NULL;
struct orphan_dir_info *entry, *odi;
- odi = kmalloc(sizeof(*odi), GFP_KERNEL);
- if (!odi)
- return ERR_PTR(-ENOMEM);
- odi->ino = dir_ino;
- odi->gen = 0;
-
while (*p) {
parent = *p;
entry = rb_entry(parent, struct orphan_dir_info, node);
@@ -2858,11 +2853,17 @@ add_orphan_dir_info(struct send_ctx *sctx, u64 dir_ino)
} else if (dir_ino > entry->ino) {
p = &(*p)->rb_right;
} else {
- kfree(odi);
return entry;
}
}
+ odi = kmalloc(sizeof(*odi), GFP_KERNEL);
+ if (!odi)
+ return ERR_PTR(-ENOMEM);
+ odi->ino = dir_ino;
+ odi->gen = 0;
+ odi->last_dir_index_offset = 0;
+
rb_link_node(&odi->node, parent, p);
rb_insert_color(&odi->node, &sctx->orphan_dirs);
return odi;
@@ -2917,6 +2918,7 @@ static int can_rmdir(struct send_ctx *sctx, u64 dir, u64 dir_gen,
struct btrfs_key found_key;
struct btrfs_key loc;
struct btrfs_dir_item *di;
+ struct orphan_dir_info *odi = NULL;
/*
* Don't try to rmdir the top/root subvolume dir.
@@ -2931,6 +2933,11 @@ static int can_rmdir(struct send_ctx *sctx, u64 dir, u64 dir_gen,
key.objectid = dir;
key.type = BTRFS_DIR_INDEX_KEY;
key.offset = 0;
+
+ odi = get_orphan_dir_info(sctx, dir);
+ if (odi)
+ key.offset = odi->last_dir_index_offset;
+
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
if (ret < 0)
goto out;
@@ -2958,30 +2965,33 @@ static int can_rmdir(struct send_ctx *sctx, u64 dir, u64 dir_gen,
dm = get_waiting_dir_move(sctx, loc.objectid);
if (dm) {
- struct orphan_dir_info *odi;
-
odi = add_orphan_dir_info(sctx, dir);
if (IS_ERR(odi)) {
ret = PTR_ERR(odi);
goto out;
}
odi->gen = dir_gen;
+ odi->last_dir_index_offset = found_key.offset;
dm->rmdir_ino = dir;
ret = 0;
goto out;
}
if (loc.objectid > send_progress) {
- struct orphan_dir_info *odi;
-
- odi = get_orphan_dir_info(sctx, dir);
- free_orphan_dir_info(sctx, odi);
+ odi = add_orphan_dir_info(sctx, dir);
+ if (IS_ERR(odi)) {
+ ret = PTR_ERR(odi);
+ goto out;
+ }
+ odi->gen = dir_gen;
+ odi->last_dir_index_offset = found_key.offset;
ret = 0;
goto out;
}
path->slots[0]++;
}
+ free_orphan_dir_info(sctx, odi);
ret = 1;
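
The new last_dir_index_offset field changes repeated can_rmdir() calls from full rescans into resumed scans: the offset of the entry that blocked deletion is remembered, and the next pass starts there. Reduced to a hypothetical sketch:

/* Remember where a scan stopped so the next pass resumes there. */
struct scan_state {
	unsigned long long last_offset;	/* mirrors last_dir_index_offset */
};

static int scan_dir(struct scan_state *st,
		    int (*blocked)(unsigned long long off),
		    unsigned long long nr_entries)
{
	unsigned long long off;

	for (off = st->last_offset; off < nr_entries; off++) {
		if (blocked(off)) {
			st->last_offset = off;	/* resume point */
			return 0;		/* cannot rmdir yet */
		}
	}
	return 1;				/* directory can go away */
}
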
@@ -3259,13 +3269,16 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm)
if (rmdir_ino) {
struct orphan_dir_info *odi;
+ u64 gen;
odi = get_orphan_dir_info(sctx, rmdir_ino);
if (!odi) {
/* already deleted */
goto finish;
}
- ret = can_rmdir(sctx, rmdir_ino, odi->gen, sctx->cur_ino);
+ gen = odi->gen;
+
+ ret = can_rmdir(sctx, rmdir_ino, gen, sctx->cur_ino);
if (ret < 0)
goto out;
if (!ret)
@@ -3276,13 +3289,12 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm)
ret = -ENOMEM;
goto out;
}
- ret = get_cur_path(sctx, rmdir_ino, odi->gen, name);
+ ret = get_cur_path(sctx, rmdir_ino, gen, name);
if (ret < 0)
goto out;
ret = send_rmdir(sctx, name);
if (ret < 0)
goto out;
- free_orphan_dir_info(sctx, odi);
}
finish:
@@ -5236,6 +5248,10 @@ static int send_write_or_clone(struct send_ctx *sctx,
len = btrfs_file_extent_num_bytes(path->nodes[0], ei);
}
+ if (offset >= sctx->cur_inode_size) {
+ ret = 0;
+ goto out;
+ }
if (offset + len > sctx->cur_inode_size)
len = sctx->cur_inode_size - offset;
if (len == 0) {
@@ -6450,7 +6466,7 @@ static void btrfs_root_dec_send_in_progress(struct btrfs_root* root)
*/
if (root->send_in_progress < 0)
btrfs_err(root->fs_info,
- "send_in_progres unbalanced %d root %llu",
+ "send_in_progress unbalanced %d root %llu",
root->send_in_progress, root->root_key.objectid);
spin_unlock(&root->root_item_lock);
}
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 0628092b0b1b..81107ad49f3a 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -323,6 +323,7 @@ enum {
Opt_ssd, Opt_nossd,
Opt_ssd_spread, Opt_nossd_spread,
Opt_subvol,
+ Opt_subvol_empty,
Opt_subvolid,
Opt_thread_pool,
Opt_treelog, Opt_notreelog,
@@ -388,6 +389,7 @@ static const match_table_t tokens = {
{Opt_ssd_spread, "ssd_spread"},
{Opt_nossd_spread, "nossd_spread"},
{Opt_subvol, "subvol=%s"},
+ {Opt_subvol_empty, "subvol="},
{Opt_subvolid, "subvolid=%s"},
{Opt_thread_pool, "thread_pool=%u"},
{Opt_treelog, "treelog"},
@@ -461,6 +463,7 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
btrfs_set_opt(info->mount_opt, DEGRADED);
break;
case Opt_subvol:
+ case Opt_subvol_empty:
case Opt_subvolid:
case Opt_subvolrootid:
case Opt_device:
@@ -1782,10 +1785,8 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
}
ret = btrfs_parse_options(fs_info, data, *flags);
- if (ret) {
- ret = -EINVAL;
+ if (ret)
goto restore;
- }
btrfs_remount_begin(fs_info, old_opts, *flags);
btrfs_resize_thread_pool(fs_info,
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index 4848a4318fb5..4a4e960c7c66 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -210,12 +210,42 @@ static struct attribute *btrfs_supported_feature_attrs[] = {
NULL
};
+/*
+ * Features which depend on feature bits and may differ between filesystems.
+ *
+ * /sys/fs/btrfs/features lists all available features of this kernel, while
+ * /sys/fs/btrfs/UUID/features shows the features of a given fs which are
+ * enabled or can be changed online.
+ */
static const struct attribute_group btrfs_feature_attr_group = {
.name = "features",
.is_visible = btrfs_feature_visible,
.attrs = btrfs_supported_feature_attrs,
};
+static ssize_t rmdir_subvol_show(struct kobject *kobj,
+ struct kobj_attribute *ka, char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, "0\n");
+}
+BTRFS_ATTR(static_feature, rmdir_subvol, rmdir_subvol_show);
+
+static struct attribute *btrfs_supported_static_feature_attrs[] = {
+ BTRFS_ATTR_PTR(static_feature, rmdir_subvol),
+ NULL
+};
+
+/*
+ * Features which only depend on kernel version.
+ *
+ * These are listed in /sys/fs/btrfs/features along with
+ * btrfs_feature_attr_group.
+ */
+static const struct attribute_group btrfs_static_feature_attr_group = {
+ .name = "features",
+ .attrs = btrfs_supported_static_feature_attrs,
+};
+
static ssize_t btrfs_show_u64(u64 *value_ptr, spinlock_t *lock, char *buf)
{
u64 val;
@@ -514,10 +544,11 @@ static inline struct btrfs_fs_info *to_fs_info(struct kobject *kobj)
}
#define NUM_FEATURE_BITS 64
-static char btrfs_unknown_feature_names[3][NUM_FEATURE_BITS][13];
-static struct btrfs_feature_attr btrfs_feature_attrs[3][NUM_FEATURE_BITS];
+#define BTRFS_FEATURE_NAME_MAX 13
+static char btrfs_unknown_feature_names[FEAT_MAX][NUM_FEATURE_BITS][BTRFS_FEATURE_NAME_MAX];
+static struct btrfs_feature_attr btrfs_feature_attrs[FEAT_MAX][NUM_FEATURE_BITS];
-static const u64 supported_feature_masks[3] = {
+static const u64 supported_feature_masks[FEAT_MAX] = {
[FEAT_COMPAT] = BTRFS_FEATURE_COMPAT_SUPP,
[FEAT_COMPAT_RO] = BTRFS_FEATURE_COMPAT_RO_SUPP,
[FEAT_INCOMPAT] = BTRFS_FEATURE_INCOMPAT_SUPP,
@@ -589,7 +620,7 @@ void btrfs_sysfs_remove_fsid(struct btrfs_fs_devices *fs_devs)
return;
}
- list_for_each_entry(fs_devs, fs_uuids, list) {
+ list_for_each_entry(fs_devs, fs_uuids, fs_list) {
__btrfs_sysfs_remove_fsid(fs_devs);
}
}
@@ -609,7 +640,7 @@ void btrfs_sysfs_remove_mounted(struct btrfs_fs_info *fs_info)
btrfs_sysfs_rm_device_link(fs_info->fs_devices, NULL);
}
-const char * const btrfs_feature_set_names[3] = {
+const char * const btrfs_feature_set_names[FEAT_MAX] = {
[FEAT_COMPAT] = "compat",
[FEAT_COMPAT_RO] = "compat_ro",
[FEAT_INCOMPAT] = "incompat",
@@ -673,7 +704,7 @@ static void init_feature_attrs(void)
if (fa->kobj_attr.attr.name)
continue;
- snprintf(name, 13, "%s:%u",
+ snprintf(name, BTRFS_FEATURE_NAME_MAX, "%s:%u",
btrfs_feature_set_names[set], i);
fa->kobj_attr.attr.name = name;
@@ -900,8 +931,15 @@ int __init btrfs_init_sysfs(void)
ret = sysfs_create_group(&btrfs_kset->kobj, &btrfs_feature_attr_group);
if (ret)
goto out2;
+ ret = sysfs_merge_group(&btrfs_kset->kobj,
+ &btrfs_static_feature_attr_group);
+ if (ret)
+ goto out_remove_group;
return 0;
+
+out_remove_group:
+ sysfs_remove_group(&btrfs_kset->kobj, &btrfs_feature_attr_group);
out2:
debugfs_remove_recursive(btrfs_debugfs_root_dentry);
out1:
@@ -912,6 +950,8 @@ out1:
void __cold btrfs_exit_sysfs(void)
{
+ sysfs_unmerge_group(&btrfs_kset->kobj,
+ &btrfs_static_feature_attr_group);
sysfs_remove_group(&btrfs_kset->kobj, &btrfs_feature_attr_group);
kset_unregister(btrfs_kset);
debugfs_remove_recursive(btrfs_debugfs_root_dentry);
diff --git a/fs/btrfs/sysfs.h b/fs/btrfs/sysfs.h
index b567560d9aa9..c6ee600aff89 100644
--- a/fs/btrfs/sysfs.h
+++ b/fs/btrfs/sysfs.h
@@ -9,7 +9,7 @@
extern u64 btrfs_debugfs_test;
enum btrfs_feature_set {
- FEAT_COMPAT,
+ FEAT_COMPAT = 0,
FEAT_COMPAT_RO,
FEAT_INCOMPAT,
FEAT_MAX
@@ -77,7 +77,7 @@ attr_to_btrfs_feature_attr(struct attribute *attr)
}
char *btrfs_printable_features(enum btrfs_feature_set set, u64 flags);
-extern const char * const btrfs_feature_set_names[3];
+extern const char * const btrfs_feature_set_names[FEAT_MAX];
extern struct kobj_type space_info_ktype;
extern struct kobj_type btrfs_raid_ktype;
int btrfs_sysfs_add_device_link(struct btrfs_fs_devices *fs_devices,
diff --git a/fs/btrfs/tests/btrfs-tests.c b/fs/btrfs/tests/btrfs-tests.c
index 30ed438da2a9..db72b3b6209e 100644
--- a/fs/btrfs/tests/btrfs-tests.c
+++ b/fs/btrfs/tests/btrfs-tests.c
@@ -219,11 +219,13 @@ void btrfs_free_dummy_block_group(struct btrfs_block_group_cache *cache)
kfree(cache);
}
-void btrfs_init_dummy_trans(struct btrfs_trans_handle *trans)
+void btrfs_init_dummy_trans(struct btrfs_trans_handle *trans,
+ struct btrfs_fs_info *fs_info)
{
memset(trans, 0, sizeof(*trans));
trans->transid = 1;
trans->type = __TRANS_DUMMY;
+ trans->fs_info = fs_info;
}
int btrfs_run_sanity_tests(void)
diff --git a/fs/btrfs/tests/btrfs-tests.h b/fs/btrfs/tests/btrfs-tests.h
index a5a0b9500d3e..70ff9f9d86a1 100644
--- a/fs/btrfs/tests/btrfs-tests.h
+++ b/fs/btrfs/tests/btrfs-tests.h
@@ -9,7 +9,8 @@
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
int btrfs_run_sanity_tests(void);
-#define test_msg(fmt, ...) pr_info("BTRFS: selftest: " fmt, ##__VA_ARGS__)
+#define test_msg(fmt, ...) pr_info("BTRFS: selftest: " fmt "\n", ##__VA_ARGS__)
+#define test_err(fmt, ...) pr_err("BTRFS: selftest: " fmt "\n", ##__VA_ARGS__)
struct btrfs_root;
struct btrfs_trans_handle;
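
With the newline folded into the macros, call sites drop their trailing "\n" and error paths switch from test_msg() to test_err(), which is what the bulk of the test-file hunks below do. A hypothetical call site after the change:

static int check_dummy_alloc(void *obj)
{
	if (!obj) {
		test_err("could not allocate dummy object"); /* pr_err path */
		return -ENOMEM;
	}
	test_msg("dummy allocation ok");	/* pr_info path */
	return 0;
}
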
@@ -28,7 +29,8 @@ void btrfs_free_dummy_root(struct btrfs_root *root);
struct btrfs_block_group_cache *
btrfs_alloc_dummy_block_group(struct btrfs_fs_info *fs_info, unsigned long length);
void btrfs_free_dummy_block_group(struct btrfs_block_group_cache *cache);
-void btrfs_init_dummy_trans(struct btrfs_trans_handle *trans);
+void btrfs_init_dummy_trans(struct btrfs_trans_handle *trans,
+ struct btrfs_fs_info *fs_info);
#else
static inline int btrfs_run_sanity_tests(void)
{
diff --git a/fs/btrfs/tests/extent-buffer-tests.c b/fs/btrfs/tests/extent-buffer-tests.c
index 31e8a9ec228c..7d72eab6d32c 100644
--- a/fs/btrfs/tests/extent-buffer-tests.c
+++ b/fs/btrfs/tests/extent-buffer-tests.c
@@ -26,31 +26,31 @@ static int test_btrfs_split_item(u32 sectorsize, u32 nodesize)
u32 value_len = strlen(value);
int ret = 0;
- test_msg("Running btrfs_split_item tests\n");
+ test_msg("running btrfs_split_item tests");
fs_info = btrfs_alloc_dummy_fs_info(nodesize, sectorsize);
if (!fs_info) {
- test_msg("Could not allocate fs_info\n");
+ test_err("could not allocate fs_info");
return -ENOMEM;
}
root = btrfs_alloc_dummy_root(fs_info);
if (IS_ERR(root)) {
- test_msg("Could not allocate root\n");
+ test_err("could not allocate root");
ret = PTR_ERR(root);
goto out;
}
path = btrfs_alloc_path();
if (!path) {
- test_msg("Could not allocate path\n");
+ test_err("could not allocate path");
ret = -ENOMEM;
goto out;
}
path->nodes[0] = eb = alloc_dummy_extent_buffer(fs_info, nodesize);
if (!eb) {
- test_msg("Could not allocate dummy buffer\n");
+ test_err("could not allocate dummy buffer");
ret = -ENOMEM;
goto out;
}
@@ -75,7 +75,7 @@ static int test_btrfs_split_item(u32 sectorsize, u32 nodesize)
*/
ret = btrfs_split_item(NULL, root, path, &key, 17);
if (ret) {
- test_msg("Split item failed %d\n", ret);
+ test_err("split item failed %d", ret);
goto out;
}
@@ -86,14 +86,14 @@ static int test_btrfs_split_item(u32 sectorsize, u32 nodesize)
btrfs_item_key_to_cpu(eb, &key, 0);
if (key.objectid != 0 || key.type != BTRFS_EXTENT_CSUM_KEY ||
key.offset != 0) {
- test_msg("Invalid key at slot 0\n");
+ test_err("invalid key at slot 0");
ret = -EINVAL;
goto out;
}
item = btrfs_item_nr(0);
if (btrfs_item_size(eb, item) != strlen(split1)) {
- test_msg("Invalid len in the first split\n");
+ test_err("invalid len in the first split");
ret = -EINVAL;
goto out;
}
@@ -101,8 +101,8 @@ static int test_btrfs_split_item(u32 sectorsize, u32 nodesize)
read_extent_buffer(eb, buf, btrfs_item_ptr_offset(eb, 0),
strlen(split1));
if (memcmp(buf, split1, strlen(split1))) {
- test_msg("Data in the buffer doesn't match what it should "
- "in the first split have='%.*s' want '%s'\n",
+ test_err(
+"data in the buffer doesn't match what it should in the first split have='%.*s' want '%s'",
(int)strlen(split1), buf, split1);
ret = -EINVAL;
goto out;
@@ -111,14 +111,14 @@ static int test_btrfs_split_item(u32 sectorsize, u32 nodesize)
btrfs_item_key_to_cpu(eb, &key, 1);
if (key.objectid != 0 || key.type != BTRFS_EXTENT_CSUM_KEY ||
key.offset != 3) {
- test_msg("Invalid key at slot 1\n");
+ test_err("invalid key at slot 1");
ret = -EINVAL;
goto out;
}
item = btrfs_item_nr(1);
if (btrfs_item_size(eb, item) != strlen(split2)) {
- test_msg("Invalid len in the second split\n");
+ test_err("invalid len in the second split");
ret = -EINVAL;
goto out;
}
@@ -126,8 +126,8 @@ static int test_btrfs_split_item(u32 sectorsize, u32 nodesize)
read_extent_buffer(eb, buf, btrfs_item_ptr_offset(eb, 1),
strlen(split2));
if (memcmp(buf, split2, strlen(split2))) {
- test_msg("Data in the buffer doesn't match what it should "
- "in the second split\n");
+ test_err(
+ "data in the buffer doesn't match what it should in the second split");
ret = -EINVAL;
goto out;
}
@@ -136,21 +136,21 @@ static int test_btrfs_split_item(u32 sectorsize, u32 nodesize)
/* Do it again so we test memmoving the other items in the leaf */
ret = btrfs_split_item(NULL, root, path, &key, 4);
if (ret) {
- test_msg("Second split item failed %d\n", ret);
+ test_err("second split item failed %d", ret);
goto out;
}
btrfs_item_key_to_cpu(eb, &key, 0);
if (key.objectid != 0 || key.type != BTRFS_EXTENT_CSUM_KEY ||
key.offset != 0) {
- test_msg("Invalid key at slot 0\n");
+ test_err("invalid key at slot 0");
ret = -EINVAL;
goto out;
}
item = btrfs_item_nr(0);
if (btrfs_item_size(eb, item) != strlen(split3)) {
- test_msg("Invalid len in the first split\n");
+ test_err("invalid len in the first split");
ret = -EINVAL;
goto out;
}
@@ -158,8 +158,8 @@ static int test_btrfs_split_item(u32 sectorsize, u32 nodesize)
read_extent_buffer(eb, buf, btrfs_item_ptr_offset(eb, 0),
strlen(split3));
if (memcmp(buf, split3, strlen(split3))) {
- test_msg("Data in the buffer doesn't match what it should "
- "in the third split");
+ test_err(
+ "data in the buffer doesn't match what it should in the third split");
ret = -EINVAL;
goto out;
}
@@ -167,14 +167,14 @@ static int test_btrfs_split_item(u32 sectorsize, u32 nodesize)
btrfs_item_key_to_cpu(eb, &key, 1);
if (key.objectid != 0 || key.type != BTRFS_EXTENT_CSUM_KEY ||
key.offset != 1) {
- test_msg("Invalid key at slot 1\n");
+ test_err("invalid key at slot 1");
ret = -EINVAL;
goto out;
}
item = btrfs_item_nr(1);
if (btrfs_item_size(eb, item) != strlen(split4)) {
- test_msg("Invalid len in the second split\n");
+ test_err("invalid len in the second split");
ret = -EINVAL;
goto out;
}
@@ -182,8 +182,8 @@ static int test_btrfs_split_item(u32 sectorsize, u32 nodesize)
read_extent_buffer(eb, buf, btrfs_item_ptr_offset(eb, 1),
strlen(split4));
if (memcmp(buf, split4, strlen(split4))) {
- test_msg("Data in the buffer doesn't match what it should "
- "in the fourth split\n");
+ test_err(
+ "data in the buffer doesn't match what it should in the fourth split");
ret = -EINVAL;
goto out;
}
@@ -191,14 +191,14 @@ static int test_btrfs_split_item(u32 sectorsize, u32 nodesize)
btrfs_item_key_to_cpu(eb, &key, 2);
if (key.objectid != 0 || key.type != BTRFS_EXTENT_CSUM_KEY ||
key.offset != 3) {
- test_msg("Invalid key at slot 2\n");
+ test_err("invalid key at slot 2");
ret = -EINVAL;
goto out;
}
item = btrfs_item_nr(2);
if (btrfs_item_size(eb, item) != strlen(split2)) {
- test_msg("Invalid len in the second split\n");
+ test_err("invalid len in the second split");
ret = -EINVAL;
goto out;
}
@@ -206,8 +206,8 @@ static int test_btrfs_split_item(u32 sectorsize, u32 nodesize)
read_extent_buffer(eb, buf, btrfs_item_ptr_offset(eb, 2),
strlen(split2));
if (memcmp(buf, split2, strlen(split2))) {
- test_msg("Data in the buffer doesn't match what it should "
- "in the last chunk\n");
+ test_err(
+ "data in the buffer doesn't match what it should in the last chunk");
ret = -EINVAL;
goto out;
}
@@ -220,6 +220,6 @@ out:
int btrfs_test_extent_buffer_operations(u32 sectorsize, u32 nodesize)
{
- test_msg("Running extent buffer operation tests\n");
+ test_msg("running extent buffer operation tests");
return test_btrfs_split_item(sectorsize, nodesize);
}
diff --git a/fs/btrfs/tests/extent-io-tests.c b/fs/btrfs/tests/extent-io-tests.c
index 76aa5a678a96..d9269a531a4d 100644
--- a/fs/btrfs/tests/extent-io-tests.c
+++ b/fs/btrfs/tests/extent-io-tests.c
@@ -46,7 +46,9 @@ static noinline int process_page_range(struct inode *inode, u64 start, u64 end,
cond_resched();
loops++;
if (loops > 100000) {
- printk(KERN_ERR "stuck in a loop, start %Lu, end %Lu, nr_pages %lu, ret %d\n", start, end, nr_pages, ret);
+ printk(KERN_ERR
+ "stuck in a loop, start %llu, end %llu, nr_pages %lu, ret %d\n",
+ start, end, nr_pages, ret);
break;
}
}
@@ -66,11 +68,11 @@ static int test_find_delalloc(u32 sectorsize)
u64 found;
int ret = -EINVAL;
- test_msg("Running find delalloc tests\n");
+ test_msg("running find delalloc tests");
inode = btrfs_new_test_inode();
if (!inode) {
- test_msg("Failed to allocate test inode\n");
+ test_err("failed to allocate test inode");
return -ENOMEM;
}
@@ -84,7 +86,7 @@ static int test_find_delalloc(u32 sectorsize)
for (index = 0; index < (total_dirty >> PAGE_SHIFT); index++) {
page = find_or_create_page(inode->i_mapping, index, GFP_KERNEL);
if (!page) {
- test_msg("Failed to allocate test page\n");
+ test_err("failed to allocate test page");
ret = -ENOMEM;
goto out;
}
@@ -107,11 +109,11 @@ static int test_find_delalloc(u32 sectorsize)
found = find_lock_delalloc_range(inode, &tmp, locked_page, &start,
&end, max_bytes);
if (!found) {
- test_msg("Should have found at least one delalloc\n");
+ test_err("should have found at least one delalloc");
goto out_bits;
}
if (start != 0 || end != (sectorsize - 1)) {
- test_msg("Expected start 0 end %u, got start %llu end %llu\n",
+ test_err("expected start 0 end %u, got start %llu end %llu",
sectorsize - 1, start, end);
goto out_bits;
}
@@ -129,7 +131,7 @@ static int test_find_delalloc(u32 sectorsize)
locked_page = find_lock_page(inode->i_mapping,
test_start >> PAGE_SHIFT);
if (!locked_page) {
- test_msg("Couldn't find the locked page\n");
+ test_err("couldn't find the locked page");
goto out_bits;
}
set_extent_delalloc(&tmp, sectorsize, max_bytes - 1, 0, NULL);
@@ -138,17 +140,17 @@ static int test_find_delalloc(u32 sectorsize)
found = find_lock_delalloc_range(inode, &tmp, locked_page, &start,
&end, max_bytes);
if (!found) {
- test_msg("Couldn't find delalloc in our range\n");
+ test_err("couldn't find delalloc in our range");
goto out_bits;
}
if (start != test_start || end != max_bytes - 1) {
- test_msg("Expected start %Lu end %Lu, got start %Lu, end "
- "%Lu\n", test_start, max_bytes - 1, start, end);
+ test_err("expected start %llu end %llu, got start %llu, end %llu",
+ test_start, max_bytes - 1, start, end);
goto out_bits;
}
if (process_page_range(inode, start, end,
PROCESS_TEST_LOCKED | PROCESS_UNLOCK)) {
- test_msg("There were unlocked pages in the range\n");
+ test_err("there were unlocked pages in the range");
goto out_bits;
}
unlock_extent(&tmp, start, end);
@@ -164,7 +166,7 @@ static int test_find_delalloc(u32 sectorsize)
locked_page = find_lock_page(inode->i_mapping, test_start >>
PAGE_SHIFT);
if (!locked_page) {
- test_msg("Couldn't find the locked page\n");
+ test_err("couldn't find the locked page");
goto out_bits;
}
start = test_start;
@@ -172,11 +174,11 @@ static int test_find_delalloc(u32 sectorsize)
found = find_lock_delalloc_range(inode, &tmp, locked_page, &start,
&end, max_bytes);
if (found) {
- test_msg("Found range when we shouldn't have\n");
+ test_err("found range when we shouldn't have");
goto out_bits;
}
if (end != (u64)-1) {
- test_msg("Did not return the proper end offset\n");
+ test_err("did not return the proper end offset");
goto out_bits;
}
@@ -193,17 +195,17 @@ static int test_find_delalloc(u32 sectorsize)
found = find_lock_delalloc_range(inode, &tmp, locked_page, &start,
&end, max_bytes);
if (!found) {
- test_msg("Didn't find our range\n");
+ test_err("didn't find our range");
goto out_bits;
}
if (start != test_start || end != total_dirty - 1) {
- test_msg("Expected start %Lu end %Lu, got start %Lu end %Lu\n",
+ test_err("expected start %llu end %llu, got start %llu end %llu",
test_start, total_dirty - 1, start, end);
goto out_bits;
}
if (process_page_range(inode, start, end,
PROCESS_TEST_LOCKED | PROCESS_UNLOCK)) {
- test_msg("Pages in range were not all locked\n");
+ test_err("pages in range were not all locked");
goto out_bits;
}
unlock_extent(&tmp, start, end);
@@ -215,7 +217,7 @@ static int test_find_delalloc(u32 sectorsize)
page = find_get_page(inode->i_mapping,
(max_bytes + SZ_1M) >> PAGE_SHIFT);
if (!page) {
- test_msg("Couldn't find our page\n");
+ test_err("couldn't find our page");
goto out_bits;
}
ClearPageDirty(page);
@@ -234,18 +236,17 @@ static int test_find_delalloc(u32 sectorsize)
found = find_lock_delalloc_range(inode, &tmp, locked_page, &start,
&end, max_bytes);
if (!found) {
- test_msg("Didn't find our range\n");
+ test_err("didn't find our range");
goto out_bits;
}
if (start != test_start && end != test_start + PAGE_SIZE - 1) {
- test_msg("Expected start %Lu end %Lu, got start %Lu end %Lu\n",
- test_start, test_start + PAGE_SIZE - 1, start,
- end);
+ test_err("expected start %llu end %llu, got start %llu end %llu",
+ test_start, test_start + PAGE_SIZE - 1, start, end);
goto out_bits;
}
if (process_page_range(inode, start, end, PROCESS_TEST_LOCKED |
PROCESS_UNLOCK)) {
- test_msg("Pages in range were not all locked\n");
+ test_err("pages in range were not all locked");
goto out_bits;
}
ret = 0;
@@ -271,14 +272,14 @@ static int check_eb_bitmap(unsigned long *bitmap, struct extent_buffer *eb,
bit = !!test_bit(i, bitmap);
bit1 = !!extent_buffer_test_bit(eb, 0, i);
if (bit1 != bit) {
- test_msg("Bits do not match\n");
+ test_err("bits do not match");
return -EINVAL;
}
bit1 = !!extent_buffer_test_bit(eb, i / BITS_PER_BYTE,
i % BITS_PER_BYTE);
if (bit1 != bit) {
- test_msg("Offset bits do not match\n");
+ test_err("offset bits do not match");
return -EINVAL;
}
}
@@ -295,7 +296,7 @@ static int __test_eb_bitmaps(unsigned long *bitmap, struct extent_buffer *eb,
memset(bitmap, 0, len);
memzero_extent_buffer(eb, 0, len);
if (memcmp_extent_buffer(eb, bitmap, 0, len) != 0) {
- test_msg("Bitmap was not zeroed\n");
+ test_err("bitmap was not zeroed");
return -EINVAL;
}
@@ -303,7 +304,7 @@ static int __test_eb_bitmaps(unsigned long *bitmap, struct extent_buffer *eb,
extent_buffer_bitmap_set(eb, 0, 0, len * BITS_PER_BYTE);
ret = check_eb_bitmap(bitmap, eb, len);
if (ret) {
- test_msg("Setting all bits failed\n");
+ test_err("setting all bits failed");
return ret;
}
@@ -311,7 +312,7 @@ static int __test_eb_bitmaps(unsigned long *bitmap, struct extent_buffer *eb,
extent_buffer_bitmap_clear(eb, 0, 0, len * BITS_PER_BYTE);
ret = check_eb_bitmap(bitmap, eb, len);
if (ret) {
- test_msg("Clearing all bits failed\n");
+ test_err("clearing all bits failed");
return ret;
}
@@ -324,7 +325,7 @@ static int __test_eb_bitmaps(unsigned long *bitmap, struct extent_buffer *eb,
sizeof(long) * BITS_PER_BYTE);
ret = check_eb_bitmap(bitmap, eb, len);
if (ret) {
- test_msg("Setting straddling pages failed\n");
+ test_err("setting straddling pages failed");
return ret;
}
@@ -337,7 +338,7 @@ static int __test_eb_bitmaps(unsigned long *bitmap, struct extent_buffer *eb,
sizeof(long) * BITS_PER_BYTE);
ret = check_eb_bitmap(bitmap, eb, len);
if (ret) {
- test_msg("Clearing straddling pages failed\n");
+ test_err("clearing straddling pages failed");
return ret;
}
}
@@ -361,7 +362,7 @@ static int __test_eb_bitmaps(unsigned long *bitmap, struct extent_buffer *eb,
ret = check_eb_bitmap(bitmap, eb, len);
if (ret) {
- test_msg("Random bit pattern failed\n");
+ test_err("random bit pattern failed");
return ret;
}
@@ -376,7 +377,7 @@ static int test_eb_bitmaps(u32 sectorsize, u32 nodesize)
struct extent_buffer *eb;
int ret;
- test_msg("Running extent buffer bitmap tests\n");
+ test_msg("running extent buffer bitmap tests");
/*
* In ppc64, sectorsize can be 64K, thus 4 * 64K will be larger than
@@ -389,13 +390,13 @@ static int test_eb_bitmaps(u32 sectorsize, u32 nodesize)
bitmap = kmalloc(len, GFP_KERNEL);
if (!bitmap) {
- test_msg("Couldn't allocate test bitmap\n");
+ test_err("couldn't allocate test bitmap");
return -ENOMEM;
}
eb = __alloc_dummy_extent_buffer(fs_info, 0, len);
if (!eb) {
- test_msg("Couldn't allocate test extent buffer\n");
+ test_err("couldn't allocate test extent buffer");
kfree(bitmap);
return -ENOMEM;
}
@@ -408,7 +409,7 @@ static int test_eb_bitmaps(u32 sectorsize, u32 nodesize)
free_extent_buffer(eb);
eb = __alloc_dummy_extent_buffer(NULL, nodesize / 2, len);
if (!eb) {
- test_msg("Couldn't allocate test extent buffer\n");
+ test_err("couldn't allocate test extent buffer");
kfree(bitmap);
return -ENOMEM;
}
@@ -424,7 +425,7 @@ int btrfs_test_extent_io(u32 sectorsize, u32 nodesize)
{
int ret;
- test_msg("Running extent I/O tests\n");
+ test_msg("running extent I/O tests");
ret = test_find_delalloc(sectorsize);
if (ret)
@@ -432,6 +433,6 @@ int btrfs_test_extent_io(u32 sectorsize, u32 nodesize)
ret = test_eb_bitmaps(sectorsize, nodesize);
out:
- test_msg("Extent I/O tests finished\n");
+ test_msg("extent I/O tests finished");
return ret;
}
diff --git a/fs/btrfs/tests/extent-map-tests.c b/fs/btrfs/tests/extent-map-tests.c
index 79e0a5f4d9c9..385a5316e4bf 100644
--- a/fs/btrfs/tests/extent-map-tests.c
+++ b/fs/btrfs/tests/extent-map-tests.c
@@ -19,8 +19,8 @@ static void free_extent_map_tree(struct extent_map_tree *em_tree)
#ifdef CONFIG_BTRFS_DEBUG
if (refcount_read(&em->refs) != 1) {
- test_msg(
-"em leak: em (start 0x%llx len 0x%llx block_start 0x%llx block_len 0x%llx) refs %d\n",
+ test_err(
+"em leak: em (start 0x%llx len 0x%llx block_start 0x%llx block_len 0x%llx) refs %d",
em->start, em->len, em->block_start,
em->block_len, refcount_read(&em->refs));
@@ -47,7 +47,8 @@ static void free_extent_map_tree(struct extent_map_tree *em_tree)
* ->add_extent_mapping(0, 16K)
* -> #handle -EEXIST
*/
-static void test_case_1(struct extent_map_tree *em_tree)
+static void test_case_1(struct btrfs_fs_info *fs_info,
+ struct extent_map_tree *em_tree)
{
struct extent_map *em;
u64 start = 0;
@@ -90,14 +91,14 @@ static void test_case_1(struct extent_map_tree *em_tree)
em->len = len;
em->block_start = start;
em->block_len = len;
- ret = btrfs_add_extent_mapping(em_tree, &em, em->start, em->len);
+ ret = btrfs_add_extent_mapping(fs_info, em_tree, &em, em->start, em->len);
if (ret)
- test_msg("case1 [%llu %llu]: ret %d\n", start, start + len, ret);
+ test_err("case1 [%llu %llu]: ret %d", start, start + len, ret);
if (em &&
(em->start != 0 || extent_map_end(em) != SZ_16K ||
em->block_start != 0 || em->block_len != SZ_16K))
- test_msg(
-"case1 [%llu %llu]: ret %d return a wrong em (start %llu len %llu block_start %llu block_len %llu\n",
+ test_err(
+"case1 [%llu %llu]: ret %d return a wrong em (start %llu len %llu block_start %llu block_len %llu",
start, start + len, ret, em->start, em->len,
em->block_start, em->block_len);
free_extent_map(em);
@@ -112,7 +113,8 @@ out:
* Reading the inline ending up with EEXIST, ie. read an inline
* extent and discard page cache and read it again.
*/
-static void test_case_2(struct extent_map_tree *em_tree)
+static void test_case_2(struct btrfs_fs_info *fs_info,
+ struct extent_map_tree *em_tree)
{
struct extent_map *em;
int ret;
@@ -153,14 +155,14 @@ static void test_case_2(struct extent_map_tree *em_tree)
em->len = SZ_1K;
em->block_start = EXTENT_MAP_INLINE;
em->block_len = (u64)-1;
- ret = btrfs_add_extent_mapping(em_tree, &em, em->start, em->len);
+ ret = btrfs_add_extent_mapping(fs_info, em_tree, &em, em->start, em->len);
if (ret)
- test_msg("case2 [0 1K]: ret %d\n", ret);
+ test_err("case2 [0 1K]: ret %d", ret);
if (em &&
(em->start != 0 || extent_map_end(em) != SZ_1K ||
em->block_start != EXTENT_MAP_INLINE || em->block_len != (u64)-1))
- test_msg(
-"case2 [0 1K]: ret %d return a wrong em (start %llu len %llu block_start %llu block_len %llu\n",
+ test_err(
+"case2 [0 1K]: ret %d return a wrong em (start %llu len %llu block_start %llu block_len %llu)",
ret, em->start, em->len, em->block_start,
em->block_len);
free_extent_map(em);
@@ -169,7 +171,8 @@ out:
free_extent_map_tree(em_tree);
}
-static void __test_case_3(struct extent_map_tree *em_tree, u64 start)
+static void __test_case_3(struct btrfs_fs_info *fs_info,
+ struct extent_map_tree *em_tree, u64 start)
{
struct extent_map *em;
u64 len = SZ_4K;
@@ -198,9 +201,9 @@ static void __test_case_3(struct extent_map_tree *em_tree, u64 start)
em->len = SZ_16K;
em->block_start = 0;
em->block_len = SZ_16K;
- ret = btrfs_add_extent_mapping(em_tree, &em, start, len);
+ ret = btrfs_add_extent_mapping(fs_info, em_tree, &em, start, len);
if (ret)
- test_msg("case3 [0x%llx 0x%llx): ret %d\n",
+ test_err("case3 [0x%llx 0x%llx): ret %d",
start, start + len, ret);
/*
* Since bytes within em are contiguous, em->block_start is identical to
@@ -209,8 +212,8 @@ static void __test_case_3(struct extent_map_tree *em_tree, u64 start)
if (em &&
(start < em->start || start + len > extent_map_end(em) ||
em->start != em->block_start || em->len != em->block_len))
- test_msg(
-"case3 [0x%llx 0x%llx): ret %d em (start 0x%llx len 0x%llx block_start 0x%llx block_len 0x%llx)\n",
+ test_err(
+"case3 [0x%llx 0x%llx): ret %d em (start 0x%llx len 0x%llx block_start 0x%llx block_len 0x%llx)",
start, start + len, ret, em->start, em->len,
em->block_start, em->block_len);
free_extent_map(em);
@@ -235,14 +238,16 @@ out:
* -> add_extent_mapping()
* -> add_extent_mapping()
*/
-static void test_case_3(struct extent_map_tree *em_tree)
+static void test_case_3(struct btrfs_fs_info *fs_info,
+ struct extent_map_tree *em_tree)
{
- __test_case_3(em_tree, 0);
- __test_case_3(em_tree, SZ_8K);
- __test_case_3(em_tree, (12 * 1024ULL));
+ __test_case_3(fs_info, em_tree, 0);
+ __test_case_3(fs_info, em_tree, SZ_8K);
+ __test_case_3(fs_info, em_tree, (12 * 1024ULL));
}
-static void __test_case_4(struct extent_map_tree *em_tree, u64 start)
+static void __test_case_4(struct btrfs_fs_info *fs_info,
+ struct extent_map_tree *em_tree, u64 start)
{
struct extent_map *em;
u64 len = SZ_4K;
@@ -283,14 +288,14 @@ static void __test_case_4(struct extent_map_tree *em_tree, u64 start)
em->len = SZ_32K;
em->block_start = 0;
em->block_len = SZ_32K;
- ret = btrfs_add_extent_mapping(em_tree, &em, start, len);
+ ret = btrfs_add_extent_mapping(fs_info, em_tree, &em, start, len);
if (ret)
- test_msg("case4 [0x%llx 0x%llx): ret %d\n",
+		test_err("case4 start 0x%llx len 0x%llx: ret %d",
start, len, ret);
if (em &&
(start < em->start || start + len > extent_map_end(em)))
- test_msg(
-"case4 [0x%llx 0x%llx): ret %d, added wrong em (start 0x%llx len 0x%llx block_start 0x%llx block_len 0x%llx)\n",
+ test_err(
+"case4 start 0x%llx len 0x%llx: ret %d, added wrong em (start 0x%llx len 0x%llx block_start 0x%llx block_len 0x%llx)",
start, len, ret, em->start, em->len, em->block_start,
em->block_len);
free_extent_map(em);
@@ -324,30 +329,45 @@ out:
* # handle -EEXIST when adding
* # [0, 32K)
*/
-static void test_case_4(struct extent_map_tree *em_tree)
+static void test_case_4(struct btrfs_fs_info *fs_info,
+ struct extent_map_tree *em_tree)
{
- __test_case_4(em_tree, 0);
- __test_case_4(em_tree, SZ_4K);
+ __test_case_4(fs_info, em_tree, 0);
+ __test_case_4(fs_info, em_tree, SZ_4K);
}
int btrfs_test_extent_map(void)
{
+ struct btrfs_fs_info *fs_info = NULL;
struct extent_map_tree *em_tree;
- test_msg("Running extent_map tests\n");
+ test_msg("running extent_map tests");
+
+ /*
+	 * Note: the fs_info is not set up completely; we only need
+ * fs_info::fsid for the tracepoint.
+ */
+ fs_info = btrfs_alloc_dummy_fs_info(PAGE_SIZE, PAGE_SIZE);
+ if (!fs_info) {
+		test_err("couldn't allocate dummy fs info");
+ return -ENOMEM;
+ }
em_tree = kzalloc(sizeof(*em_tree), GFP_KERNEL);
if (!em_tree)
/* Skip the test on error. */
- return 0;
+ goto out;
extent_map_tree_init(em_tree);
- test_case_1(em_tree);
- test_case_2(em_tree);
- test_case_3(em_tree);
- test_case_4(em_tree);
+ test_case_1(fs_info, em_tree);
+ test_case_2(fs_info, em_tree);
+ test_case_3(fs_info, em_tree);
+ test_case_4(fs_info, em_tree);
kfree(em_tree);
+out:
+ btrfs_free_dummy_fs_info(fs_info);
+
return 0;
}
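
The hunks above thread a struct btrfs_fs_info through every extent-map test case because btrfs_add_extent_mapping() now takes the fs_info as its first argument; the tests only need fs_info::fsid so the tracepoint in that helper has something to print. A sketch of the resulting call shape, with the prototype inferred from the call sites in this diff:

/* Prototype inferred from the call sites above (not copied from a header). */
int btrfs_add_extent_mapping(struct btrfs_fs_info *fs_info,
			     struct extent_map_tree *em_tree,
			     struct extent_map **em_in, u64 start, u64 len);

/* Call shape used by every test case after the change: */
ret = btrfs_add_extent_mapping(fs_info, em_tree, &em, em->start, em->len);

The dummy fs_info is allocated once in btrfs_test_extent_map(), handed down through test_case_1() to test_case_4(), and released via btrfs_free_dummy_fs_info() on the way out, including on the early-exit path when the em_tree allocation fails.
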
diff --git a/fs/btrfs/tests/free-space-tests.c b/fs/btrfs/tests/free-space-tests.c
index d3c9f8a59ba5..5c2f77e9439b 100644
--- a/fs/btrfs/tests/free-space-tests.c
+++ b/fs/btrfs/tests/free-space-tests.c
@@ -20,63 +20,63 @@ static int test_extents(struct btrfs_block_group_cache *cache)
{
int ret = 0;
- test_msg("Running extent only tests\n");
+ test_msg("running extent only tests");
/* First just make sure we can remove an entire entry */
ret = btrfs_add_free_space(cache, 0, SZ_4M);
if (ret) {
- test_msg("Error adding initial extents %d\n", ret);
+ test_err("error adding initial extents %d", ret);
return ret;
}
ret = btrfs_remove_free_space(cache, 0, SZ_4M);
if (ret) {
- test_msg("Error removing extent %d\n", ret);
+ test_err("error removing extent %d", ret);
return ret;
}
if (test_check_exists(cache, 0, SZ_4M)) {
- test_msg("Full remove left some lingering space\n");
+ test_err("full remove left some lingering space");
return -1;
}
/* Ok edge and middle cases now */
ret = btrfs_add_free_space(cache, 0, SZ_4M);
if (ret) {
- test_msg("Error adding half extent %d\n", ret);
+ test_err("error adding half extent %d", ret);
return ret;
}
ret = btrfs_remove_free_space(cache, 3 * SZ_1M, SZ_1M);
if (ret) {
- test_msg("Error removing tail end %d\n", ret);
+ test_err("error removing tail end %d", ret);
return ret;
}
ret = btrfs_remove_free_space(cache, 0, SZ_1M);
if (ret) {
- test_msg("Error removing front end %d\n", ret);
+ test_err("error removing front end %d", ret);
return ret;
}
ret = btrfs_remove_free_space(cache, SZ_2M, 4096);
if (ret) {
- test_msg("Error removing middle piece %d\n", ret);
+ test_err("error removing middle piece %d", ret);
return ret;
}
if (test_check_exists(cache, 0, SZ_1M)) {
- test_msg("Still have space at the front\n");
+ test_err("still have space at the front");
return -1;
}
if (test_check_exists(cache, SZ_2M, 4096)) {
- test_msg("Still have space in the middle\n");
+ test_err("still have space in the middle");
return -1;
}
if (test_check_exists(cache, 3 * SZ_1M, SZ_1M)) {
- test_msg("Still have space at the end\n");
+ test_err("still have space at the end");
return -1;
}
@@ -92,34 +92,34 @@ static int test_bitmaps(struct btrfs_block_group_cache *cache,
u64 next_bitmap_offset;
int ret;
- test_msg("Running bitmap only tests\n");
+ test_msg("running bitmap only tests");
ret = test_add_free_space_entry(cache, 0, SZ_4M, 1);
if (ret) {
- test_msg("Couldn't create a bitmap entry %d\n", ret);
+ test_err("couldn't create a bitmap entry %d", ret);
return ret;
}
ret = btrfs_remove_free_space(cache, 0, SZ_4M);
if (ret) {
- test_msg("Error removing bitmap full range %d\n", ret);
+ test_err("error removing bitmap full range %d", ret);
return ret;
}
if (test_check_exists(cache, 0, SZ_4M)) {
- test_msg("Left some space in bitmap\n");
+ test_err("left some space in bitmap");
return -1;
}
ret = test_add_free_space_entry(cache, 0, SZ_4M, 1);
if (ret) {
- test_msg("Couldn't add to our bitmap entry %d\n", ret);
+ test_err("couldn't add to our bitmap entry %d", ret);
return ret;
}
ret = btrfs_remove_free_space(cache, SZ_1M, SZ_2M);
if (ret) {
- test_msg("Couldn't remove middle chunk %d\n", ret);
+ test_err("couldn't remove middle chunk %d", ret);
return ret;
}
@@ -133,19 +133,19 @@ static int test_bitmaps(struct btrfs_block_group_cache *cache,
ret = test_add_free_space_entry(cache, next_bitmap_offset - SZ_2M,
SZ_4M, 1);
if (ret) {
- test_msg("Couldn't add space that straddles two bitmaps %d\n",
+ test_err("couldn't add space that straddles two bitmaps %d",
ret);
return ret;
}
ret = btrfs_remove_free_space(cache, next_bitmap_offset - SZ_1M, SZ_2M);
if (ret) {
- test_msg("Couldn't remove overlapping space %d\n", ret);
+ test_err("couldn't remove overlapping space %d", ret);
return ret;
}
if (test_check_exists(cache, next_bitmap_offset - SZ_1M, SZ_2M)) {
- test_msg("Left some space when removing overlapping\n");
+ test_err("left some space when removing overlapping");
return -1;
}
@@ -161,7 +161,7 @@ static int test_bitmaps_and_extents(struct btrfs_block_group_cache *cache,
u64 bitmap_offset = (u64)(BITS_PER_BITMAP * sectorsize);
int ret;
- test_msg("Running bitmap and extent tests\n");
+ test_msg("running bitmap and extent tests");
/*
* First let's do something simple, an extent at the same offset as the
@@ -170,42 +170,42 @@ static int test_bitmaps_and_extents(struct btrfs_block_group_cache *cache,
*/
ret = test_add_free_space_entry(cache, SZ_4M, SZ_1M, 1);
if (ret) {
- test_msg("Couldn't create bitmap entry %d\n", ret);
+ test_err("couldn't create bitmap entry %d", ret);
return ret;
}
ret = test_add_free_space_entry(cache, 0, SZ_1M, 0);
if (ret) {
- test_msg("Couldn't add extent entry %d\n", ret);
+ test_err("couldn't add extent entry %d", ret);
return ret;
}
ret = btrfs_remove_free_space(cache, 0, SZ_1M);
if (ret) {
- test_msg("Couldn't remove extent entry %d\n", ret);
+ test_err("couldn't remove extent entry %d", ret);
return ret;
}
if (test_check_exists(cache, 0, SZ_1M)) {
- test_msg("Left remnants after our remove\n");
+ test_err("left remnants after our remove");
return -1;
}
/* Now to add back the extent entry and remove from the bitmap */
ret = test_add_free_space_entry(cache, 0, SZ_1M, 0);
if (ret) {
- test_msg("Couldn't re-add extent entry %d\n", ret);
+ test_err("couldn't re-add extent entry %d", ret);
return ret;
}
ret = btrfs_remove_free_space(cache, SZ_4M, SZ_1M);
if (ret) {
- test_msg("Couldn't remove from bitmap %d\n", ret);
+ test_err("couldn't remove from bitmap %d", ret);
return ret;
}
if (test_check_exists(cache, SZ_4M, SZ_1M)) {
- test_msg("Left remnants in the bitmap\n");
+ test_err("left remnants in the bitmap");
return -1;
}
@@ -215,18 +215,18 @@ static int test_bitmaps_and_extents(struct btrfs_block_group_cache *cache,
*/
ret = test_add_free_space_entry(cache, SZ_1M, SZ_4M, 1);
if (ret) {
- test_msg("Couldn't add to a bitmap %d\n", ret);
+ test_err("couldn't add to a bitmap %d", ret);
return ret;
}
ret = btrfs_remove_free_space(cache, SZ_512K, 3 * SZ_1M);
if (ret) {
- test_msg("Couldn't remove overlapping space %d\n", ret);
+ test_err("couldn't remove overlapping space %d", ret);
return ret;
}
if (test_check_exists(cache, SZ_512K, 3 * SZ_1M)) {
- test_msg("Left over pieces after removing overlapping\n");
+ test_err("left over pieces after removing overlapping");
return -1;
}
@@ -235,24 +235,24 @@ static int test_bitmaps_and_extents(struct btrfs_block_group_cache *cache,
/* Now with the extent entry offset into the bitmap */
ret = test_add_free_space_entry(cache, SZ_4M, SZ_4M, 1);
if (ret) {
- test_msg("Couldn't add space to the bitmap %d\n", ret);
+ test_err("couldn't add space to the bitmap %d", ret);
return ret;
}
ret = test_add_free_space_entry(cache, SZ_2M, SZ_2M, 0);
if (ret) {
- test_msg("Couldn't add extent to the cache %d\n", ret);
+ test_err("couldn't add extent to the cache %d", ret);
return ret;
}
ret = btrfs_remove_free_space(cache, 3 * SZ_1M, SZ_4M);
if (ret) {
- test_msg("Problem removing overlapping space %d\n", ret);
+ test_err("problem removing overlapping space %d", ret);
return ret;
}
if (test_check_exists(cache, 3 * SZ_1M, SZ_4M)) {
- test_msg("Left something behind when removing space");
+ test_err("left something behind when removing space");
return -1;
}
@@ -269,25 +269,25 @@ static int test_bitmaps_and_extents(struct btrfs_block_group_cache *cache,
__btrfs_remove_free_space_cache(cache->free_space_ctl);
ret = test_add_free_space_entry(cache, bitmap_offset + SZ_4M, SZ_4M, 1);
if (ret) {
- test_msg("Couldn't add bitmap %d\n", ret);
+ test_err("couldn't add bitmap %d", ret);
return ret;
}
ret = test_add_free_space_entry(cache, bitmap_offset - SZ_1M,
5 * SZ_1M, 0);
if (ret) {
- test_msg("Couldn't add extent entry %d\n", ret);
+ test_err("couldn't add extent entry %d", ret);
return ret;
}
ret = btrfs_remove_free_space(cache, bitmap_offset + SZ_1M, 5 * SZ_1M);
if (ret) {
- test_msg("Failed to free our space %d\n", ret);
+ test_err("failed to free our space %d", ret);
return ret;
}
if (test_check_exists(cache, bitmap_offset + SZ_1M, 5 * SZ_1M)) {
- test_msg("Left stuff over\n");
+ test_err("left stuff over");
return -1;
}
@@ -301,19 +301,19 @@ static int test_bitmaps_and_extents(struct btrfs_block_group_cache *cache,
*/
ret = test_add_free_space_entry(cache, SZ_1M, SZ_2M, 1);
if (ret) {
- test_msg("Couldn't add bitmap entry %d\n", ret);
+ test_err("couldn't add bitmap entry %d", ret);
return ret;
}
ret = test_add_free_space_entry(cache, 3 * SZ_1M, SZ_1M, 0);
if (ret) {
- test_msg("Couldn't add extent entry %d\n", ret);
+ test_err("couldn't add extent entry %d", ret);
return ret;
}
ret = btrfs_remove_free_space(cache, SZ_1M, 3 * SZ_1M);
if (ret) {
- test_msg("Error removing bitmap and extent overlapping %d\n", ret);
+ test_err("error removing bitmap and extent overlapping %d", ret);
return ret;
}
@@ -335,12 +335,14 @@ check_num_extents_and_bitmaps(const struct btrfs_block_group_cache *cache,
const int num_bitmaps)
{
if (cache->free_space_ctl->free_extents != num_extents) {
- test_msg("Incorrect # of extent entries in the cache: %d, expected %d\n",
+ test_err(
+ "incorrect # of extent entries in the cache: %d, expected %d",
cache->free_space_ctl->free_extents, num_extents);
return -EINVAL;
}
if (cache->free_space_ctl->total_bitmaps != num_bitmaps) {
- test_msg("Incorrect # of extent entries in the cache: %d, expected %d\n",
+ test_err(
+	"incorrect # of bitmap entries in the cache: %d, expected %d",
cache->free_space_ctl->total_bitmaps, num_bitmaps);
return -EINVAL;
}
@@ -358,7 +360,7 @@ static int check_cache_empty(struct btrfs_block_group_cache *cache)
* allocate.
*/
if (cache->free_space_ctl->free_space != 0) {
- test_msg("Cache free space is not 0\n");
+ test_err("cache free space is not 0");
return -EINVAL;
}
@@ -366,7 +368,7 @@ static int check_cache_empty(struct btrfs_block_group_cache *cache)
offset = btrfs_find_space_for_alloc(cache, 0, 4096, 0,
&max_extent_size);
if (offset != 0) {
- test_msg("Space allocation did not fail, returned offset: %llu",
+ test_err("space allocation did not fail, returned offset: %llu",
offset);
return -EINVAL;
}
@@ -402,7 +404,7 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache,
};
const struct btrfs_free_space_op *orig_free_space_ops;
- test_msg("Running space stealing from bitmap to extent\n");
+ test_msg("running space stealing from bitmap to extent");
/*
* For this test, we want to ensure we end up with an extent entry
@@ -430,7 +432,7 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache,
*/
ret = test_add_free_space_entry(cache, SZ_128M - SZ_256K, SZ_128K, 0);
if (ret) {
- test_msg("Couldn't add extent entry %d\n", ret);
+ test_err("couldn't add extent entry %d", ret);
return ret;
}
@@ -438,7 +440,7 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache,
ret = test_add_free_space_entry(cache, SZ_128M + SZ_512K,
SZ_128M - SZ_512K, 1);
if (ret) {
- test_msg("Couldn't add bitmap entry %d\n", ret);
+ test_err("couldn't add bitmap entry %d", ret);
return ret;
}
@@ -457,17 +459,17 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache,
SZ_128M + 768 * SZ_1K,
SZ_128M - 768 * SZ_1K);
if (ret) {
- test_msg("Failed to free part of bitmap space %d\n", ret);
+ test_err("failed to free part of bitmap space %d", ret);
return ret;
}
/* Confirm that only those 2 ranges are marked as free. */
if (!test_check_exists(cache, SZ_128M - SZ_256K, SZ_128K)) {
- test_msg("Free space range missing\n");
+ test_err("free space range missing");
return -ENOENT;
}
if (!test_check_exists(cache, SZ_128M + SZ_512K, SZ_256K)) {
- test_msg("Free space range missing\n");
+ test_err("free space range missing");
return -ENOENT;
}
@@ -477,7 +479,7 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache,
*/
if (test_check_exists(cache, SZ_128M + 768 * SZ_1K,
SZ_128M - 768 * SZ_1K)) {
- test_msg("Bitmap region not removed from space cache\n");
+ test_err("bitmap region not removed from space cache");
return -EINVAL;
}
@@ -486,7 +488,7 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache,
* covered by the bitmap, isn't marked as free.
*/
if (test_check_exists(cache, SZ_128M + SZ_256K, SZ_256K)) {
- test_msg("Invalid bitmap region marked as free\n");
+ test_err("invalid bitmap region marked as free");
return -EINVAL;
}
@@ -495,7 +497,7 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache,
* by the bitmap too, isn't marked as free either.
*/
if (test_check_exists(cache, SZ_128M, SZ_256K)) {
- test_msg("Invalid bitmap region marked as free\n");
+ test_err("invalid bitmap region marked as free");
return -EINVAL;
}
@@ -506,12 +508,12 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache,
*/
ret = btrfs_add_free_space(cache, SZ_128M, SZ_512K);
if (ret) {
- test_msg("Error adding free space: %d\n", ret);
+ test_err("error adding free space: %d", ret);
return ret;
}
/* Confirm the region is marked as free. */
if (!test_check_exists(cache, SZ_128M, SZ_512K)) {
- test_msg("Bitmap region not marked as free\n");
+ test_err("bitmap region not marked as free");
return -ENOENT;
}
@@ -531,7 +533,7 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache,
*/
ret = btrfs_add_free_space(cache, SZ_128M + SZ_16M, sectorsize);
if (ret) {
- test_msg("Error adding free space: %d\n", ret);
+ test_err("error adding free space: %d", ret);
return ret;
}
@@ -550,12 +552,12 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache,
*/
ret = btrfs_add_free_space(cache, SZ_128M - SZ_128K, SZ_128K);
if (ret) {
- test_msg("Error adding free space: %d\n", ret);
+ test_err("error adding free space: %d", ret);
return ret;
}
/* Confirm the region is marked as free. */
if (!test_check_exists(cache, SZ_128M - SZ_128K, SZ_128K)) {
- test_msg("Extent region not marked as free\n");
+ test_err("extent region not marked as free");
return -ENOENT;
}
@@ -583,12 +585,12 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache,
* allocate the whole free space at once.
*/
if (!test_check_exists(cache, SZ_128M - SZ_256K, SZ_1M)) {
- test_msg("Expected region not marked as free\n");
+ test_err("expected region not marked as free");
return -ENOENT;
}
if (cache->free_space_ctl->free_space != (SZ_1M + sectorsize)) {
- test_msg("Cache free space is not 1Mb + %u\n", sectorsize);
+ test_err("cache free space is not 1Mb + %u", sectorsize);
return -EINVAL;
}
@@ -596,7 +598,8 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache,
0, SZ_1M, 0,
&max_extent_size);
if (offset != (SZ_128M - SZ_256K)) {
- test_msg("Failed to allocate 1Mb from space cache, returned offset is: %llu\n",
+ test_err(
+ "failed to allocate 1Mb from space cache, returned offset is: %llu",
offset);
return -EINVAL;
}
@@ -610,7 +613,7 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache,
return ret;
if (cache->free_space_ctl->free_space != sectorsize) {
- test_msg("Cache free space is not %u\n", sectorsize);
+ test_err("cache free space is not %u", sectorsize);
return -EINVAL;
}
@@ -618,7 +621,7 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache,
0, sectorsize, 0,
&max_extent_size);
if (offset != (SZ_128M + SZ_16M)) {
- test_msg("Failed to allocate %u, returned offset : %llu\n",
+		test_err("failed to allocate %u, returned offset: %llu",
sectorsize, offset);
return -EINVAL;
}
@@ -640,14 +643,14 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache,
*/
ret = test_add_free_space_entry(cache, SZ_128M + SZ_128K, SZ_128K, 0);
if (ret) {
- test_msg("Couldn't add extent entry %d\n", ret);
+ test_err("couldn't add extent entry %d", ret);
return ret;
}
/* Bitmap entry covering free space range [0, 128Mb - 512Kb[ */
ret = test_add_free_space_entry(cache, 0, SZ_128M - SZ_512K, 1);
if (ret) {
- test_msg("Couldn't add bitmap entry %d\n", ret);
+ test_err("couldn't add bitmap entry %d", ret);
return ret;
}
@@ -664,17 +667,17 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache,
*/
ret = btrfs_remove_free_space(cache, 0, SZ_128M - 768 * SZ_1K);
if (ret) {
- test_msg("Failed to free part of bitmap space %d\n", ret);
+ test_err("failed to free part of bitmap space %d", ret);
return ret;
}
/* Confirm that only those 2 ranges are marked as free. */
if (!test_check_exists(cache, SZ_128M + SZ_128K, SZ_128K)) {
- test_msg("Free space range missing\n");
+ test_err("free space range missing");
return -ENOENT;
}
if (!test_check_exists(cache, SZ_128M - 768 * SZ_1K, SZ_256K)) {
- test_msg("Free space range missing\n");
+ test_err("free space range missing");
return -ENOENT;
}
@@ -683,7 +686,7 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache,
* as free anymore.
*/
if (test_check_exists(cache, 0, SZ_128M - 768 * SZ_1K)) {
- test_msg("Bitmap region not removed from space cache\n");
+ test_err("bitmap region not removed from space cache");
return -EINVAL;
}
@@ -692,7 +695,7 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache,
* covered by the bitmap, isn't marked as free.
*/
if (test_check_exists(cache, SZ_128M - SZ_512K, SZ_512K)) {
- test_msg("Invalid bitmap region marked as free\n");
+ test_err("invalid bitmap region marked as free");
return -EINVAL;
}
@@ -703,12 +706,12 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache,
*/
ret = btrfs_add_free_space(cache, SZ_128M - SZ_512K, SZ_512K);
if (ret) {
- test_msg("Error adding free space: %d\n", ret);
+ test_err("error adding free space: %d", ret);
return ret;
}
/* Confirm the region is marked as free. */
if (!test_check_exists(cache, SZ_128M - SZ_512K, SZ_512K)) {
- test_msg("Bitmap region not marked as free\n");
+ test_err("bitmap region not marked as free");
return -ENOENT;
}
@@ -728,7 +731,7 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache,
*/
ret = btrfs_add_free_space(cache, SZ_32M, 2 * sectorsize);
if (ret) {
- test_msg("Error adding free space: %d\n", ret);
+ test_err("error adding free space: %d", ret);
return ret;
}
@@ -739,12 +742,12 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache,
*/
ret = btrfs_add_free_space(cache, SZ_128M, SZ_128K);
if (ret) {
- test_msg("Error adding free space: %d\n", ret);
+ test_err("error adding free space: %d", ret);
return ret;
}
/* Confirm the region is marked as free. */
if (!test_check_exists(cache, SZ_128M, SZ_128K)) {
- test_msg("Extent region not marked as free\n");
+ test_err("extent region not marked as free");
return -ENOENT;
}
@@ -772,19 +775,20 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache,
* allocate the whole free space at once.
*/
if (!test_check_exists(cache, SZ_128M - 768 * SZ_1K, SZ_1M)) {
- test_msg("Expected region not marked as free\n");
+ test_err("expected region not marked as free");
return -ENOENT;
}
if (cache->free_space_ctl->free_space != (SZ_1M + 2 * sectorsize)) {
- test_msg("Cache free space is not 1Mb + %u\n", 2 * sectorsize);
+ test_err("cache free space is not 1Mb + %u", 2 * sectorsize);
return -EINVAL;
}
offset = btrfs_find_space_for_alloc(cache, 0, SZ_1M, 0,
&max_extent_size);
if (offset != (SZ_128M - 768 * SZ_1K)) {
- test_msg("Failed to allocate 1Mb from space cache, returned offset is: %llu\n",
+ test_err(
+ "failed to allocate 1Mb from space cache, returned offset is: %llu",
offset);
return -EINVAL;
}
@@ -798,7 +802,7 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache,
return ret;
if (cache->free_space_ctl->free_space != 2 * sectorsize) {
- test_msg("Cache free space is not %u\n", 2 * sectorsize);
+ test_err("cache free space is not %u", 2 * sectorsize);
return -EINVAL;
}
@@ -806,9 +810,8 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache,
0, 2 * sectorsize, 0,
&max_extent_size);
if (offset != SZ_32M) {
- test_msg("Failed to allocate %u, offset: %llu\n",
- 2 * sectorsize,
- offset);
+ test_err("failed to allocate %u, offset: %llu",
+ 2 * sectorsize, offset);
return -EINVAL;
}
@@ -829,7 +832,7 @@ int btrfs_test_free_space_cache(u32 sectorsize, u32 nodesize)
struct btrfs_root *root = NULL;
int ret = -ENOMEM;
- test_msg("Running btrfs free space cache tests\n");
+ test_msg("running btrfs free space cache tests");
fs_info = btrfs_alloc_dummy_fs_info(nodesize, sectorsize);
if (!fs_info)
return -ENOMEM;
@@ -843,7 +846,7 @@ int btrfs_test_free_space_cache(u32 sectorsize, u32 nodesize)
cache = btrfs_alloc_dummy_block_group(fs_info,
BITS_PER_BITMAP * sectorsize + PAGE_SIZE);
if (!cache) {
- test_msg("Couldn't run the tests\n");
+ test_err("couldn't run the tests");
btrfs_free_dummy_fs_info(fs_info);
return 0;
}
@@ -871,6 +874,6 @@ out:
btrfs_free_dummy_block_group(cache);
btrfs_free_dummy_root(root);
btrfs_free_dummy_fs_info(fs_info);
- test_msg("Free space cache tests finished\n");
+ test_msg("free space cache tests finished");
return ret;
}
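
The free-space cache hunks are mechanical message conversions; the test logic itself is untouched. For orientation, a sketch of the recurring add/remove/verify shape these messages report on, using the helper names seen above (illustrative only, not code from the patch):

/* Sketch of the pattern repeated throughout the free-space tests. */
static int check_range_removed(struct btrfs_block_group_cache *cache,
			       u64 offset, u64 bytes)
{
	int ret = btrfs_remove_free_space(cache, offset, bytes);

	if (ret) {
		test_err("error removing extent %d", ret);
		return ret;
	}
	/* The freed range must no longer show up as free space. */
	if (test_check_exists(cache, offset, bytes)) {
		test_err("full remove left some lingering space");
		return -1;
	}
	return 0;
}
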
diff --git a/fs/btrfs/tests/free-space-tree-tests.c b/fs/btrfs/tests/free-space-tree-tests.c
index e1f9666c4974..89346da890cf 100644
--- a/fs/btrfs/tests/free-space-tree-tests.c
+++ b/fs/btrfs/tests/free-space-tree-tests.c
@@ -32,7 +32,7 @@ static int __check_free_space_extents(struct btrfs_trans_handle *trans,
info = search_free_space_info(trans, fs_info, cache, path, 0);
if (IS_ERR(info)) {
- test_msg("Could not find free space info\n");
+ test_err("could not find free space info");
ret = PTR_ERR(info);
goto out;
}
@@ -40,7 +40,7 @@ static int __check_free_space_extents(struct btrfs_trans_handle *trans,
extent_count = btrfs_free_space_extent_count(path->nodes[0], info);
if (extent_count != num_extents) {
- test_msg("Extent count is wrong\n");
+ test_err("extent count is wrong");
ret = -EINVAL;
goto out;
}
@@ -99,7 +99,7 @@ out:
btrfs_release_path(path);
return ret;
invalid:
- test_msg("Free space tree is invalid\n");
+ test_err("free space tree is invalid");
ret = -EINVAL;
goto out;
}
@@ -117,7 +117,7 @@ static int check_free_space_extents(struct btrfs_trans_handle *trans,
info = search_free_space_info(trans, fs_info, cache, path, 0);
if (IS_ERR(info)) {
- test_msg("Could not find free space info\n");
+ test_err("could not find free space info");
btrfs_release_path(path);
return PTR_ERR(info);
}
@@ -131,15 +131,15 @@ static int check_free_space_extents(struct btrfs_trans_handle *trans,
/* Flip it to the other format and check that for good measure. */
if (flags & BTRFS_FREE_SPACE_USING_BITMAPS) {
- ret = convert_free_space_to_extents(trans, fs_info, cache, path);
+ ret = convert_free_space_to_extents(trans, cache, path);
if (ret) {
- test_msg("Could not convert to extents\n");
+ test_err("could not convert to extents");
return ret;
}
} else {
- ret = convert_free_space_to_bitmaps(trans, fs_info, cache, path);
+ ret = convert_free_space_to_bitmaps(trans, cache, path);
if (ret) {
- test_msg("Could not convert to bitmaps\n");
+ test_err("could not convert to bitmaps");
return ret;
}
}
@@ -170,11 +170,11 @@ static int test_remove_all(struct btrfs_trans_handle *trans,
const struct free_space_extent extents[] = {};
int ret;
- ret = __remove_from_free_space_tree(trans, fs_info, cache, path,
+ ret = __remove_from_free_space_tree(trans, cache, path,
cache->key.objectid,
cache->key.offset);
if (ret) {
- test_msg("Could not remove free space\n");
+ test_err("could not remove free space");
return ret;
}
@@ -194,10 +194,10 @@ static int test_remove_beginning(struct btrfs_trans_handle *trans,
};
int ret;
- ret = __remove_from_free_space_tree(trans, fs_info, cache, path,
+ ret = __remove_from_free_space_tree(trans, cache, path,
cache->key.objectid, alignment);
if (ret) {
- test_msg("Could not remove free space\n");
+ test_err("could not remove free space");
return ret;
}
@@ -217,12 +217,12 @@ static int test_remove_end(struct btrfs_trans_handle *trans,
};
int ret;
- ret = __remove_from_free_space_tree(trans, fs_info, cache, path,
+ ret = __remove_from_free_space_tree(trans, cache, path,
cache->key.objectid +
cache->key.offset - alignment,
alignment);
if (ret) {
- test_msg("Could not remove free space\n");
+ test_err("could not remove free space");
return ret;
}
@@ -243,11 +243,11 @@ static int test_remove_middle(struct btrfs_trans_handle *trans,
};
int ret;
- ret = __remove_from_free_space_tree(trans, fs_info, cache, path,
+ ret = __remove_from_free_space_tree(trans, cache, path,
cache->key.objectid + alignment,
alignment);
if (ret) {
- test_msg("Could not remove free space\n");
+ test_err("could not remove free space");
return ret;
}
@@ -266,26 +266,26 @@ static int test_merge_left(struct btrfs_trans_handle *trans,
};
int ret;
- ret = __remove_from_free_space_tree(trans, fs_info, cache, path,
+ ret = __remove_from_free_space_tree(trans, cache, path,
cache->key.objectid,
cache->key.offset);
if (ret) {
- test_msg("Could not remove free space\n");
+ test_err("could not remove free space");
return ret;
}
- ret = __add_to_free_space_tree(trans, fs_info, cache, path,
- cache->key.objectid, alignment);
+ ret = __add_to_free_space_tree(trans, cache, path, cache->key.objectid,
+ alignment);
if (ret) {
- test_msg("Could not add free space\n");
+ test_err("could not add free space");
return ret;
}
- ret = __add_to_free_space_tree(trans, fs_info, cache, path,
+ ret = __add_to_free_space_tree(trans, cache, path,
cache->key.objectid + alignment,
alignment);
if (ret) {
- test_msg("Could not add free space\n");
+ test_err("could not add free space");
return ret;
}
@@ -304,27 +304,27 @@ static int test_merge_right(struct btrfs_trans_handle *trans,
};
int ret;
- ret = __remove_from_free_space_tree(trans, fs_info, cache, path,
+ ret = __remove_from_free_space_tree(trans, cache, path,
cache->key.objectid,
cache->key.offset);
if (ret) {
- test_msg("Could not remove free space\n");
+ test_err("could not remove free space");
return ret;
}
- ret = __add_to_free_space_tree(trans, fs_info, cache, path,
+ ret = __add_to_free_space_tree(trans, cache, path,
cache->key.objectid + 2 * alignment,
alignment);
if (ret) {
- test_msg("Could not add free space\n");
+ test_err("could not add free space");
return ret;
}
- ret = __add_to_free_space_tree(trans, fs_info, cache, path,
+ ret = __add_to_free_space_tree(trans, cache, path,
cache->key.objectid + alignment,
alignment);
if (ret) {
- test_msg("Could not add free space\n");
+ test_err("could not add free space");
return ret;
}
@@ -343,34 +343,34 @@ static int test_merge_both(struct btrfs_trans_handle *trans,
};
int ret;
- ret = __remove_from_free_space_tree(trans, fs_info, cache, path,
+ ret = __remove_from_free_space_tree(trans, cache, path,
cache->key.objectid,
cache->key.offset);
if (ret) {
- test_msg("Could not remove free space\n");
+ test_err("could not remove free space");
return ret;
}
- ret = __add_to_free_space_tree(trans, fs_info, cache, path,
- cache->key.objectid, alignment);
+ ret = __add_to_free_space_tree(trans, cache, path, cache->key.objectid,
+ alignment);
if (ret) {
- test_msg("Could not add free space\n");
+ test_err("could not add free space");
return ret;
}
- ret = __add_to_free_space_tree(trans, fs_info, cache, path,
+ ret = __add_to_free_space_tree(trans, cache, path,
cache->key.objectid + 2 * alignment,
alignment);
if (ret) {
- test_msg("Could not add free space\n");
+ test_err("could not add free space");
return ret;
}
- ret = __add_to_free_space_tree(trans, fs_info, cache, path,
+ ret = __add_to_free_space_tree(trans, cache, path,
cache->key.objectid + alignment,
alignment);
if (ret) {
- test_msg("Could not add free space\n");
+ test_err("could not add free space");
return ret;
}
@@ -391,34 +391,34 @@ static int test_merge_none(struct btrfs_trans_handle *trans,
};
int ret;
- ret = __remove_from_free_space_tree(trans, fs_info, cache, path,
+ ret = __remove_from_free_space_tree(trans, cache, path,
cache->key.objectid,
cache->key.offset);
if (ret) {
- test_msg("Could not remove free space\n");
+ test_err("could not remove free space");
return ret;
}
- ret = __add_to_free_space_tree(trans, fs_info, cache, path,
- cache->key.objectid, alignment);
+ ret = __add_to_free_space_tree(trans, cache, path, cache->key.objectid,
+ alignment);
if (ret) {
- test_msg("Could not add free space\n");
+ test_err("could not add free space");
return ret;
}
- ret = __add_to_free_space_tree(trans, fs_info, cache, path,
+ ret = __add_to_free_space_tree(trans, cache, path,
cache->key.objectid + 4 * alignment,
alignment);
if (ret) {
- test_msg("Could not add free space\n");
+ test_err("could not add free space");
return ret;
}
- ret = __add_to_free_space_tree(trans, fs_info, cache, path,
+ ret = __add_to_free_space_tree(trans, cache, path,
cache->key.objectid + 2 * alignment,
alignment);
if (ret) {
- test_msg("Could not add free space\n");
+ test_err("could not add free space");
return ret;
}
@@ -444,14 +444,14 @@ static int run_test(test_func_t test_func, int bitmaps, u32 sectorsize,
fs_info = btrfs_alloc_dummy_fs_info(nodesize, sectorsize);
if (!fs_info) {
- test_msg("Couldn't allocate dummy fs info\n");
+ test_err("couldn't allocate dummy fs info");
ret = -ENOMEM;
goto out;
}
root = btrfs_alloc_dummy_root(fs_info);
if (IS_ERR(root)) {
- test_msg("Couldn't allocate dummy root\n");
+ test_err("couldn't allocate dummy root");
ret = PTR_ERR(root);
goto out;
}
@@ -463,7 +463,7 @@ static int run_test(test_func_t test_func, int bitmaps, u32 sectorsize,
root->node = alloc_test_extent_buffer(root->fs_info, nodesize);
if (!root->node) {
- test_msg("Couldn't allocate dummy buffer\n");
+ test_err("couldn't allocate dummy buffer");
ret = -ENOMEM;
goto out;
}
@@ -473,7 +473,7 @@ static int run_test(test_func_t test_func, int bitmaps, u32 sectorsize,
cache = btrfs_alloc_dummy_block_group(fs_info, 8 * alignment);
if (!cache) {
- test_msg("Couldn't allocate dummy block group cache\n");
+ test_err("couldn't allocate dummy block group cache");
ret = -ENOMEM;
goto out;
}
@@ -482,26 +482,25 @@ static int run_test(test_func_t test_func, int bitmaps, u32 sectorsize,
cache->needs_free_space = 1;
cache->fs_info = root->fs_info;
- btrfs_init_dummy_trans(&trans);
+ btrfs_init_dummy_trans(&trans, root->fs_info);
path = btrfs_alloc_path();
if (!path) {
- test_msg("Couldn't allocate path\n");
+ test_err("couldn't allocate path");
ret = -ENOMEM;
goto out;
}
- ret = add_block_group_free_space(&trans, root->fs_info, cache);
+ ret = add_block_group_free_space(&trans, cache);
if (ret) {
- test_msg("Could not add block group free space\n");
+ test_err("could not add block group free space");
goto out;
}
if (bitmaps) {
- ret = convert_free_space_to_bitmaps(&trans, root->fs_info,
- cache, path);
+ ret = convert_free_space_to_bitmaps(&trans, cache, path);
if (ret) {
- test_msg("Could not convert block group to bitmaps\n");
+ test_err("could not convert block group to bitmaps");
goto out;
}
}
@@ -510,14 +509,14 @@ static int run_test(test_func_t test_func, int bitmaps, u32 sectorsize,
if (ret)
goto out;
- ret = remove_block_group_free_space(&trans, root->fs_info, cache);
+ ret = remove_block_group_free_space(&trans, cache);
if (ret) {
- test_msg("Could not remove block group free space\n");
+ test_err("could not remove block group free space");
goto out;
}
if (btrfs_header_nritems(root->node) != 0) {
- test_msg("Free space tree has leftover items\n");
+ test_err("free space tree has leftover items");
ret = -EINVAL;
goto out;
}
@@ -539,14 +538,16 @@ static int run_test_both_formats(test_func_t test_func, u32 sectorsize,
ret = run_test(test_func, 0, sectorsize, nodesize, alignment);
if (ret) {
- test_msg("%pf failed with extents, sectorsize=%u, nodesize=%u, alignment=%u\n",
+ test_err(
+ "%pf failed with extents, sectorsize=%u, nodesize=%u, alignment=%u",
test_func, sectorsize, nodesize, alignment);
test_ret = ret;
}
ret = run_test(test_func, 1, sectorsize, nodesize, alignment);
if (ret) {
- test_msg("%pf failed with bitmaps, sectorsize=%u, nodesize=%u, alignment=%u\n",
+ test_err(
+ "%pf failed with bitmaps, sectorsize=%u, nodesize=%u, alignment=%u",
test_func, sectorsize, nodesize, alignment);
test_ret = ret;
}
@@ -577,7 +578,7 @@ int btrfs_test_free_space_tree(u32 sectorsize, u32 nodesize)
*/
bitmap_alignment = BTRFS_FREE_SPACE_BITMAP_BITS * PAGE_SIZE;
- test_msg("Running free space tree tests\n");
+ test_msg("running free space tree tests");
for (i = 0; i < ARRAY_SIZE(tests); i++) {
int ret;
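
Besides the message conversions, the free-space-tree hunks drop the explicit fs_info argument from the transaction-based helpers (__add_to_free_space_tree(), __remove_from_free_space_tree(), the convert_free_space_to_*() pair, and the block-group add/remove calls): the dummy transaction is now initialized with the fs_info, so callees derive it from the handle. A sketch of the updated initializer, assuming the shape implied by the btrfs_init_dummy_trans(&trans, root->fs_info) call above:

/* Sketch, assuming the dummy transaction now carries the fs_info. */
void btrfs_init_dummy_trans(struct btrfs_trans_handle *trans,
			    struct btrfs_fs_info *fs_info)
{
	memset(trans, 0, sizeof(*trans));
	trans->transid = 1;
	trans->type = __TRANS_DUMMY;
	/* Callees read trans->fs_info instead of taking it as a parameter. */
	trans->fs_info = fs_info;
}
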
diff --git a/fs/btrfs/tests/inode-tests.c b/fs/btrfs/tests/inode-tests.c
index e0ba799536b4..64043f028820 100644
--- a/fs/btrfs/tests/inode-tests.c
+++ b/fs/btrfs/tests/inode-tests.c
@@ -228,7 +228,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
inode = btrfs_new_test_inode();
if (!inode) {
- test_msg("Couldn't allocate inode\n");
+ test_err("couldn't allocate inode");
return ret;
}
@@ -238,19 +238,19 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
fs_info = btrfs_alloc_dummy_fs_info(nodesize, sectorsize);
if (!fs_info) {
- test_msg("Couldn't allocate dummy fs info\n");
+ test_err("couldn't allocate dummy fs info");
goto out;
}
root = btrfs_alloc_dummy_root(fs_info);
if (IS_ERR(root)) {
- test_msg("Couldn't allocate root\n");
+ test_err("couldn't allocate root");
goto out;
}
root->node = alloc_dummy_extent_buffer(fs_info, nodesize);
if (!root->node) {
- test_msg("Couldn't allocate dummy buffer\n");
+ test_err("couldn't allocate dummy buffer");
goto out;
}
@@ -268,11 +268,11 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, 0, sectorsize, 0);
if (IS_ERR(em)) {
em = NULL;
- test_msg("Got an error when we shouldn't have\n");
+ test_err("got an error when we shouldn't have");
goto out;
}
if (em->block_start != EXTENT_MAP_HOLE) {
- test_msg("Expected a hole, got %llu\n", em->block_start);
+ test_err("expected a hole, got %llu", em->block_start);
goto out;
}
free_extent_map(em);
@@ -287,20 +287,21 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, 0, (u64)-1, 0);
if (IS_ERR(em)) {
- test_msg("Got an error when we shouldn't have\n");
+ test_err("got an error when we shouldn't have");
goto out;
}
if (em->block_start != EXTENT_MAP_HOLE) {
- test_msg("Expected a hole, got %llu\n", em->block_start);
+ test_err("expected a hole, got %llu", em->block_start);
goto out;
}
if (em->start != 0 || em->len != 5) {
- test_msg("Unexpected extent wanted start 0 len 5, got start "
- "%llu len %llu\n", em->start, em->len);
+ test_err(
+ "unexpected extent wanted start 0 len 5, got start %llu len %llu",
+ em->start, em->len);
goto out;
}
if (em->flags != 0) {
- test_msg("Unexpected flags set, want 0 have %lu\n", em->flags);
+ test_err("unexpected flags set, want 0 have %lu", em->flags);
goto out;
}
offset = em->start + em->len;
@@ -308,21 +309,22 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0);
if (IS_ERR(em)) {
- test_msg("Got an error when we shouldn't have\n");
+ test_err("got an error when we shouldn't have");
goto out;
}
if (em->block_start != EXTENT_MAP_INLINE) {
- test_msg("Expected an inline, got %llu\n", em->block_start);
+ test_err("expected an inline, got %llu", em->block_start);
goto out;
}
if (em->start != offset || em->len != (sectorsize - 5)) {
- test_msg("Unexpected extent wanted start %llu len 1, got start "
- "%llu len %llu\n", offset, em->start, em->len);
+ test_err(
+ "unexpected extent wanted start %llu len 1, got start %llu len %llu",
+ offset, em->start, em->len);
goto out;
}
if (em->flags != 0) {
- test_msg("Unexpected flags set, want 0 have %lu\n", em->flags);
+ test_err("unexpected flags set, want 0 have %lu", em->flags);
goto out;
}
/*
@@ -335,20 +337,21 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0);
if (IS_ERR(em)) {
- test_msg("Got an error when we shouldn't have\n");
+ test_err("got an error when we shouldn't have");
goto out;
}
if (em->block_start != EXTENT_MAP_HOLE) {
- test_msg("Expected a hole, got %llu\n", em->block_start);
+ test_err("expected a hole, got %llu", em->block_start);
goto out;
}
if (em->start != offset || em->len != 4) {
- test_msg("Unexpected extent wanted start %llu len 4, got start "
- "%llu len %llu\n", offset, em->start, em->len);
+ test_err(
+ "unexpected extent wanted start %llu len 4, got start %llu len %llu",
+ offset, em->start, em->len);
goto out;
}
if (em->flags != 0) {
- test_msg("Unexpected flags set, want 0 have %lu\n", em->flags);
+ test_err("unexpected flags set, want 0 have %lu", em->flags);
goto out;
}
offset = em->start + em->len;
@@ -357,24 +360,25 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
/* Regular extent */
em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0);
if (IS_ERR(em)) {
- test_msg("Got an error when we shouldn't have\n");
+ test_err("got an error when we shouldn't have");
goto out;
}
if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
- test_msg("Expected a real extent, got %llu\n", em->block_start);
+ test_err("expected a real extent, got %llu", em->block_start);
goto out;
}
if (em->start != offset || em->len != sectorsize - 1) {
- test_msg("Unexpected extent wanted start %llu len 4095, got "
- "start %llu len %llu\n", offset, em->start, em->len);
+ test_err(
+ "unexpected extent wanted start %llu len 4095, got start %llu len %llu",
+ offset, em->start, em->len);
goto out;
}
if (em->flags != 0) {
- test_msg("Unexpected flags set, want 0 have %lu\n", em->flags);
+ test_err("unexpected flags set, want 0 have %lu", em->flags);
goto out;
}
if (em->orig_start != em->start) {
- test_msg("Wrong orig offset, want %llu, have %llu\n", em->start,
+ test_err("wrong orig offset, want %llu, have %llu", em->start,
em->orig_start);
goto out;
}
@@ -384,25 +388,25 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
/* The next 3 are split extents */
em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0);
if (IS_ERR(em)) {
- test_msg("Got an error when we shouldn't have\n");
+ test_err("got an error when we shouldn't have");
goto out;
}
if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
- test_msg("Expected a real extent, got %llu\n", em->block_start);
+ test_err("expected a real extent, got %llu", em->block_start);
goto out;
}
if (em->start != offset || em->len != sectorsize) {
- test_msg("Unexpected extent start %llu len %u, "
- "got start %llu len %llu\n",
+ test_err(
+ "unexpected extent start %llu len %u, got start %llu len %llu",
offset, sectorsize, em->start, em->len);
goto out;
}
if (em->flags != 0) {
- test_msg("Unexpected flags set, want 0 have %lu\n", em->flags);
+ test_err("unexpected flags set, want 0 have %lu", em->flags);
goto out;
}
if (em->orig_start != em->start) {
- test_msg("Wrong orig offset, want %llu, have %llu\n", em->start,
+ test_err("wrong orig offset, want %llu, have %llu", em->start,
em->orig_start);
goto out;
}
@@ -413,21 +417,21 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0);
if (IS_ERR(em)) {
- test_msg("Got an error when we shouldn't have\n");
+ test_err("got an error when we shouldn't have");
goto out;
}
if (em->block_start != EXTENT_MAP_HOLE) {
- test_msg("Expected a hole, got %llu\n", em->block_start);
+ test_err("expected a hole, got %llu", em->block_start);
goto out;
}
if (em->start != offset || em->len != sectorsize) {
- test_msg("Unexpected extent wanted start %llu len %u, "
- "got start %llu len %llu\n",
+ test_err(
+ "unexpected extent wanted start %llu len %u, got start %llu len %llu",
offset, sectorsize, em->start, em->len);
goto out;
}
if (em->flags != 0) {
- test_msg("Unexpected flags set, want 0 have %lu\n", em->flags);
+ test_err("unexpected flags set, want 0 have %lu", em->flags);
goto out;
}
offset = em->start + em->len;
@@ -435,31 +439,31 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0);
if (IS_ERR(em)) {
- test_msg("Got an error when we shouldn't have\n");
+ test_err("got an error when we shouldn't have");
goto out;
}
if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
- test_msg("Expected a real extent, got %llu\n", em->block_start);
+ test_err("expected a real extent, got %llu", em->block_start);
goto out;
}
if (em->start != offset || em->len != 2 * sectorsize) {
- test_msg("Unexpected extent wanted start %llu len %u, "
- "got start %llu len %llu\n",
+ test_err(
+ "unexpected extent wanted start %llu len %u, got start %llu len %llu",
offset, 2 * sectorsize, em->start, em->len);
goto out;
}
if (em->flags != 0) {
- test_msg("Unexpected flags set, want 0 have %lu\n", em->flags);
+ test_err("unexpected flags set, want 0 have %lu", em->flags);
goto out;
}
if (em->orig_start != orig_start) {
- test_msg("Wrong orig offset, want %llu, have %llu\n",
+ test_err("wrong orig offset, want %llu, have %llu",
orig_start, em->orig_start);
goto out;
}
disk_bytenr += (em->start - orig_start);
if (em->block_start != disk_bytenr) {
- test_msg("Wrong block start, want %llu, have %llu\n",
+ test_err("wrong block start, want %llu, have %llu",
disk_bytenr, em->block_start);
goto out;
}
@@ -469,26 +473,26 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
/* Prealloc extent */
em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0);
if (IS_ERR(em)) {
- test_msg("Got an error when we shouldn't have\n");
+ test_err("got an error when we shouldn't have");
goto out;
}
if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
- test_msg("Expected a real extent, got %llu\n", em->block_start);
+ test_err("expected a real extent, got %llu", em->block_start);
goto out;
}
if (em->start != offset || em->len != sectorsize) {
- test_msg("Unexpected extent wanted start %llu len %u, "
- "got start %llu len %llu\n",
+ test_err(
+ "unexpected extent wanted start %llu len %u, got start %llu len %llu",
offset, sectorsize, em->start, em->len);
goto out;
}
if (em->flags != prealloc_only) {
- test_msg("Unexpected flags set, want %lu have %lu\n",
+ test_err("unexpected flags set, want %lu have %lu",
prealloc_only, em->flags);
goto out;
}
if (em->orig_start != em->start) {
- test_msg("Wrong orig offset, want %llu, have %llu\n", em->start,
+ test_err("wrong orig offset, want %llu, have %llu", em->start,
em->orig_start);
goto out;
}
@@ -498,26 +502,26 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
/* The next 3 are a half written prealloc extent */
em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0);
if (IS_ERR(em)) {
- test_msg("Got an error when we shouldn't have\n");
+ test_err("got an error when we shouldn't have");
goto out;
}
if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
- test_msg("Expected a real extent, got %llu\n", em->block_start);
+ test_err("expected a real extent, got %llu", em->block_start);
goto out;
}
if (em->start != offset || em->len != sectorsize) {
- test_msg("Unexpected extent wanted start %llu len %u, "
- "got start %llu len %llu\n",
+ test_err(
+ "unexpected extent wanted start %llu len %u, got start %llu len %llu",
offset, sectorsize, em->start, em->len);
goto out;
}
if (em->flags != prealloc_only) {
- test_msg("Unexpected flags set, want %lu have %lu\n",
+ test_err("unexpected flags set, want %lu have %lu",
prealloc_only, em->flags);
goto out;
}
if (em->orig_start != em->start) {
- test_msg("Wrong orig offset, want %llu, have %llu\n", em->start,
+ test_err("wrong orig offset, want %llu, have %llu", em->start,
em->orig_start);
goto out;
}
@@ -528,30 +532,30 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0);
if (IS_ERR(em)) {
- test_msg("Got an error when we shouldn't have\n");
+ test_err("got an error when we shouldn't have");
goto out;
}
if (em->block_start >= EXTENT_MAP_HOLE) {
- test_msg("Expected a real extent, got %llu\n", em->block_start);
+ test_err("expected a real extent, got %llu", em->block_start);
goto out;
}
if (em->start != offset || em->len != sectorsize) {
- test_msg("Unexpected extent wanted start %llu len %u, "
- "got start %llu len %llu\n",
+ test_err(
+ "unexpected extent wanted start %llu len %u, got start %llu len %llu",
offset, sectorsize, em->start, em->len);
goto out;
}
if (em->flags != 0) {
- test_msg("Unexpected flags set, want 0 have %lu\n", em->flags);
+ test_err("unexpected flags set, want 0 have %lu", em->flags);
goto out;
}
if (em->orig_start != orig_start) {
- test_msg("Unexpected orig offset, wanted %llu, have %llu\n",
+ test_err("unexpected orig offset, wanted %llu, have %llu",
orig_start, em->orig_start);
goto out;
}
if (em->block_start != (disk_bytenr + (em->start - em->orig_start))) {
- test_msg("Unexpected block start, wanted %llu, have %llu\n",
+ test_err("unexpected block start, wanted %llu, have %llu",
disk_bytenr + (em->start - em->orig_start),
em->block_start);
goto out;
@@ -561,31 +565,31 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0);
if (IS_ERR(em)) {
- test_msg("Got an error when we shouldn't have\n");
+ test_err("got an error when we shouldn't have");
goto out;
}
if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
- test_msg("Expected a real extent, got %llu\n", em->block_start);
+ test_err("expected a real extent, got %llu", em->block_start);
goto out;
}
if (em->start != offset || em->len != 2 * sectorsize) {
- test_msg("Unexpected extent wanted start %llu len %u, "
- "got start %llu len %llu\n",
+ test_err(
+ "unexpected extent wanted start %llu len %u, got start %llu len %llu",
offset, 2 * sectorsize, em->start, em->len);
goto out;
}
if (em->flags != prealloc_only) {
- test_msg("Unexpected flags set, want %lu have %lu\n",
+ test_err("unexpected flags set, want %lu have %lu",
prealloc_only, em->flags);
goto out;
}
if (em->orig_start != orig_start) {
- test_msg("Wrong orig offset, want %llu, have %llu\n", orig_start,
+ test_err("wrong orig offset, want %llu, have %llu", orig_start,
em->orig_start);
goto out;
}
if (em->block_start != (disk_bytenr + (em->start - em->orig_start))) {
- test_msg("Unexpected block start, wanted %llu, have %llu\n",
+ test_err("unexpected block start, wanted %llu, have %llu",
disk_bytenr + (em->start - em->orig_start),
em->block_start);
goto out;
@@ -596,31 +600,31 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
/* Now for the compressed extent */
em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0);
if (IS_ERR(em)) {
- test_msg("Got an error when we shouldn't have\n");
+ test_err("got an error when we shouldn't have");
goto out;
}
if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
- test_msg("Expected a real extent, got %llu\n", em->block_start);
+ test_err("expected a real extent, got %llu", em->block_start);
goto out;
}
if (em->start != offset || em->len != 2 * sectorsize) {
- test_msg("Unexpected extent wanted start %llu len %u,"
- "got start %llu len %llu\n",
+ test_err(
+ "unexpected extent wanted start %llu len %u, got start %llu len %llu",
offset, 2 * sectorsize, em->start, em->len);
goto out;
}
if (em->flags != compressed_only) {
- test_msg("Unexpected flags set, want %lu have %lu\n",
+ test_err("unexpected flags set, want %lu have %lu",
compressed_only, em->flags);
goto out;
}
if (em->orig_start != em->start) {
- test_msg("Wrong orig offset, want %llu, have %llu\n",
+ test_err("wrong orig offset, want %llu, have %llu",
em->start, em->orig_start);
goto out;
}
if (em->compress_type != BTRFS_COMPRESS_ZLIB) {
- test_msg("Unexpected compress type, wanted %d, got %d\n",
+ test_err("unexpected compress type, wanted %d, got %d",
BTRFS_COMPRESS_ZLIB, em->compress_type);
goto out;
}
@@ -630,31 +634,31 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
/* Split compressed extent */
em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0);
if (IS_ERR(em)) {
- test_msg("Got an error when we shouldn't have\n");
+ test_err("got an error when we shouldn't have");
goto out;
}
if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
- test_msg("Expected a real extent, got %llu\n", em->block_start);
+ test_err("expected a real extent, got %llu", em->block_start);
goto out;
}
if (em->start != offset || em->len != sectorsize) {
- test_msg("Unexpected extent wanted start %llu len %u,"
- "got start %llu len %llu\n",
+ test_err(
+ "unexpected extent wanted start %llu len %u, got start %llu len %llu",
offset, sectorsize, em->start, em->len);
goto out;
}
if (em->flags != compressed_only) {
- test_msg("Unexpected flags set, want %lu have %lu\n",
+ test_err("unexpected flags set, want %lu have %lu",
compressed_only, em->flags);
goto out;
}
if (em->orig_start != em->start) {
- test_msg("Wrong orig offset, want %llu, have %llu\n",
+ test_err("wrong orig offset, want %llu, have %llu",
em->start, em->orig_start);
goto out;
}
if (em->compress_type != BTRFS_COMPRESS_ZLIB) {
- test_msg("Unexpected compress type, wanted %d, got %d\n",
+ test_err("unexpected compress type, wanted %d, got %d",
BTRFS_COMPRESS_ZLIB, em->compress_type);
goto out;
}
@@ -665,25 +669,25 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0);
if (IS_ERR(em)) {
- test_msg("Got an error when we shouldn't have\n");
+ test_err("got an error when we shouldn't have");
goto out;
}
if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
- test_msg("Expected a real extent, got %llu\n", em->block_start);
+ test_err("expected a real extent, got %llu", em->block_start);
goto out;
}
if (em->start != offset || em->len != sectorsize) {
- test_msg("Unexpected extent wanted start %llu len %u, "
- "got start %llu len %llu\n",
+ test_err(
+ "unexpected extent wanted start %llu len %u, got start %llu len %llu",
offset, sectorsize, em->start, em->len);
goto out;
}
if (em->flags != 0) {
- test_msg("Unexpected flags set, want 0 have %lu\n", em->flags);
+ test_err("unexpected flags set, want 0 have %lu", em->flags);
goto out;
}
if (em->orig_start != em->start) {
- test_msg("Wrong orig offset, want %llu, have %llu\n", em->start,
+ test_err("wrong orig offset, want %llu, have %llu", em->start,
em->orig_start);
goto out;
}
@@ -692,32 +696,32 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0);
if (IS_ERR(em)) {
- test_msg("Got an error when we shouldn't have\n");
+ test_err("got an error when we shouldn't have");
goto out;
}
if (em->block_start != disk_bytenr) {
- test_msg("Block start does not match, want %llu got %llu\n",
+ test_err("block start does not match, want %llu got %llu",
disk_bytenr, em->block_start);
goto out;
}
if (em->start != offset || em->len != 2 * sectorsize) {
- test_msg("Unexpected extent wanted start %llu len %u, "
- "got start %llu len %llu\n",
+ test_err(
+ "unexpected extent wanted start %llu len %u, got start %llu len %llu",
offset, 2 * sectorsize, em->start, em->len);
goto out;
}
if (em->flags != compressed_only) {
- test_msg("Unexpected flags set, want %lu have %lu\n",
+ test_err("unexpected flags set, want %lu have %lu",
compressed_only, em->flags);
goto out;
}
if (em->orig_start != orig_start) {
- test_msg("Wrong orig offset, want %llu, have %llu\n",
+ test_err("wrong orig offset, want %llu, have %llu",
em->start, orig_start);
goto out;
}
if (em->compress_type != BTRFS_COMPRESS_ZLIB) {
- test_msg("Unexpected compress type, wanted %d, got %d\n",
+ test_err("unexpected compress type, wanted %d, got %d",
BTRFS_COMPRESS_ZLIB, em->compress_type);
goto out;
}
@@ -728,25 +732,25 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset + 6,
sectorsize, 0);
if (IS_ERR(em)) {
- test_msg("Got an error when we shouldn't have\n");
+ test_err("got an error when we shouldn't have");
goto out;
}
if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
- test_msg("Expected a real extent, got %llu\n", em->block_start);
+ test_err("expected a real extent, got %llu", em->block_start);
goto out;
}
if (em->start != offset || em->len != sectorsize) {
- test_msg("Unexpected extent wanted start %llu len %u, "
- "got start %llu len %llu\n",
+ test_err(
+ "unexpected extent wanted start %llu len %u, got start %llu len %llu",
offset, sectorsize, em->start, em->len);
goto out;
}
if (em->flags != 0) {
- test_msg("Unexpected flags set, want 0 have %lu\n", em->flags);
+ test_err("unexpected flags set, want 0 have %lu", em->flags);
goto out;
}
if (em->orig_start != em->start) {
- test_msg("Wrong orig offset, want %llu, have %llu\n", em->start,
+ test_err("wrong orig offset, want %llu, have %llu", em->start,
em->orig_start);
goto out;
}
@@ -755,11 +759,11 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, SZ_4M, 0);
if (IS_ERR(em)) {
- test_msg("Got an error when we shouldn't have\n");
+ test_err("got an error when we shouldn't have");
goto out;
}
if (em->block_start != EXTENT_MAP_HOLE) {
- test_msg("Expected a hole extent, got %llu\n", em->block_start);
+ test_err("expected a hole extent, got %llu", em->block_start);
goto out;
}
/*
@@ -768,18 +772,18 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
* test.
*/
if (em->start != offset || em->len != 3 * sectorsize) {
- test_msg("Unexpected extent wanted start %llu len %u, "
- "got start %llu len %llu\n",
+ test_err(
+ "unexpected extent wanted start %llu len %u, got start %llu len %llu",
offset, 3 * sectorsize, em->start, em->len);
goto out;
}
if (em->flags != vacancy_only) {
- test_msg("Unexpected flags set, want %lu have %lu\n",
+ test_err("unexpected flags set, want %lu have %lu",
vacancy_only, em->flags);
goto out;
}
if (em->orig_start != em->start) {
- test_msg("Wrong orig offset, want %llu, have %llu\n", em->start,
+ test_err("wrong orig offset, want %llu, have %llu", em->start,
em->orig_start);
goto out;
}
@@ -788,25 +792,25 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0);
if (IS_ERR(em)) {
- test_msg("Got an error when we shouldn't have\n");
+ test_err("got an error when we shouldn't have");
goto out;
}
if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
- test_msg("Expected a real extent, got %llu\n", em->block_start);
+ test_err("expected a real extent, got %llu", em->block_start);
goto out;
}
if (em->start != offset || em->len != sectorsize) {
- test_msg("Unexpected extent wanted start %llu len %u,"
- "got start %llu len %llu\n",
+ test_err(
+ "unexpected extent wanted start %llu len %u, got start %llu len %llu",
offset, sectorsize, em->start, em->len);
goto out;
}
if (em->flags != 0) {
- test_msg("Unexpected flags set, want 0 have %lu\n", em->flags);
+ test_err("unexpected flags set, want 0 have %lu", em->flags);
goto out;
}
if (em->orig_start != em->start) {
- test_msg("Wrong orig offset, want %llu, have %llu\n", em->start,
+ test_err("wrong orig offset, want %llu, have %llu", em->start,
em->orig_start);
goto out;
}
@@ -830,7 +834,7 @@ static int test_hole_first(u32 sectorsize, u32 nodesize)
inode = btrfs_new_test_inode();
if (!inode) {
- test_msg("Couldn't allocate inode\n");
+ test_err("couldn't allocate inode");
return ret;
}
@@ -840,19 +844,19 @@ static int test_hole_first(u32 sectorsize, u32 nodesize)
fs_info = btrfs_alloc_dummy_fs_info(nodesize, sectorsize);
if (!fs_info) {
- test_msg("Couldn't allocate dummy fs info\n");
+ test_err("couldn't allocate dummy fs info");
goto out;
}
root = btrfs_alloc_dummy_root(fs_info);
if (IS_ERR(root)) {
- test_msg("Couldn't allocate root\n");
+ test_err("couldn't allocate root");
goto out;
}
root->node = alloc_dummy_extent_buffer(fs_info, nodesize);
if (!root->node) {
- test_msg("Couldn't allocate dummy buffer\n");
+ test_err("couldn't allocate dummy buffer");
goto out;
}
@@ -871,21 +875,21 @@ static int test_hole_first(u32 sectorsize, u32 nodesize)
sectorsize, BTRFS_FILE_EXTENT_REG, 0, 1);
em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, 0, 2 * sectorsize, 0);
if (IS_ERR(em)) {
- test_msg("Got an error when we shouldn't have\n");
+ test_err("got an error when we shouldn't have");
goto out;
}
if (em->block_start != EXTENT_MAP_HOLE) {
- test_msg("Expected a hole, got %llu\n", em->block_start);
+ test_err("expected a hole, got %llu", em->block_start);
goto out;
}
if (em->start != 0 || em->len != sectorsize) {
- test_msg("Unexpected extent wanted start 0 len %u, "
- "got start %llu len %llu\n",
+ test_err(
+ "unexpected extent wanted start 0 len %u, got start %llu len %llu",
sectorsize, em->start, em->len);
goto out;
}
if (em->flags != vacancy_only) {
- test_msg("Wrong flags, wanted %lu, have %lu\n", vacancy_only,
+ test_err("wrong flags, wanted %lu, have %lu", vacancy_only,
em->flags);
goto out;
}
@@ -894,21 +898,21 @@ static int test_hole_first(u32 sectorsize, u32 nodesize)
em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, sectorsize,
2 * sectorsize, 0);
if (IS_ERR(em)) {
- test_msg("Got an error when we shouldn't have\n");
+ test_err("got an error when we shouldn't have");
goto out;
}
if (em->block_start != sectorsize) {
- test_msg("Expected a real extent, got %llu\n", em->block_start);
+ test_err("expected a real extent, got %llu", em->block_start);
goto out;
}
if (em->start != sectorsize || em->len != sectorsize) {
- test_msg("Unexpected extent wanted start %u len %u, "
- "got start %llu len %llu\n",
+ test_err(
+ "unexpected extent wanted start %u len %u, got start %llu len %llu",
sectorsize, sectorsize, em->start, em->len);
goto out;
}
if (em->flags != 0) {
- test_msg("Unexpected flags set, wanted 0 got %lu\n",
+ test_err("unexpected flags set, wanted 0 got %lu",
em->flags);
goto out;
}
@@ -931,19 +935,19 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize)
inode = btrfs_new_test_inode();
if (!inode) {
- test_msg("Couldn't allocate inode\n");
+ test_err("couldn't allocate inode");
return ret;
}
fs_info = btrfs_alloc_dummy_fs_info(nodesize, sectorsize);
if (!fs_info) {
- test_msg("Couldn't allocate dummy fs info\n");
+ test_err("couldn't allocate dummy fs info");
goto out;
}
root = btrfs_alloc_dummy_root(fs_info);
if (IS_ERR(root)) {
- test_msg("Couldn't allocate root\n");
+ test_err("couldn't allocate root");
goto out;
}
@@ -954,12 +958,12 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize)
ret = btrfs_set_extent_delalloc(inode, 0, BTRFS_MAX_EXTENT_SIZE - 1, 0,
NULL, 0);
if (ret) {
- test_msg("btrfs_set_extent_delalloc returned %d\n", ret);
+ test_err("btrfs_set_extent_delalloc returned %d", ret);
goto out;
}
if (BTRFS_I(inode)->outstanding_extents != 1) {
ret = -EINVAL;
- test_msg("Miscount, wanted 1, got %u\n",
+ test_err("miscount, wanted 1, got %u",
BTRFS_I(inode)->outstanding_extents);
goto out;
}
@@ -969,12 +973,12 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize)
BTRFS_MAX_EXTENT_SIZE + sectorsize - 1,
0, NULL, 0);
if (ret) {
- test_msg("btrfs_set_extent_delalloc returned %d\n", ret);
+ test_err("btrfs_set_extent_delalloc returned %d", ret);
goto out;
}
if (BTRFS_I(inode)->outstanding_extents != 2) {
ret = -EINVAL;
- test_msg("Miscount, wanted 2, got %u\n",
+ test_err("miscount, wanted 2, got %u",
BTRFS_I(inode)->outstanding_extents);
goto out;
}
@@ -986,12 +990,12 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize)
EXTENT_DELALLOC | EXTENT_DIRTY |
EXTENT_UPTODATE, 0, 0, NULL);
if (ret) {
- test_msg("clear_extent_bit returned %d\n", ret);
+ test_err("clear_extent_bit returned %d", ret);
goto out;
}
if (BTRFS_I(inode)->outstanding_extents != 2) {
ret = -EINVAL;
- test_msg("Miscount, wanted 2, got %u\n",
+ test_err("miscount, wanted 2, got %u",
BTRFS_I(inode)->outstanding_extents);
goto out;
}
@@ -1002,12 +1006,12 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize)
+ sectorsize - 1,
0, NULL, 0);
if (ret) {
- test_msg("btrfs_set_extent_delalloc returned %d\n", ret);
+ test_err("btrfs_set_extent_delalloc returned %d", ret);
goto out;
}
if (BTRFS_I(inode)->outstanding_extents != 2) {
ret = -EINVAL;
- test_msg("Miscount, wanted 2, got %u\n",
+ test_err("miscount, wanted 2, got %u",
BTRFS_I(inode)->outstanding_extents);
goto out;
}
@@ -1020,12 +1024,12 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize)
(BTRFS_MAX_EXTENT_SIZE << 1) + 3 * sectorsize - 1,
0, NULL, 0);
if (ret) {
- test_msg("btrfs_set_extent_delalloc returned %d\n", ret);
+ test_err("btrfs_set_extent_delalloc returned %d", ret);
goto out;
}
if (BTRFS_I(inode)->outstanding_extents != 4) {
ret = -EINVAL;
- test_msg("Miscount, wanted 4, got %u\n",
+ test_err("miscount, wanted 4, got %u",
BTRFS_I(inode)->outstanding_extents);
goto out;
}
@@ -1037,12 +1041,12 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize)
BTRFS_MAX_EXTENT_SIZE + sectorsize,
BTRFS_MAX_EXTENT_SIZE + 2 * sectorsize - 1, 0, NULL, 0);
if (ret) {
- test_msg("btrfs_set_extent_delalloc returned %d\n", ret);
+ test_err("btrfs_set_extent_delalloc returned %d", ret);
goto out;
}
if (BTRFS_I(inode)->outstanding_extents != 3) {
ret = -EINVAL;
- test_msg("Miscount, wanted 3, got %u\n",
+ test_err("miscount, wanted 3, got %u",
BTRFS_I(inode)->outstanding_extents);
goto out;
}
@@ -1054,12 +1058,12 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize)
EXTENT_DIRTY | EXTENT_DELALLOC |
EXTENT_UPTODATE, 0, 0, NULL);
if (ret) {
- test_msg("clear_extent_bit returned %d\n", ret);
+ test_err("clear_extent_bit returned %d", ret);
goto out;
}
if (BTRFS_I(inode)->outstanding_extents != 4) {
ret = -EINVAL;
- test_msg("Miscount, wanted 4, got %u\n",
+ test_err("miscount, wanted 4, got %u",
BTRFS_I(inode)->outstanding_extents);
goto out;
}
@@ -1072,12 +1076,12 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize)
BTRFS_MAX_EXTENT_SIZE + sectorsize,
BTRFS_MAX_EXTENT_SIZE + 2 * sectorsize - 1, 0, NULL, 0);
if (ret) {
- test_msg("btrfs_set_extent_delalloc returned %d\n", ret);
+ test_err("btrfs_set_extent_delalloc returned %d", ret);
goto out;
}
if (BTRFS_I(inode)->outstanding_extents != 3) {
ret = -EINVAL;
- test_msg("Miscount, wanted 3, got %u\n",
+ test_err("miscount, wanted 3, got %u",
BTRFS_I(inode)->outstanding_extents);
goto out;
}
@@ -1087,12 +1091,12 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize)
EXTENT_DIRTY | EXTENT_DELALLOC |
EXTENT_UPTODATE, 0, 0, NULL);
if (ret) {
- test_msg("clear_extent_bit returned %d\n", ret);
+ test_err("clear_extent_bit returned %d", ret);
goto out;
}
if (BTRFS_I(inode)->outstanding_extents) {
ret = -EINVAL;
- test_msg("Miscount, wanted 0, got %u\n",
+ test_err("miscount, wanted 0, got %u",
BTRFS_I(inode)->outstanding_extents);
goto out;
}
@@ -1115,14 +1119,14 @@ int btrfs_test_inodes(u32 sectorsize, u32 nodesize)
set_bit(EXTENT_FLAG_COMPRESSED, &compressed_only);
set_bit(EXTENT_FLAG_PREALLOC, &prealloc_only);
- test_msg("Running btrfs_get_extent tests\n");
+ test_msg("running btrfs_get_extent tests");
ret = test_btrfs_get_extent(sectorsize, nodesize);
if (ret)
return ret;
- test_msg("Running hole first btrfs_get_extent test\n");
+ test_msg("running hole first btrfs_get_extent test");
ret = test_hole_first(sectorsize, nodesize);
if (ret)
return ret;
- test_msg("Running outstanding_extents tests\n");
+ test_msg("running outstanding_extents tests");
return test_extent_accounting(sectorsize, nodesize);
}
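
The conversion above is mechanical: failure messages move from test_msg() to test_err(), lose their leading capital and their trailing "\n". One plausible shape of the two helpers, assuming they live in fs/btrfs/tests/btrfs-tests.h and append the newline themselves (which would explain why every converted call site drops it):

	/* sketch, not the verbatim kernel definitions */
	#define test_msg(fmt, ...) \
		pr_info("BTRFS: selftest: " fmt "\n", ##__VA_ARGS__)
	#define test_err(fmt, ...) \
		pr_err("BTRFS: selftest: " fmt "\n", ##__VA_ARGS__)

With pr_err(), failed assertions land in the log at KERN_ERR rather than KERN_INFO, so they are much harder to miss when scanning test output.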
diff --git a/fs/btrfs/tests/qgroup-tests.c b/fs/btrfs/tests/qgroup-tests.c
index 39b95783f736..ace94db09d29 100644
--- a/fs/btrfs/tests/qgroup-tests.c
+++ b/fs/btrfs/tests/qgroup-tests.c
@@ -24,7 +24,7 @@ static int insert_normal_tree_ref(struct btrfs_root *root, u64 bytenr,
u32 size = sizeof(*item) + sizeof(*iref) + sizeof(*block_info);
int ret;
- btrfs_init_dummy_trans(&trans);
+ btrfs_init_dummy_trans(&trans, NULL);
ins.objectid = bytenr;
ins.type = BTRFS_EXTENT_ITEM_KEY;
@@ -32,14 +32,14 @@ static int insert_normal_tree_ref(struct btrfs_root *root, u64 bytenr,
path = btrfs_alloc_path();
if (!path) {
- test_msg("Couldn't allocate path\n");
+ test_err("couldn't allocate path");
return -ENOMEM;
}
path->leave_spinning = 1;
ret = btrfs_insert_empty_item(&trans, root, path, &ins, size);
if (ret) {
- test_msg("Couldn't insert ref %d\n", ret);
+ test_err("couldn't insert ref %d", ret);
btrfs_free_path(path);
return ret;
}
@@ -74,7 +74,7 @@ static int add_tree_ref(struct btrfs_root *root, u64 bytenr, u64 num_bytes,
u64 refs;
int ret;
- btrfs_init_dummy_trans(&trans);
+ btrfs_init_dummy_trans(&trans, NULL);
key.objectid = bytenr;
key.type = BTRFS_EXTENT_ITEM_KEY;
@@ -82,14 +82,14 @@ static int add_tree_ref(struct btrfs_root *root, u64 bytenr, u64 num_bytes,
path = btrfs_alloc_path();
if (!path) {
- test_msg("Couldn't allocate path\n");
+ test_err("couldn't allocate path");
return -ENOMEM;
}
path->leave_spinning = 1;
ret = btrfs_search_slot(&trans, root, &key, path, 0, 1);
if (ret) {
- test_msg("Couldn't find extent ref\n");
+ test_err("couldn't find extent ref");
btrfs_free_path(path);
return ret;
}
@@ -111,7 +111,7 @@ static int add_tree_ref(struct btrfs_root *root, u64 bytenr, u64 num_bytes,
ret = btrfs_insert_empty_item(&trans, root, path, &key, 0);
if (ret)
- test_msg("Failed to insert backref\n");
+ test_err("failed to insert backref");
btrfs_free_path(path);
return ret;
}
@@ -124,7 +124,7 @@ static int remove_extent_item(struct btrfs_root *root, u64 bytenr,
struct btrfs_path *path;
int ret;
- btrfs_init_dummy_trans(&trans);
+ btrfs_init_dummy_trans(&trans, NULL);
key.objectid = bytenr;
key.type = BTRFS_EXTENT_ITEM_KEY;
@@ -132,14 +132,14 @@ static int remove_extent_item(struct btrfs_root *root, u64 bytenr,
path = btrfs_alloc_path();
if (!path) {
- test_msg("Couldn't allocate path\n");
+ test_err("couldn't allocate path");
return -ENOMEM;
}
path->leave_spinning = 1;
ret = btrfs_search_slot(&trans, root, &key, path, -1, 1);
if (ret) {
- test_msg("Didn't find our key %d\n", ret);
+ test_err("didn't find our key %d", ret);
btrfs_free_path(path);
return ret;
}
@@ -158,7 +158,7 @@ static int remove_extent_ref(struct btrfs_root *root, u64 bytenr,
u64 refs;
int ret;
- btrfs_init_dummy_trans(&trans);
+ btrfs_init_dummy_trans(&trans, NULL);
key.objectid = bytenr;
key.type = BTRFS_EXTENT_ITEM_KEY;
@@ -166,14 +166,14 @@ static int remove_extent_ref(struct btrfs_root *root, u64 bytenr,
path = btrfs_alloc_path();
if (!path) {
- test_msg("Couldn't allocate path\n");
+ test_err("couldn't allocate path");
return -ENOMEM;
}
path->leave_spinning = 1;
ret = btrfs_search_slot(&trans, root, &key, path, 0, 1);
if (ret) {
- test_msg("Couldn't find extent ref\n");
+ test_err("couldn't find extent ref");
btrfs_free_path(path);
return ret;
}
@@ -195,7 +195,7 @@ static int remove_extent_ref(struct btrfs_root *root, u64 bytenr,
ret = btrfs_search_slot(&trans, root, &key, path, -1, 1);
if (ret) {
- test_msg("Couldn't find backref %d\n", ret);
+ test_err("couldn't find backref %d", ret);
btrfs_free_path(path);
return ret;
}
@@ -213,12 +213,12 @@ static int test_no_shared_qgroup(struct btrfs_root *root,
struct ulist *new_roots = NULL;
int ret;
- btrfs_init_dummy_trans(&trans);
+ btrfs_init_dummy_trans(&trans, fs_info);
- test_msg("Qgroup basic add\n");
+ test_msg("qgroup basic add");
ret = btrfs_create_qgroup(NULL, fs_info, BTRFS_FS_TREE_OBJECTID);
if (ret) {
- test_msg("Couldn't create a qgroup %d\n", ret);
+ test_err("couldn't create a qgroup %d", ret);
return ret;
}
@@ -231,7 +231,7 @@ static int test_no_shared_qgroup(struct btrfs_root *root,
false);
if (ret) {
ulist_free(old_roots);
- test_msg("Couldn't find old roots: %d\n", ret);
+ test_err("couldn't find old roots: %d", ret);
return ret;
}
@@ -245,20 +245,20 @@ static int test_no_shared_qgroup(struct btrfs_root *root,
if (ret) {
ulist_free(old_roots);
ulist_free(new_roots);
- test_msg("Couldn't find old roots: %d\n", ret);
+ test_err("couldn't find old roots: %d", ret);
return ret;
}
ret = btrfs_qgroup_account_extent(&trans, fs_info, nodesize,
nodesize, old_roots, new_roots);
if (ret) {
- test_msg("Couldn't account space for a qgroup %d\n", ret);
+ test_err("couldn't account space for a qgroup %d", ret);
return ret;
}
if (btrfs_verify_qgroup_counts(fs_info, BTRFS_FS_TREE_OBJECTID,
nodesize, nodesize)) {
- test_msg("Qgroup counts didn't match expected values\n");
+ test_err("qgroup counts didn't match expected values");
return -EINVAL;
}
old_roots = NULL;
@@ -268,7 +268,7 @@ static int test_no_shared_qgroup(struct btrfs_root *root,
false);
if (ret) {
ulist_free(old_roots);
- test_msg("Couldn't find old roots: %d\n", ret);
+ test_err("couldn't find old roots: %d", ret);
return ret;
}
@@ -281,19 +281,19 @@ static int test_no_shared_qgroup(struct btrfs_root *root,
if (ret) {
ulist_free(old_roots);
ulist_free(new_roots);
- test_msg("Couldn't find old roots: %d\n", ret);
+ test_err("couldn't find old roots: %d", ret);
return ret;
}
ret = btrfs_qgroup_account_extent(&trans, fs_info, nodesize,
nodesize, old_roots, new_roots);
if (ret) {
- test_msg("Couldn't account space for a qgroup %d\n", ret);
+ test_err("couldn't account space for a qgroup %d", ret);
return -EINVAL;
}
if (btrfs_verify_qgroup_counts(fs_info, BTRFS_FS_TREE_OBJECTID, 0, 0)) {
- test_msg("Qgroup counts didn't match expected values\n");
+ test_err("qgroup counts didn't match expected values");
return -EINVAL;
}
@@ -314,9 +314,9 @@ static int test_multiple_refs(struct btrfs_root *root,
struct ulist *new_roots = NULL;
int ret;
- btrfs_init_dummy_trans(&trans);
+ btrfs_init_dummy_trans(&trans, fs_info);
- test_msg("Qgroup multiple refs test\n");
+ test_msg("qgroup multiple refs test");
/*
* We have BTRFS_FS_TREE_OBJECTID created already from the
@@ -324,7 +324,7 @@ static int test_multiple_refs(struct btrfs_root *root,
*/
ret = btrfs_create_qgroup(NULL, fs_info, BTRFS_FIRST_FREE_OBJECTID);
if (ret) {
- test_msg("Couldn't create a qgroup %d\n", ret);
+ test_err("couldn't create a qgroup %d", ret);
return ret;
}
@@ -332,7 +332,7 @@ static int test_multiple_refs(struct btrfs_root *root,
false);
if (ret) {
ulist_free(old_roots);
- test_msg("Couldn't find old roots: %d\n", ret);
+ test_err("couldn't find old roots: %d", ret);
return ret;
}
@@ -346,20 +346,20 @@ static int test_multiple_refs(struct btrfs_root *root,
if (ret) {
ulist_free(old_roots);
ulist_free(new_roots);
- test_msg("Couldn't find old roots: %d\n", ret);
+ test_err("couldn't find old roots: %d", ret);
return ret;
}
ret = btrfs_qgroup_account_extent(&trans, fs_info, nodesize,
nodesize, old_roots, new_roots);
if (ret) {
- test_msg("Couldn't account space for a qgroup %d\n", ret);
+ test_err("couldn't account space for a qgroup %d", ret);
return ret;
}
if (btrfs_verify_qgroup_counts(fs_info, BTRFS_FS_TREE_OBJECTID,
nodesize, nodesize)) {
- test_msg("Qgroup counts didn't match expected values\n");
+ test_err("qgroup counts didn't match expected values");
return -EINVAL;
}
@@ -367,7 +367,7 @@ static int test_multiple_refs(struct btrfs_root *root,
false);
if (ret) {
ulist_free(old_roots);
- test_msg("Couldn't find old roots: %d\n", ret);
+ test_err("couldn't find old roots: %d", ret);
return ret;
}
@@ -381,26 +381,26 @@ static int test_multiple_refs(struct btrfs_root *root,
if (ret) {
ulist_free(old_roots);
ulist_free(new_roots);
- test_msg("Couldn't find old roots: %d\n", ret);
+ test_err("couldn't find old roots: %d", ret);
return ret;
}
ret = btrfs_qgroup_account_extent(&trans, fs_info, nodesize,
nodesize, old_roots, new_roots);
if (ret) {
- test_msg("Couldn't account space for a qgroup %d\n", ret);
+ test_err("couldn't account space for a qgroup %d", ret);
return ret;
}
if (btrfs_verify_qgroup_counts(fs_info, BTRFS_FS_TREE_OBJECTID,
nodesize, 0)) {
- test_msg("Qgroup counts didn't match expected values\n");
+ test_err("qgroup counts didn't match expected values");
return -EINVAL;
}
if (btrfs_verify_qgroup_counts(fs_info, BTRFS_FIRST_FREE_OBJECTID,
nodesize, 0)) {
- test_msg("Qgroup counts didn't match expected values\n");
+ test_err("qgroup counts didn't match expected values");
return -EINVAL;
}
@@ -408,7 +408,7 @@ static int test_multiple_refs(struct btrfs_root *root,
false);
if (ret) {
ulist_free(old_roots);
- test_msg("Couldn't find old roots: %d\n", ret);
+ test_err("couldn't find old roots: %d", ret);
return ret;
}
@@ -422,26 +422,26 @@ static int test_multiple_refs(struct btrfs_root *root,
if (ret) {
ulist_free(old_roots);
ulist_free(new_roots);
- test_msg("Couldn't find old roots: %d\n", ret);
+ test_err("couldn't find old roots: %d", ret);
return ret;
}
ret = btrfs_qgroup_account_extent(&trans, fs_info, nodesize,
nodesize, old_roots, new_roots);
if (ret) {
- test_msg("Couldn't account space for a qgroup %d\n", ret);
+ test_err("couldn't account space for a qgroup %d", ret);
return ret;
}
if (btrfs_verify_qgroup_counts(fs_info, BTRFS_FIRST_FREE_OBJECTID,
0, 0)) {
- test_msg("Qgroup counts didn't match expected values\n");
+ test_err("qgroup counts didn't match expected values");
return -EINVAL;
}
if (btrfs_verify_qgroup_counts(fs_info, BTRFS_FS_TREE_OBJECTID,
nodesize, nodesize)) {
- test_msg("Qgroup counts didn't match expected values\n");
+ test_err("qgroup counts didn't match expected values");
return -EINVAL;
}
@@ -457,13 +457,13 @@ int btrfs_test_qgroups(u32 sectorsize, u32 nodesize)
fs_info = btrfs_alloc_dummy_fs_info(nodesize, sectorsize);
if (!fs_info) {
- test_msg("Couldn't allocate dummy fs info\n");
+ test_err("couldn't allocate dummy fs info");
return -ENOMEM;
}
root = btrfs_alloc_dummy_root(fs_info);
if (IS_ERR(root)) {
- test_msg("Couldn't allocate root\n");
+ test_err("couldn't allocate root");
ret = PTR_ERR(root);
goto out;
}
@@ -485,7 +485,7 @@ int btrfs_test_qgroups(u32 sectorsize, u32 nodesize)
*/
root->node = alloc_test_extent_buffer(root->fs_info, nodesize);
if (!root->node) {
- test_msg("Couldn't allocate dummy buffer\n");
+ test_err("couldn't allocate dummy buffer");
ret = -ENOMEM;
goto out;
}
@@ -495,7 +495,7 @@ int btrfs_test_qgroups(u32 sectorsize, u32 nodesize)
tmp_root = btrfs_alloc_dummy_root(fs_info);
if (IS_ERR(tmp_root)) {
- test_msg("Couldn't allocate a fs root\n");
+ test_err("couldn't allocate a fs root");
ret = PTR_ERR(tmp_root);
goto out;
}
@@ -504,13 +504,13 @@ int btrfs_test_qgroups(u32 sectorsize, u32 nodesize)
root->fs_info->fs_root = tmp_root;
ret = btrfs_insert_fs_root(root->fs_info, tmp_root);
if (ret) {
- test_msg("Couldn't insert fs root %d\n", ret);
+ test_err("couldn't insert fs root %d", ret);
goto out;
}
tmp_root = btrfs_alloc_dummy_root(fs_info);
if (IS_ERR(tmp_root)) {
- test_msg("Couldn't allocate a fs root\n");
+ test_err("couldn't allocate a fs root");
ret = PTR_ERR(tmp_root);
goto out;
}
@@ -518,11 +518,11 @@ int btrfs_test_qgroups(u32 sectorsize, u32 nodesize)
tmp_root->root_key.objectid = BTRFS_FIRST_FREE_OBJECTID;
ret = btrfs_insert_fs_root(root->fs_info, tmp_root);
if (ret) {
- test_msg("Couldn't insert fs root %d\n", ret);
+ test_err("couldn't insert fs root %d", ret);
goto out;
}
- test_msg("Running qgroup tests\n");
+ test_msg("running qgroup tests");
ret = test_no_shared_qgroup(root, sectorsize, nodesize);
if (ret)
goto out;
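
The btrfs_init_dummy_trans() calls in this file gain an fs_info argument: the qgroup tests pass the dummy fs_info so accounting code can reach it through trans->fs_info, while callers that never dereference it pass NULL. A sketch of the updated helper, assuming it keeps its old zero-initialization (the real definition lives in fs/btrfs/tests/btrfs-tests.h and may differ in detail):

	static inline void btrfs_init_dummy_trans(struct btrfs_trans_handle *trans,
						  struct btrfs_fs_info *fs_info)
	{
		memset(trans, 0, sizeof(*trans));
		trans->transid = 1;
		trans->type = __TRANS_DUMMY;
		trans->fs_info = fs_info;	/* new: lets qgroup code find fs_info */
	}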
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index c944b4769e3c..4485eae41e88 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -877,12 +877,7 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
atomic_dec(&cur_trans->num_writers);
extwriter_counter_dec(cur_trans, trans->type);
- /*
- * Make sure counter is updated before we wake up waiters.
- */
- smp_mb();
- if (waitqueue_active(&cur_trans->writer_wait))
- wake_up(&cur_trans->writer_wait);
+ cond_wake_up(&cur_trans->writer_wait);
btrfs_put_transaction(cur_trans);
if (current->journal_info == trans)
@@ -1250,7 +1245,6 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans)
btrfs_free_log(trans, root);
btrfs_update_reloc_root(trans, root);
- btrfs_orphan_commit_root(trans, root);
btrfs_save_ino_cache(root, trans);
@@ -1640,15 +1634,14 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
btrfs_abort_transaction(trans, ret);
goto fail;
}
- ret = btrfs_uuid_tree_add(trans, fs_info, new_uuid.b,
- BTRFS_UUID_KEY_SUBVOL, objectid);
+ ret = btrfs_uuid_tree_add(trans, new_uuid.b, BTRFS_UUID_KEY_SUBVOL,
+ objectid);
if (ret) {
btrfs_abort_transaction(trans, ret);
goto fail;
}
if (!btrfs_is_empty_uuid(new_root_item->received_uuid)) {
- ret = btrfs_uuid_tree_add(trans, fs_info,
- new_root_item->received_uuid,
+ ret = btrfs_uuid_tree_add(trans, new_root_item->received_uuid,
BTRFS_UUID_KEY_RECEIVED_SUBVOL,
objectid);
if (ret && ret != -EEXIST) {
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index d8c0826bc2c7..94439482a0ec 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -139,7 +139,6 @@ struct btrfs_pending_snapshot {
struct btrfs_path *path;
/* block reservation for the operation */
struct btrfs_block_rsv block_rsv;
- u64 qgroup_reserved;
/* extra metadata reservation for relocation */
int error;
bool readonly;
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 43758e30aa7a..f8220ec02036 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -222,11 +222,8 @@ int btrfs_pin_log_trans(struct btrfs_root *root)
void btrfs_end_log_trans(struct btrfs_root *root)
{
if (atomic_dec_and_test(&root->log_writers)) {
- /*
- * Implicit memory barrier after atomic_dec_and_test
- */
- if (waitqueue_active(&root->log_writer_wait))
- wake_up(&root->log_writer_wait);
+ /* atomic_dec_and_test implies a barrier */
+ cond_wake_up_nomb(&root->log_writer_wait);
}
}
@@ -2988,11 +2985,8 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
mutex_lock(&log_root_tree->log_mutex);
if (atomic_dec_and_test(&log_root_tree->log_writers)) {
- /*
- * Implicit memory barrier after atomic_dec_and_test
- */
- if (waitqueue_active(&log_root_tree->log_writer_wait))
- wake_up(&log_root_tree->log_writer_wait);
+ /* atomic_dec_and_test implies a barrier */
+ cond_wake_up_nomb(&log_root_tree->log_writer_wait);
}
if (ret) {
@@ -3116,10 +3110,11 @@ out_wake_log_root:
mutex_unlock(&log_root_tree->log_mutex);
/*
- * The barrier before waitqueue_active is implied by mutex_unlock
+ * The barrier before waitqueue_active (in cond_wake_up) is needed so
+ * all the updates above are seen by the woken threads. It might not be
+ * necessary, but proving that seems to be hard.
*/
- if (waitqueue_active(&log_root_tree->log_commit_wait[index2]))
- wake_up(&log_root_tree->log_commit_wait[index2]);
+ cond_wake_up(&log_root_tree->log_commit_wait[index2]);
out:
mutex_lock(&root->log_mutex);
btrfs_remove_all_log_ctxs(root, index1, ret);
@@ -3128,10 +3123,11 @@ out:
mutex_unlock(&root->log_mutex);
/*
- * The barrier before waitqueue_active is implied by mutex_unlock
+ * The barrier before waitqueue_active (in cond_wake_up) is needed so
+ * all the updates above are seen by the woken threads. It might not be
+ * necessary, but proving that seems to be hard.
*/
- if (waitqueue_active(&root->log_commit_wait[index1]))
- wake_up(&root->log_commit_wait[index1]);
+ cond_wake_up(&root->log_commit_wait[index1]);
return ret;
}
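
The open-coded smp_mb()/waitqueue_active()/wake_up() sequences removed above are folded into two helpers. A hedged sketch of their likely definitions (assumed to live in ctree.h): cond_wake_up() pays for its own barrier, while cond_wake_up_nomb() is for call sites where the preceding code, such as atomic_dec_and_test() or an unlock/lock pair, already implies one:

	static inline void cond_wake_up(wait_queue_head_t *wq)
	{
		/* wq_has_sleeper() implies a full barrier before the check */
		if (wq_has_sleeper(wq))
			wake_up(wq);
	}

	static inline void cond_wake_up_nomb(wait_queue_head_t *wq)
	{
		/* no barrier here: the caller must already provide one */
		if (waitqueue_active(wq))
			wake_up(wq);
	}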
@@ -4320,6 +4316,110 @@ static int log_one_extent(struct btrfs_trans_handle *trans,
return ret;
}
+/*
+ * Log all prealloc extents beyond the inode's i_size to make sure we do not
+ * lose them after doing a fast fsync and replaying the log. We scan the
+ * subvolume's root instead of iterating the inode's extent map tree because
+ * otherwise we can log incorrect extent items based on extent map conversion.
+ * That can happen due to the fact that extent maps are merged when they
+ * are not in the extent map tree's list of modified extents.
+ */
+static int btrfs_log_prealloc_extents(struct btrfs_trans_handle *trans,
+ struct btrfs_inode *inode,
+ struct btrfs_path *path)
+{
+ struct btrfs_root *root = inode->root;
+ struct btrfs_key key;
+ const u64 i_size = i_size_read(&inode->vfs_inode);
+ const u64 ino = btrfs_ino(inode);
+ struct btrfs_path *dst_path = NULL;
+ u64 last_extent = (u64)-1;
+ int ins_nr = 0;
+ int start_slot;
+ int ret;
+
+ if (!(inode->flags & BTRFS_INODE_PREALLOC))
+ return 0;
+
+ key.objectid = ino;
+ key.type = BTRFS_EXTENT_DATA_KEY;
+ key.offset = i_size;
+ ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+ if (ret < 0)
+ goto out;
+
+ while (true) {
+ struct extent_buffer *leaf = path->nodes[0];
+ int slot = path->slots[0];
+
+ if (slot >= btrfs_header_nritems(leaf)) {
+ if (ins_nr > 0) {
+ ret = copy_items(trans, inode, dst_path, path,
+ &last_extent, start_slot,
+ ins_nr, 1, 0);
+ if (ret < 0)
+ goto out;
+ ins_nr = 0;
+ }
+ ret = btrfs_next_leaf(root, path);
+ if (ret < 0)
+ goto out;
+ if (ret > 0) {
+ ret = 0;
+ break;
+ }
+ continue;
+ }
+
+ btrfs_item_key_to_cpu(leaf, &key, slot);
+ if (key.objectid > ino)
+ break;
+ if (WARN_ON_ONCE(key.objectid < ino) ||
+ key.type < BTRFS_EXTENT_DATA_KEY ||
+ key.offset < i_size) {
+ path->slots[0]++;
+ continue;
+ }
+ if (last_extent == (u64)-1) {
+ last_extent = key.offset;
+ /*
+ * Avoid logging extent items logged in past fsync calls
+ * and leading to duplicate keys in the log tree.
+ */
+ do {
+ ret = btrfs_truncate_inode_items(trans,
+ root->log_root,
+ &inode->vfs_inode,
+ i_size,
+ BTRFS_EXTENT_DATA_KEY);
+ } while (ret == -EAGAIN);
+ if (ret)
+ goto out;
+ }
+ if (ins_nr == 0)
+ start_slot = slot;
+ ins_nr++;
+ path->slots[0]++;
+ if (!dst_path) {
+ dst_path = btrfs_alloc_path();
+ if (!dst_path) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ }
+ }
+ if (ins_nr > 0) {
+ ret = copy_items(trans, inode, dst_path, path, &last_extent,
+ start_slot, ins_nr, 1, 0);
+ if (ret > 0)
+ ret = 0;
+ }
+out:
+ btrfs_release_path(path);
+ btrfs_free_path(dst_path);
+ return ret;
+}
+
static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_inode *inode,
@@ -4362,6 +4462,11 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
if (em->generation <= test_gen)
continue;
+ /* We log prealloc extents beyond eof later. */
+ if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags) &&
+ em->start >= i_size_read(&inode->vfs_inode))
+ continue;
+
if (em->start < logged_start)
logged_start = em->start;
if ((em->start + em->len - 1) > logged_end)
@@ -4374,31 +4479,6 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
num++;
}
- /*
- * Add all prealloc extents beyond the inode's i_size to make sure we
- * don't lose them after doing a fast fsync and replaying the log.
- */
- if (inode->flags & BTRFS_INODE_PREALLOC) {
- struct rb_node *node;
-
- for (node = rb_last(&tree->map); node; node = rb_prev(node)) {
- em = rb_entry(node, struct extent_map, rb_node);
- if (em->start < i_size_read(&inode->vfs_inode))
- break;
- if (!list_empty(&em->list))
- continue;
- /* Same as above loop. */
- if (++num > 32768) {
- list_del_init(&tree->modified_extents);
- ret = -EFBIG;
- goto process;
- }
- refcount_inc(&em->refs);
- set_bit(EXTENT_FLAG_LOGGING, &em->flags);
- list_add_tail(&em->list, &extents);
- }
- }
-
list_sort(NULL, &extents, extent_cmp);
btrfs_get_logged_extents(inode, logged_list, logged_start, logged_end);
/*
@@ -4443,6 +4523,9 @@ process:
up_write(&inode->dio_sem);
btrfs_release_path(path);
+ if (!ret)
+ ret = btrfs_log_prealloc_extents(trans, inode, path);
+
return ret;
}
@@ -4827,6 +4910,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
struct extent_map_tree *em_tree = &inode->extent_tree;
u64 logged_isize = 0;
bool need_log_inode_item = true;
+ bool xattrs_logged = false;
path = btrfs_alloc_path();
if (!path)
@@ -5128,6 +5212,7 @@ next_key:
err = btrfs_log_all_xattrs(trans, root, inode, path, dst_path);
if (err)
goto out_unlock;
+ xattrs_logged = true;
if (max_key.type >= BTRFS_EXTENT_DATA_KEY && !fast_search) {
btrfs_release_path(path);
btrfs_release_path(dst_path);
@@ -5140,6 +5225,11 @@ log_extents:
btrfs_release_path(dst_path);
if (need_log_inode_item) {
err = log_inode_item(trans, log, dst_path, inode);
+ if (!err && !xattrs_logged) {
+ err = btrfs_log_all_xattrs(trans, root, inode, path,
+ dst_path);
+ btrfs_release_path(path);
+ }
if (err)
goto out_unlock;
}
diff --git a/fs/btrfs/uuid-tree.c b/fs/btrfs/uuid-tree.c
index 1ba7ca2a4200..3b2ae342e649 100644
--- a/fs/btrfs/uuid-tree.c
+++ b/fs/btrfs/uuid-tree.c
@@ -79,10 +79,10 @@ out:
return ret;
}
-int btrfs_uuid_tree_add(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info, u8 *uuid, u8 type,
+int btrfs_uuid_tree_add(struct btrfs_trans_handle *trans, u8 *uuid, u8 type,
u64 subid_cpu)
{
+ struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_root *uuid_root = fs_info->uuid_root;
int ret;
struct btrfs_path *path = NULL;
@@ -144,10 +144,10 @@ out:
return ret;
}
-int btrfs_uuid_tree_rem(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info, u8 *uuid, u8 type,
+int btrfs_uuid_tree_remove(struct btrfs_trans_handle *trans, u8 *uuid, u8 type,
u64 subid)
{
+ struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_root *uuid_root = fs_info->uuid_root;
int ret;
struct btrfs_path *path = NULL;
@@ -239,7 +239,7 @@ static int btrfs_uuid_iter_rem(struct btrfs_root *uuid_root, u8 *uuid, u8 type,
goto out;
}
- ret = btrfs_uuid_tree_rem(trans, uuid_root->fs_info, uuid, type, subid);
+ ret = btrfs_uuid_tree_remove(trans, uuid, type, subid);
btrfs_end_transaction(trans);
out:
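
The API change in this file, condensed from the hunks above: callers stop passing fs_info (the helpers now read trans->fs_info), and btrfs_uuid_tree_rem() is renamed to the more conventional btrfs_uuid_tree_remove():

	/* before */
	ret = btrfs_uuid_tree_add(trans, fs_info, new_uuid.b,
				  BTRFS_UUID_KEY_SUBVOL, objectid);

	/* after */
	ret = btrfs_uuid_tree_add(trans, new_uuid.b, BTRFS_UUID_KEY_SUBVOL,
				  objectid);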
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 292266f6ab9c..e034ad9e23b4 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -40,6 +40,9 @@ const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
.tolerated_failures = 1,
.devs_increment = 2,
.ncopies = 2,
+ .raid_name = "raid10",
+ .bg_flag = BTRFS_BLOCK_GROUP_RAID10,
+ .mindev_error = BTRFS_ERROR_DEV_RAID10_MIN_NOT_MET,
},
[BTRFS_RAID_RAID1] = {
.sub_stripes = 1,
@@ -49,6 +52,9 @@ const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
.tolerated_failures = 1,
.devs_increment = 2,
.ncopies = 2,
+ .raid_name = "raid1",
+ .bg_flag = BTRFS_BLOCK_GROUP_RAID1,
+ .mindev_error = BTRFS_ERROR_DEV_RAID1_MIN_NOT_MET,
},
[BTRFS_RAID_DUP] = {
.sub_stripes = 1,
@@ -58,6 +64,9 @@ const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
.tolerated_failures = 0,
.devs_increment = 1,
.ncopies = 2,
+ .raid_name = "dup",
+ .bg_flag = BTRFS_BLOCK_GROUP_DUP,
+ .mindev_error = 0,
},
[BTRFS_RAID_RAID0] = {
.sub_stripes = 1,
@@ -67,6 +76,9 @@ const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
.tolerated_failures = 0,
.devs_increment = 1,
.ncopies = 1,
+ .raid_name = "raid0",
+ .bg_flag = BTRFS_BLOCK_GROUP_RAID0,
+ .mindev_error = 0,
},
[BTRFS_RAID_SINGLE] = {
.sub_stripes = 1,
@@ -76,6 +88,9 @@ const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
.tolerated_failures = 0,
.devs_increment = 1,
.ncopies = 1,
+ .raid_name = "single",
+ .bg_flag = 0,
+ .mindev_error = 0,
},
[BTRFS_RAID_RAID5] = {
.sub_stripes = 1,
@@ -85,6 +100,9 @@ const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
.tolerated_failures = 1,
.devs_increment = 1,
.ncopies = 2,
+ .raid_name = "raid5",
+ .bg_flag = BTRFS_BLOCK_GROUP_RAID5,
+ .mindev_error = BTRFS_ERROR_DEV_RAID5_MIN_NOT_MET,
},
[BTRFS_RAID_RAID6] = {
.sub_stripes = 1,
@@ -94,33 +112,19 @@ const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
.tolerated_failures = 2,
.devs_increment = 1,
.ncopies = 3,
+ .raid_name = "raid6",
+ .bg_flag = BTRFS_BLOCK_GROUP_RAID6,
+ .mindev_error = BTRFS_ERROR_DEV_RAID6_MIN_NOT_MET,
},
};
-const u64 btrfs_raid_group[BTRFS_NR_RAID_TYPES] = {
- [BTRFS_RAID_RAID10] = BTRFS_BLOCK_GROUP_RAID10,
- [BTRFS_RAID_RAID1] = BTRFS_BLOCK_GROUP_RAID1,
- [BTRFS_RAID_DUP] = BTRFS_BLOCK_GROUP_DUP,
- [BTRFS_RAID_RAID0] = BTRFS_BLOCK_GROUP_RAID0,
- [BTRFS_RAID_SINGLE] = 0,
- [BTRFS_RAID_RAID5] = BTRFS_BLOCK_GROUP_RAID5,
- [BTRFS_RAID_RAID6] = BTRFS_BLOCK_GROUP_RAID6,
-};
+const char *get_raid_name(enum btrfs_raid_types type)
+{
+ if (type >= BTRFS_NR_RAID_TYPES)
+ return NULL;
-/*
- * Table to convert BTRFS_RAID_* to the error code if minimum number of devices
- * condition is not met. Zero means there's no corresponding
- * BTRFS_ERROR_DEV_*_NOT_MET value.
- */
-const int btrfs_raid_mindev_error[BTRFS_NR_RAID_TYPES] = {
- [BTRFS_RAID_RAID10] = BTRFS_ERROR_DEV_RAID10_MIN_NOT_MET,
- [BTRFS_RAID_RAID1] = BTRFS_ERROR_DEV_RAID1_MIN_NOT_MET,
- [BTRFS_RAID_DUP] = 0,
- [BTRFS_RAID_RAID0] = 0,
- [BTRFS_RAID_SINGLE] = 0,
- [BTRFS_RAID_RAID5] = BTRFS_ERROR_DEV_RAID5_MIN_NOT_MET,
- [BTRFS_RAID_RAID6] = BTRFS_ERROR_DEV_RAID6_MIN_NOT_MET,
-};
+ return btrfs_raid_array[type].raid_name;
+}
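
With raid_name, bg_flag and mindev_error folded into btrfs_raid_array, the two standalone lookup tables above become redundant and error messages can print profile names instead of raw bit masks. A typical consumer, as used by the balance hunks later in this patch:

	int index = btrfs_bg_flags_to_raid_index(bctl->data.target);

	btrfs_err(fs_info, "balance: invalid convert data profile %s",
		  get_raid_name(index));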
static int init_first_rw_device(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info);
@@ -167,12 +171,6 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
* may be used to exclude some operations from running concurrently without any
* modifications to the list (see write_all_supers)
*
- * volume_mutex
- * ------------
- * coarse lock owned by a mounted filesystem; used to exclude some operations
- * that cannot run in parallel and affect the higher-level properties of the
- * filesystem like: device add/deleting/resize/replace, or balance
- *
* balance_mutex
* -------------
* protects balance structures (status, state) and context accessed from
@@ -197,6 +195,41 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
* device_list_mutex
* chunk_mutex
* balance_mutex
+ *
+ *
+ * Exclusive operations, BTRFS_FS_EXCL_OP
+ * ======================================
+ *
+ * Maintains the exclusivity of the following operations that apply to the
+ * whole filesystem and cannot run in parallel.
+ *
+ * - Balance (*)
+ * - Device add
+ * - Device remove
+ * - Device replace (*)
+ * - Resize
+ *
+ * The device operations (as above) can be in one of the following states:
+ *
+ * - Running state
+ * - Paused state
+ * - Completed state
+ *
+ * Only device operations marked with (*) can go into the Paused state for the
+ * following reasons:
+ *
+ * - ioctl (only Balance can be Paused through ioctl)
+ * - filesystem remounted as read-only
+ * - filesystem unmounted and mounted as read-only
+ * - system power-cycle and filesystem mounted as read-only
+ * - filesystem or device errors leading to forced read-only
+ *
+ * BTRFS_FS_EXCL_OP flag is set and cleared using atomic operations.
+ * While an operation is in the Paused state, BTRFS_FS_EXCL_OP remains set.
+ * A device operation in Paused or Running state can be canceled or resumed
+ * either by ioctl (Balance only) or when remounted as read-write.
+ * BTRFS_FS_EXCL_OP flag is cleared when the device operation is canceled or
+ * completed.
*/
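
A hedged sketch of how the exclusivity described above is typically claimed and dropped; the actual call sites are in ioctl.c and volumes.c and may differ in detail:

	if (test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags))
		return BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;

	/* run the exclusive operation: balance, device add/remove/replace, resize */

	clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);

Because test_and_set_bit() is atomic, two racing ioctls cannot both claim the flag, and a paused operation simply skips the clear so the flag stays held across the pause.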
DEFINE_MUTEX(uuid_mutex);
@@ -227,14 +260,14 @@ static struct btrfs_fs_devices *alloc_fs_devices(const u8 *fsid)
INIT_LIST_HEAD(&fs_devs->devices);
INIT_LIST_HEAD(&fs_devs->resized_devices);
INIT_LIST_HEAD(&fs_devs->alloc_list);
- INIT_LIST_HEAD(&fs_devs->list);
+ INIT_LIST_HEAD(&fs_devs->fs_list);
if (fsid)
memcpy(fs_devs->fsid, fsid, BTRFS_FSID_SIZE);
return fs_devs;
}
-static void free_device(struct btrfs_device *device)
+void btrfs_free_device(struct btrfs_device *device)
{
rcu_string_free(device->name);
bio_put(device->flush_bio);
@@ -249,7 +282,7 @@ static void free_fs_devices(struct btrfs_fs_devices *fs_devices)
device = list_entry(fs_devices->devices.next,
struct btrfs_device, dev_list);
list_del(&device->dev_list);
- free_device(device);
+ btrfs_free_device(device);
}
kfree(fs_devices);
}
@@ -273,8 +306,8 @@ void __exit btrfs_cleanup_fs_uuids(void)
while (!list_empty(&fs_uuids)) {
fs_devices = list_entry(fs_uuids.next,
- struct btrfs_fs_devices, list);
- list_del(&fs_devices->list);
+ struct btrfs_fs_devices, fs_list);
+ list_del(&fs_devices->fs_list);
free_fs_devices(fs_devices);
}
}
@@ -282,7 +315,7 @@ void __exit btrfs_cleanup_fs_uuids(void)
/*
* Returns a pointer to a new btrfs_device on success; ERR_PTR() on error.
* Returned struct is not linked onto any lists and must be destroyed using
- * free_device.
+ * btrfs_free_device.
*/
static struct btrfs_device *__alloc_device(void)
{
@@ -327,10 +360,9 @@ static struct btrfs_device *__alloc_device(void)
static struct btrfs_device *find_device(struct btrfs_fs_devices *fs_devices,
u64 devid, const u8 *uuid)
{
- struct list_head *head = &fs_devices->devices;
struct btrfs_device *dev;
- list_for_each_entry(dev, head, dev_list) {
+ list_for_each_entry(dev, &fs_devices->devices, dev_list) {
if (dev->devid == devid &&
(!uuid || !memcmp(dev->uuid, uuid, BTRFS_UUID_SIZE))) {
return dev;
@@ -343,7 +375,7 @@ static noinline struct btrfs_fs_devices *find_fsid(u8 *fsid)
{
struct btrfs_fs_devices *fs_devices;
- list_for_each_entry(fs_devices, &fs_uuids, list) {
+ list_for_each_entry(fs_devices, &fs_uuids, fs_list) {
if (memcmp(fsid, fs_devices->fsid, BTRFS_FSID_SIZE) == 0)
return fs_devices;
}
@@ -607,7 +639,7 @@ static void btrfs_free_stale_devices(const char *path,
struct btrfs_fs_devices *fs_devs, *tmp_fs_devs;
struct btrfs_device *dev, *tmp_dev;
- list_for_each_entry_safe(fs_devs, tmp_fs_devs, &fs_uuids, list) {
+ list_for_each_entry_safe(fs_devs, tmp_fs_devs, &fs_uuids, fs_list) {
if (fs_devs->opened)
continue;
@@ -632,13 +664,13 @@ static void btrfs_free_stale_devices(const char *path,
/* delete the stale device */
if (fs_devs->num_devices == 1) {
btrfs_sysfs_remove_fsid(fs_devs);
- list_del(&fs_devs->list);
+ list_del(&fs_devs->fs_list);
free_fs_devices(fs_devs);
break;
} else {
fs_devs->num_devices--;
list_del(&dev->dev_list);
- free_device(dev);
+ btrfs_free_device(dev);
}
}
}
@@ -732,7 +764,7 @@ static noinline struct btrfs_device *device_list_add(const char *path,
if (IS_ERR(fs_devices))
return ERR_CAST(fs_devices);
- list_add(&fs_devices->list, &fs_uuids);
+ list_add(&fs_devices->fs_list, &fs_uuids);
device = NULL;
} else {
@@ -753,7 +785,7 @@ static noinline struct btrfs_device *device_list_add(const char *path,
name = rcu_string_strdup(path, GFP_NOFS);
if (!name) {
- free_device(device);
+ btrfs_free_device(device);
return ERR_PTR(-ENOMEM);
}
rcu_assign_pointer(device->name, name);
@@ -866,7 +898,7 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig)
name = rcu_string_strdup(orig_dev->name->str,
GFP_KERNEL);
if (!name) {
- free_device(device);
+ btrfs_free_device(device);
goto error;
}
rcu_assign_pointer(device->name, name);
@@ -938,7 +970,7 @@ again:
}
list_del_init(&device->dev_list);
fs_devices->num_devices--;
- free_device(device);
+ btrfs_free_device(device);
}
if (fs_devices->seed) {
@@ -956,7 +988,7 @@ static void free_device_rcu(struct rcu_head *head)
struct btrfs_device *device;
device = container_of(head, struct btrfs_device, rcu);
- free_device(device);
+ btrfs_free_device(device);
}
static void btrfs_close_bdev(struct btrfs_device *device)
@@ -1005,7 +1037,7 @@ static void btrfs_prepare_close_one_device(struct btrfs_device *device)
new_device->fs_devices = device->fs_devices;
}
-static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
+static int close_fs_devices(struct btrfs_fs_devices *fs_devices)
{
struct btrfs_device *device, *tmp;
struct list_head pending_put;
@@ -1050,7 +1082,7 @@ int btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
int ret;
mutex_lock(&uuid_mutex);
- ret = __btrfs_close_devices(fs_devices);
+ ret = close_fs_devices(fs_devices);
if (!fs_devices->opened) {
seed_devices = fs_devices->seed;
fs_devices->seed = NULL;
@@ -1060,23 +1092,22 @@ int btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
while (seed_devices) {
fs_devices = seed_devices;
seed_devices = fs_devices->seed;
- __btrfs_close_devices(fs_devices);
+ close_fs_devices(fs_devices);
free_fs_devices(fs_devices);
}
return ret;
}
-static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
+static int open_fs_devices(struct btrfs_fs_devices *fs_devices,
fmode_t flags, void *holder)
{
- struct list_head *head = &fs_devices->devices;
struct btrfs_device *device;
struct btrfs_device *latest_dev = NULL;
int ret = 0;
flags |= FMODE_EXCL;
- list_for_each_entry(device, head, dev_list) {
+ list_for_each_entry(device, &fs_devices->devices, dev_list) {
/* Just open everything we can; ignore failures here */
if (btrfs_open_one_device(fs_devices, device, flags, holder))
continue;
@@ -1115,15 +1146,16 @@ int btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
{
int ret;
- mutex_lock(&uuid_mutex);
+ mutex_lock(&fs_devices->device_list_mutex);
if (fs_devices->opened) {
fs_devices->opened++;
ret = 0;
} else {
list_sort(NULL, &fs_devices->devices, devid_cmp);
- ret = __btrfs_open_devices(fs_devices, flags, holder);
+ ret = open_fs_devices(fs_devices, flags, holder);
}
- mutex_unlock(&uuid_mutex);
+ mutex_unlock(&fs_devices->device_list_mutex);
+
return ret;
}
@@ -1201,31 +1233,29 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
*/
bytenr = btrfs_sb_offset(0);
flags |= FMODE_EXCL;
- mutex_lock(&uuid_mutex);
bdev = blkdev_get_by_path(path, flags, holder);
- if (IS_ERR(bdev)) {
- ret = PTR_ERR(bdev);
- goto error;
- }
+ if (IS_ERR(bdev))
+ return PTR_ERR(bdev);
if (btrfs_read_disk_super(bdev, bytenr, &page, &disk_super)) {
ret = -EINVAL;
goto error_bdev_put;
}
+ mutex_lock(&uuid_mutex);
device = device_list_add(path, disk_super);
if (IS_ERR(device))
ret = PTR_ERR(device);
else
*fs_devices_ret = device->fs_devices;
+ mutex_unlock(&uuid_mutex);
btrfs_release_disk_super(page);
error_bdev_put:
blkdev_put(bdev, flags);
-error:
- mutex_unlock(&uuid_mutex);
+
return ret;
}
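
Net effect of the reordering above: uuid_mutex now covers only the update of the global fs_uuids state, so the block device lookup and super block read run unlocked. Condensed from the hunk:

	bdev = blkdev_get_by_path(path, flags, holder);	/* unlocked */
	if (IS_ERR(bdev))
		return PTR_ERR(bdev);
	if (btrfs_read_disk_super(bdev, bytenr, &page, &disk_super)) {
		ret = -EINVAL;
		goto error_bdev_put;
	}

	mutex_lock(&uuid_mutex);
	device = device_list_add(path, disk_super);	/* touches fs_uuids */
	if (IS_ERR(device))
		ret = PTR_ERR(device);
	else
		*fs_devices_ret = device->fs_devices;
	mutex_unlock(&uuid_mutex);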
@@ -1857,11 +1887,11 @@ static int btrfs_check_raid_min_devices(struct btrfs_fs_info *fs_info,
} while (read_seqretry(&fs_info->profiles_lock, seq));
for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) {
- if (!(all_avail & btrfs_raid_group[i]))
+ if (!(all_avail & btrfs_raid_array[i].bg_flag))
continue;
if (num_devices < btrfs_raid_array[i].devs_min) {
- int ret = btrfs_raid_mindev_error[i];
+ int ret = btrfs_raid_array[i].mindev_error;
if (ret)
return ret;
@@ -1917,13 +1947,13 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, const char *device_path,
{
struct btrfs_device *device;
struct btrfs_fs_devices *cur_devices;
+ struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
u64 num_devices;
int ret = 0;
- mutex_lock(&fs_info->volume_mutex);
mutex_lock(&uuid_mutex);
- num_devices = fs_info->fs_devices->num_devices;
+ num_devices = fs_devices->num_devices;
btrfs_dev_replace_read_lock(&fs_info->dev_replace);
if (btrfs_dev_replace_is_ongoing(&fs_info->dev_replace)) {
WARN_ON(num_devices < 1);
@@ -1986,27 +2016,32 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, const char *device_path,
* (super_copy) should hold the device list mutex.
*/
+ /*
+ * In normal cases cur_devices == fs_devices. But when deleting a
+ * seed device, cur_devices points to the seed's own fs_devices,
+ * which is linked in via fs_devices->seed.
+ */
cur_devices = device->fs_devices;
- mutex_lock(&fs_info->fs_devices->device_list_mutex);
+ mutex_lock(&fs_devices->device_list_mutex);
list_del_rcu(&device->dev_list);
- device->fs_devices->num_devices--;
- device->fs_devices->total_devices--;
+ cur_devices->num_devices--;
+ cur_devices->total_devices--;
if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state))
- device->fs_devices->missing_devices--;
+ cur_devices->missing_devices--;
btrfs_assign_next_active_device(fs_info, device, NULL);
if (device->bdev) {
- device->fs_devices->open_devices--;
+ cur_devices->open_devices--;
/* remove sysfs entry */
- btrfs_sysfs_rm_device_link(fs_info->fs_devices, device);
+ btrfs_sysfs_rm_device_link(fs_devices, device);
}
num_devices = btrfs_super_num_devices(fs_info->super_copy) - 1;
btrfs_set_super_num_devices(fs_info->super_copy, num_devices);
- mutex_unlock(&fs_info->fs_devices->device_list_mutex);
+ mutex_unlock(&fs_devices->device_list_mutex);
/*
* at this point, the device is zero sized and detached from
@@ -2020,8 +2055,6 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, const char *device_path,
call_rcu(&device->rcu, free_device_rcu);
if (cur_devices->open_devices == 0) {
- struct btrfs_fs_devices *fs_devices;
- fs_devices = fs_info->fs_devices;
while (fs_devices) {
if (fs_devices->seed == cur_devices) {
fs_devices->seed = cur_devices->seed;
@@ -2030,20 +2063,19 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, const char *device_path,
fs_devices = fs_devices->seed;
}
cur_devices->seed = NULL;
- __btrfs_close_devices(cur_devices);
+ close_fs_devices(cur_devices);
free_fs_devices(cur_devices);
}
out:
mutex_unlock(&uuid_mutex);
- mutex_unlock(&fs_info->volume_mutex);
return ret;
error_undo:
if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) {
mutex_lock(&fs_info->chunk_mutex);
list_add(&device->dev_alloc_list,
- &fs_info->fs_devices->alloc_list);
+ &fs_devices->alloc_list);
device->fs_devices->rw_devices++;
mutex_unlock(&fs_info->chunk_mutex);
}
@@ -2112,7 +2144,7 @@ void btrfs_rm_dev_replace_free_srcdev(struct btrfs_fs_info *fs_info,
tmp_fs_devices = tmp_fs_devices->seed;
}
fs_devices->seed = NULL;
- __btrfs_close_devices(fs_devices);
+ close_fs_devices(fs_devices);
free_fs_devices(fs_devices);
}
}
@@ -2120,23 +2152,23 @@ void btrfs_rm_dev_replace_free_srcdev(struct btrfs_fs_info *fs_info,
void btrfs_destroy_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
struct btrfs_device *tgtdev)
{
- mutex_lock(&uuid_mutex);
+ struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
+
WARN_ON(!tgtdev);
- mutex_lock(&fs_info->fs_devices->device_list_mutex);
+ mutex_lock(&fs_devices->device_list_mutex);
- btrfs_sysfs_rm_device_link(fs_info->fs_devices, tgtdev);
+ btrfs_sysfs_rm_device_link(fs_devices, tgtdev);
if (tgtdev->bdev)
- fs_info->fs_devices->open_devices--;
+ fs_devices->open_devices--;
- fs_info->fs_devices->num_devices--;
+ fs_devices->num_devices--;
btrfs_assign_next_active_device(fs_info, tgtdev, NULL);
list_del_rcu(&tgtdev->dev_list);
- mutex_unlock(&fs_info->fs_devices->device_list_mutex);
- mutex_unlock(&uuid_mutex);
+ mutex_unlock(&fs_devices->device_list_mutex);
/*
 * The update_dev_time() within btrfs_scratch_superblocks()
@@ -2188,10 +2220,6 @@ int btrfs_find_device_missing_or_by_path(struct btrfs_fs_info *fs_info,
struct btrfs_device *tmp;
devices = &fs_info->fs_devices->devices;
- /*
- * It is safe to read the devices since the volume_mutex
- * is held by the caller.
- */
list_for_each_entry(tmp, devices, dev_list) {
if (test_bit(BTRFS_DEV_STATE_IN_FS_METADATA,
&tmp->dev_state) && !tmp->bdev) {
@@ -2259,7 +2287,7 @@ static int btrfs_prepare_sprout(struct btrfs_fs_info *fs_info)
return PTR_ERR(old_devices);
}
- list_add(&old_devices->list, &fs_uuids);
+ list_add(&old_devices->fs_list, &fs_uuids);
memcpy(seed_devices, fs_devices, sizeof(*seed_devices));
seed_devices->opened = 1;
@@ -2570,7 +2598,7 @@ error_trans:
if (trans)
btrfs_end_transaction(trans);
error_free_device:
- free_device(device);
+ btrfs_free_device(device);
error:
blkdev_put(bdev, FMODE_EXCL);
if (seeding_dev && !unlocked) {
@@ -2580,99 +2608,6 @@ error:
return ret;
}
-int btrfs_init_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
- const char *device_path,
- struct btrfs_device *srcdev,
- struct btrfs_device **device_out)
-{
- struct btrfs_device *device;
- struct block_device *bdev;
- struct list_head *devices;
- struct rcu_string *name;
- u64 devid = BTRFS_DEV_REPLACE_DEVID;
- int ret = 0;
-
- *device_out = NULL;
- if (fs_info->fs_devices->seeding) {
- btrfs_err(fs_info, "the filesystem is a seed filesystem!");
- return -EINVAL;
- }
-
- bdev = blkdev_get_by_path(device_path, FMODE_WRITE | FMODE_EXCL,
- fs_info->bdev_holder);
- if (IS_ERR(bdev)) {
- btrfs_err(fs_info, "target device %s is invalid!", device_path);
- return PTR_ERR(bdev);
- }
-
- filemap_write_and_wait(bdev->bd_inode->i_mapping);
-
- devices = &fs_info->fs_devices->devices;
- list_for_each_entry(device, devices, dev_list) {
- if (device->bdev == bdev) {
- btrfs_err(fs_info,
- "target device is in the filesystem!");
- ret = -EEXIST;
- goto error;
- }
- }
-
-
- if (i_size_read(bdev->bd_inode) <
- btrfs_device_get_total_bytes(srcdev)) {
- btrfs_err(fs_info,
- "target device is smaller than source device!");
- ret = -EINVAL;
- goto error;
- }
-
-
- device = btrfs_alloc_device(NULL, &devid, NULL);
- if (IS_ERR(device)) {
- ret = PTR_ERR(device);
- goto error;
- }
-
- name = rcu_string_strdup(device_path, GFP_KERNEL);
- if (!name) {
- free_device(device);
- ret = -ENOMEM;
- goto error;
- }
- rcu_assign_pointer(device->name, name);
-
- mutex_lock(&fs_info->fs_devices->device_list_mutex);
- set_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
- device->generation = 0;
- device->io_width = fs_info->sectorsize;
- device->io_align = fs_info->sectorsize;
- device->sector_size = fs_info->sectorsize;
- device->total_bytes = btrfs_device_get_total_bytes(srcdev);
- device->disk_total_bytes = btrfs_device_get_disk_total_bytes(srcdev);
- device->bytes_used = btrfs_device_get_bytes_used(srcdev);
- device->commit_total_bytes = srcdev->commit_total_bytes;
- device->commit_bytes_used = device->bytes_used;
- device->fs_info = fs_info;
- device->bdev = bdev;
- set_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state);
- set_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state);
- device->mode = FMODE_EXCL;
- device->dev_stats_valid = 1;
- set_blocksize(device->bdev, BTRFS_BDEV_BLOCKSIZE);
- device->fs_devices = fs_info->fs_devices;
- list_add(&device->dev_list, &fs_info->fs_devices->devices);
- fs_info->fs_devices->num_devices++;
- fs_info->fs_devices->open_devices++;
- mutex_unlock(&fs_info->fs_devices->device_list_mutex);
-
- *device_out = device;
- return ret;
-
-error:
- blkdev_put(bdev, FMODE_EXCL);
- return ret;
-}
-
static noinline int btrfs_update_device(struct btrfs_trans_handle *trans,
struct btrfs_device *device)
{
@@ -3273,24 +3208,12 @@ static void update_balance_args(struct btrfs_balance_control *bctl)
}
/*
- * Should be called with both balance and volume mutexes held to
- * serialize other volume operations (add_dev/rm_dev/resize) with
- * restriper. Same goes for unset_balance_control.
+ * Clear the balance status in fs_info and delete the balance item from disk.
*/
-static void set_balance_control(struct btrfs_balance_control *bctl)
-{
- struct btrfs_fs_info *fs_info = bctl->fs_info;
-
- BUG_ON(fs_info->balance_ctl);
-
- spin_lock(&fs_info->balance_lock);
- fs_info->balance_ctl = bctl;
- spin_unlock(&fs_info->balance_lock);
-}
-
-static void unset_balance_control(struct btrfs_fs_info *fs_info)
+static void reset_balance_state(struct btrfs_fs_info *fs_info)
{
struct btrfs_balance_control *bctl = fs_info->balance_ctl;
+ int ret;
BUG_ON(!fs_info->balance_ctl);
@@ -3299,6 +3222,9 @@ static void unset_balance_control(struct btrfs_fs_info *fs_info)
spin_unlock(&fs_info->balance_lock);
kfree(bctl);
+ ret = del_balance_item(fs_info);
+ if (ret)
+ btrfs_handle_fs_error(fs_info, ret, NULL);
}
/*
@@ -3835,18 +3761,6 @@ static inline int balance_need_close(struct btrfs_fs_info *fs_info)
atomic_read(&fs_info->balance_cancel_req) == 0);
}
-static void __cancel_balance(struct btrfs_fs_info *fs_info)
-{
- int ret;
-
- unset_balance_control(fs_info);
- ret = del_balance_item(fs_info);
- if (ret)
- btrfs_handle_fs_error(fs_info, ret, NULL);
-
- clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
-}
-
/* Non-zero return value signifies invalidity */
static inline int validate_convert_profile(struct btrfs_balance_args *bctl_arg,
u64 allowed)
@@ -3857,12 +3771,12 @@ static inline int validate_convert_profile(struct btrfs_balance_args *bctl_arg,
}
/*
- * Should be called with both balance and volume mutexes held
+ * Should be called with the balance mutex held
*/
-int btrfs_balance(struct btrfs_balance_control *bctl,
+int btrfs_balance(struct btrfs_fs_info *fs_info,
+ struct btrfs_balance_control *bctl,
struct btrfs_ioctl_balance_args *bargs)
{
- struct btrfs_fs_info *fs_info = bctl->fs_info;
u64 meta_target, data_target;
u64 allowed;
int mixed = 0;
@@ -3891,7 +3805,7 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
!(bctl->flags & BTRFS_BALANCE_METADATA) ||
memcmp(&bctl->data, &bctl->meta, sizeof(bctl->data))) {
btrfs_err(fs_info,
- "with mixed groups data and metadata balance options must be the same");
+ "balance: mixed groups data and metadata options must be the same");
ret = -EINVAL;
goto out;
}
@@ -3913,23 +3827,29 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
allowed |= (BTRFS_BLOCK_GROUP_RAID10 |
BTRFS_BLOCK_GROUP_RAID6);
if (validate_convert_profile(&bctl->data, allowed)) {
+ int index = btrfs_bg_flags_to_raid_index(bctl->data.target);
+
btrfs_err(fs_info,
- "unable to start balance with target data profile %llu",
- bctl->data.target);
+ "balance: invalid convert data profile %s",
+ get_raid_name(index));
ret = -EINVAL;
goto out;
}
if (validate_convert_profile(&bctl->meta, allowed)) {
+ int index = btrfs_bg_flags_to_raid_index(bctl->meta.target);
+
btrfs_err(fs_info,
- "unable to start balance with target metadata profile %llu",
- bctl->meta.target);
+ "balance: invalid convert metadata profile %s",
+ get_raid_name(index));
ret = -EINVAL;
goto out;
}
if (validate_convert_profile(&bctl->sys, allowed)) {
+ int index = btrfs_bg_flags_to_raid_index(bctl->sys.target);
+
btrfs_err(fs_info,
- "unable to start balance with target system profile %llu",
- bctl->sys.target);
+ "balance: invalid convert system profile %s",
+ get_raid_name(index));
ret = -EINVAL;
goto out;
}
@@ -3950,10 +3870,10 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
!(bctl->meta.target & allowed))) {
if (bctl->flags & BTRFS_BALANCE_FORCE) {
btrfs_info(fs_info,
- "force reducing metadata integrity");
+ "balance: force reducing metadata integrity");
} else {
btrfs_err(fs_info,
- "balance will reduce metadata integrity, use force if you want this");
+ "balance: reduces metadata integrity, use --force if you want this");
ret = -EINVAL;
goto out;
}
@@ -3967,9 +3887,12 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
bctl->data.target : fs_info->avail_data_alloc_bits;
if (btrfs_get_num_tolerated_disk_barrier_failures(meta_target) <
btrfs_get_num_tolerated_disk_barrier_failures(data_target)) {
+ int meta_index = btrfs_bg_flags_to_raid_index(meta_target);
+ int data_index = btrfs_bg_flags_to_raid_index(data_target);
+
btrfs_warn(fs_info,
- "metadata profile 0x%llx has lower redundancy than data profile 0x%llx",
- meta_target, data_target);
+ "balance: metadata profile %s has lower redundancy than data profile %s",
+ get_raid_name(meta_index), get_raid_name(data_index));
}
ret = insert_balance_item(fs_info, bctl);
@@ -3978,7 +3901,10 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
if (!(bctl->flags & BTRFS_BALANCE_RESUME)) {
BUG_ON(ret == -EEXIST);
- set_balance_control(bctl);
+ BUG_ON(fs_info->balance_ctl);
+ spin_lock(&fs_info->balance_lock);
+ fs_info->balance_ctl = bctl;
+ spin_unlock(&fs_info->balance_lock);
} else {
BUG_ON(ret != -EEXIST);
spin_lock(&fs_info->balance_lock);
@@ -3986,22 +3912,24 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
spin_unlock(&fs_info->balance_lock);
}
- atomic_inc(&fs_info->balance_running);
+ ASSERT(!test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags));
+ set_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags);
mutex_unlock(&fs_info->balance_mutex);
ret = __btrfs_balance(fs_info);
mutex_lock(&fs_info->balance_mutex);
- atomic_dec(&fs_info->balance_running);
+ clear_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags);
if (bargs) {
memset(bargs, 0, sizeof(*bargs));
- update_ioctl_balance_args(fs_info, 0, bargs);
+ btrfs_update_ioctl_balance_args(fs_info, bargs);
}
if ((ret && ret != -ECANCELED && ret != -ENOSPC) ||
balance_need_close(fs_info)) {
- __cancel_balance(fs_info);
+ reset_balance_state(fs_info);
+ clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
}
wake_up(&fs_info->balance_wait_q);
@@ -4009,11 +3937,11 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
return ret;
out:
if (bctl->flags & BTRFS_BALANCE_RESUME)
- __cancel_balance(fs_info);
- else {
+ reset_balance_state(fs_info);
+ else
kfree(bctl);
- clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
- }
+ clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
+
return ret;
}
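/*
 * [Editorial note] The BTRFS_FS_BALANCE_RUNNING bit can replace the old
 * balance_running atomic counter because balance is an exclusive
 * operation: at most one instance runs at a time, so a single fs_info
 * flag is enough for the pause/cancel waiters to key off.
 */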
@@ -4022,16 +3950,12 @@ static int balance_kthread(void *data)
struct btrfs_fs_info *fs_info = data;
int ret = 0;
- mutex_lock(&fs_info->volume_mutex);
mutex_lock(&fs_info->balance_mutex);
-
if (fs_info->balance_ctl) {
- btrfs_info(fs_info, "continuing balance");
- ret = btrfs_balance(fs_info->balance_ctl, NULL);
+ btrfs_info(fs_info, "balance: resuming");
+ ret = btrfs_balance(fs_info, fs_info->balance_ctl, NULL);
}
-
mutex_unlock(&fs_info->balance_mutex);
- mutex_unlock(&fs_info->volume_mutex);
return ret;
}
@@ -4040,18 +3964,27 @@ int btrfs_resume_balance_async(struct btrfs_fs_info *fs_info)
{
struct task_struct *tsk;
- spin_lock(&fs_info->balance_lock);
+ mutex_lock(&fs_info->balance_mutex);
if (!fs_info->balance_ctl) {
- spin_unlock(&fs_info->balance_lock);
+ mutex_unlock(&fs_info->balance_mutex);
return 0;
}
- spin_unlock(&fs_info->balance_lock);
+ mutex_unlock(&fs_info->balance_mutex);
if (btrfs_test_opt(fs_info, SKIP_BALANCE)) {
- btrfs_info(fs_info, "force skipping balance");
+ btrfs_info(fs_info, "balance: resume skipped");
return 0;
}
+ /*
+ * A ro->rw remount sequence should continue with the paused balance
+ * regardless of who paused it (the system or the user), so set
+ * the resume flag.
+ */
+ spin_lock(&fs_info->balance_lock);
+ fs_info->balance_ctl->flags |= BTRFS_BALANCE_RESUME;
+ spin_unlock(&fs_info->balance_lock);
+
tsk = kthread_run(balance_kthread, fs_info, "btrfs-balance");
return PTR_ERR_OR_ZERO(tsk);
}
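/*
 * [Editorial note] PTR_ERR_OR_ZERO(tsk) above is equivalent to the
 * open-coded form:
 *
 *	if (IS_ERR(tsk))
 *		return PTR_ERR(tsk);
 *	return 0;
 */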
@@ -4091,7 +4024,6 @@ int btrfs_recover_balance(struct btrfs_fs_info *fs_info)
leaf = path->nodes[0];
item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_balance_item);
- bctl->fs_info = fs_info;
bctl->flags = btrfs_balance_flags(leaf, item);
bctl->flags |= BTRFS_BALANCE_RESUME;
@@ -4102,15 +4034,26 @@ int btrfs_recover_balance(struct btrfs_fs_info *fs_info)
btrfs_balance_sys(leaf, item, &disk_bargs);
btrfs_disk_balance_args_to_cpu(&bctl->sys, &disk_bargs);
- WARN_ON(test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags));
+ /*
+ * This should never happen, as the paused balance state is recovered
+ * during mount without any chance of other exclusive ops colliding.
+ *
+ * This gives the exclusive op status to balance and keeps it in the
+ * paused state until user intervention (cancel or umount). If the
+ * ownership cannot be assigned, show a message but do not fail. The
+ * balance stays paused and must have fs_info::balance_ctl properly
+ * set up.
+ */
+ if (test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags))
+ btrfs_warn(fs_info,
+ "balance: cannot set exclusive op status, resume manually");
- mutex_lock(&fs_info->volume_mutex);
mutex_lock(&fs_info->balance_mutex);
-
- set_balance_control(bctl);
-
+ BUG_ON(fs_info->balance_ctl);
+ spin_lock(&fs_info->balance_lock);
+ fs_info->balance_ctl = bctl;
+ spin_unlock(&fs_info->balance_lock);
mutex_unlock(&fs_info->balance_mutex);
- mutex_unlock(&fs_info->volume_mutex);
out:
btrfs_free_path(path);
return ret;
@@ -4126,16 +4069,16 @@ int btrfs_pause_balance(struct btrfs_fs_info *fs_info)
return -ENOTCONN;
}
- if (atomic_read(&fs_info->balance_running)) {
+ if (test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags)) {
atomic_inc(&fs_info->balance_pause_req);
mutex_unlock(&fs_info->balance_mutex);
wait_event(fs_info->balance_wait_q,
- atomic_read(&fs_info->balance_running) == 0);
+ !test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags));
mutex_lock(&fs_info->balance_mutex);
/* we are good with balance_ctl ripped off from under us */
- BUG_ON(atomic_read(&fs_info->balance_running));
+ BUG_ON(test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags));
atomic_dec(&fs_info->balance_pause_req);
} else {
ret = -ENOTCONN;
@@ -4147,38 +4090,49 @@ int btrfs_pause_balance(struct btrfs_fs_info *fs_info)
int btrfs_cancel_balance(struct btrfs_fs_info *fs_info)
{
- if (sb_rdonly(fs_info->sb))
- return -EROFS;
-
mutex_lock(&fs_info->balance_mutex);
if (!fs_info->balance_ctl) {
mutex_unlock(&fs_info->balance_mutex);
return -ENOTCONN;
}
+ /*
+ * A paused balance with the item stored on disk can be resumed at
+ * mount time if the mount is read-write. Otherwise it's still paused
+ * and we must not allow cancelling as it deletes the item.
+ */
+ if (sb_rdonly(fs_info->sb)) {
+ mutex_unlock(&fs_info->balance_mutex);
+ return -EROFS;
+ }
+
atomic_inc(&fs_info->balance_cancel_req);
/*
* If balance is currently running, just wait and return; the balance
* item is deleted in btrfs_balance() in that case
*/
- if (atomic_read(&fs_info->balance_running)) {
+ if (test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags)) {
mutex_unlock(&fs_info->balance_mutex);
wait_event(fs_info->balance_wait_q,
- atomic_read(&fs_info->balance_running) == 0);
+ !test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags));
mutex_lock(&fs_info->balance_mutex);
} else {
- /* __cancel_balance needs volume_mutex */
mutex_unlock(&fs_info->balance_mutex);
- mutex_lock(&fs_info->volume_mutex);
+ /*
+ * Lock released to allow other waiters to continue; we
+ * re-examine the status after reacquiring the mutex.
+ */
mutex_lock(&fs_info->balance_mutex);
- if (fs_info->balance_ctl)
- __cancel_balance(fs_info);
-
- mutex_unlock(&fs_info->volume_mutex);
+ if (fs_info->balance_ctl) {
+ reset_balance_state(fs_info);
+ clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
+ btrfs_info(fs_info, "balance: canceled");
+ }
}
- BUG_ON(fs_info->balance_ctl || atomic_read(&fs_info->balance_running));
+ BUG_ON(fs_info->balance_ctl ||
+ test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags));
atomic_dec(&fs_info->balance_cancel_req);
mutex_unlock(&fs_info->balance_mutex);
return 0;
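/*
 * [Editorial note] The recheck of fs_info->balance_ctl above matters
 * because a concurrently running btrfs_balance() may finish and call
 * reset_balance_state() itself while balance_mutex is dropped, leaving
 * balance_ctl NULL by the time the mutex is reacquired.
 */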
@@ -4255,8 +4209,7 @@ static int btrfs_uuid_scan_kthread(void *data)
}
update_tree:
if (!btrfs_is_empty_uuid(root_item.uuid)) {
- ret = btrfs_uuid_tree_add(trans, fs_info,
- root_item.uuid,
+ ret = btrfs_uuid_tree_add(trans, root_item.uuid,
BTRFS_UUID_KEY_SUBVOL,
key.objectid);
if (ret < 0) {
@@ -4267,7 +4220,7 @@ update_tree:
}
if (!btrfs_is_empty_uuid(root_item.received_uuid)) {
- ret = btrfs_uuid_tree_add(trans, fs_info,
+ ret = btrfs_uuid_tree_add(trans,
root_item.received_uuid,
BTRFS_UUID_KEY_RECEIVED_SUBVOL,
key.objectid);
@@ -4473,7 +4426,7 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
if (!path)
return -ENOMEM;
- path->reada = READA_FORWARD;
+ path->reada = READA_BACK;
mutex_lock(&fs_info->chunk_mutex);
@@ -6034,9 +5987,8 @@ int btrfs_map_sblock(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
return __btrfs_map_block(fs_info, op, logical, length, bbio_ret, 0, 1);
}
-int btrfs_rmap_block(struct btrfs_fs_info *fs_info,
- u64 chunk_start, u64 physical, u64 devid,
- u64 **logical, int *naddrs, int *stripe_len)
+int btrfs_rmap_block(struct btrfs_fs_info *fs_info, u64 chunk_start,
+ u64 physical, u64 **logical, int *naddrs, int *stripe_len)
{
struct extent_map *em;
struct map_lookup *map;
@@ -6068,8 +6020,6 @@ int btrfs_rmap_block(struct btrfs_fs_info *fs_info,
BUG_ON(!buf); /* -ENOMEM */
for (i = 0; i < map->num_stripes; i++) {
- if (devid && map->stripes[i].dev->devid != devid)
- continue;
if (map->stripes[i].physical > physical ||
map->stripes[i].physical + length <= physical)
continue;
@@ -6401,7 +6351,7 @@ static struct btrfs_device *add_missing_dev(struct btrfs_fs_devices *fs_devices,
*
* Return: a pointer to a new &struct btrfs_device on success; ERR_PTR()
* on error. Returned struct is not linked onto any lists and must be
- * destroyed with free_device.
+ * destroyed with btrfs_free_device.
*/
struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info,
const u64 *devid,
@@ -6424,7 +6374,7 @@ struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info,
ret = find_next_devid(fs_info, &tmp);
if (ret) {
- free_device(dev);
+ btrfs_free_device(dev);
return ERR_PTR(ret);
}
}
@@ -6675,8 +6625,7 @@ static struct btrfs_fs_devices *open_seed_devices(struct btrfs_fs_info *fs_info,
if (IS_ERR(fs_devices))
return fs_devices;
- ret = __btrfs_open_devices(fs_devices, FMODE_READ,
- fs_info->bdev_holder);
+ ret = open_fs_devices(fs_devices, FMODE_READ, fs_info->bdev_holder);
if (ret) {
free_fs_devices(fs_devices);
fs_devices = ERR_PTR(ret);
@@ -6684,7 +6633,7 @@ static struct btrfs_fs_devices *open_seed_devices(struct btrfs_fs_info *fs_info,
}
if (!fs_devices->seeding) {
- __btrfs_close_devices(fs_devices);
+ close_fs_devices(fs_devices);
free_fs_devices(fs_devices);
fs_devices = ERR_PTR(-EINVAL);
goto out;
@@ -6993,6 +6942,10 @@ int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info)
if (!path)
return -ENOMEM;
+ /*
+ * uuid_mutex is needed only when mounting a sprout FS (one grown
+ * from a seed device).
+ */
mutex_lock(&uuid_mutex);
mutex_lock(&fs_info->chunk_mutex);
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 79096884654f..5139ec8daf4c 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -208,6 +208,7 @@ BTRFS_DEVICE_GETSET_FUNCS(bytes_used);
struct btrfs_fs_devices {
u8 fsid[BTRFS_FSID_SIZE]; /* FS specific uuid */
+ struct list_head fs_list;
u64 num_devices;
u64 open_devices;
@@ -229,7 +230,6 @@ struct btrfs_fs_devices {
struct list_head resized_devices;
/* devices not currently being allocated */
struct list_head alloc_list;
- struct list_head list;
struct btrfs_fs_devices *seed;
int seeding;
@@ -329,11 +329,12 @@ struct btrfs_raid_attr {
int tolerated_failures; /* max tolerated fail devs */
int devs_increment; /* ndevs has to be a multiple of this */
int ncopies; /* how many copies the data has */
+ int mindev_error; /* error code if the min devs requirement is unmet */
+ const char raid_name[8]; /* name of the raid */
+ u64 bg_flag; /* block group flag of the raid */
};
extern const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES];
-extern const int btrfs_raid_mindev_error[BTRFS_NR_RAID_TYPES];
-extern const u64 btrfs_raid_group[BTRFS_NR_RAID_TYPES];
struct map_lookup {
u64 type;
@@ -351,8 +352,6 @@ struct map_lookup {
struct btrfs_balance_args;
struct btrfs_balance_progress;
struct btrfs_balance_control {
- struct btrfs_fs_info *fs_info;
-
struct btrfs_balance_args data;
struct btrfs_balance_args meta;
struct btrfs_balance_args sys;
@@ -393,9 +392,8 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
int btrfs_map_sblock(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
u64 logical, u64 *length,
struct btrfs_bio **bbio_ret);
-int btrfs_rmap_block(struct btrfs_fs_info *fs_info,
- u64 chunk_start, u64 physical, u64 devid,
- u64 **logical, int *naddrs, int *stripe_len);
+int btrfs_rmap_block(struct btrfs_fs_info *fs_info, u64 chunk_start,
+ u64 physical, u64 **logical, int *naddrs, int *stripe_len);
int btrfs_read_sys_array(struct btrfs_fs_info *fs_info);
int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info);
int btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
@@ -421,6 +419,7 @@ int btrfs_find_device_by_devspec(struct btrfs_fs_info *fs_info, u64 devid,
struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info,
const u64 *devid,
const u8 *uuid);
+void btrfs_free_device(struct btrfs_device *device);
int btrfs_rm_device(struct btrfs_fs_info *fs_info,
const char *device_path, u64 devid);
void __exit btrfs_cleanup_fs_uuids(void);
@@ -431,11 +430,8 @@ struct btrfs_device *btrfs_find_device(struct btrfs_fs_info *fs_info, u64 devid,
u8 *uuid, u8 *fsid);
int btrfs_shrink_device(struct btrfs_device *device, u64 new_size);
int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *path);
-int btrfs_init_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
- const char *device_path,
- struct btrfs_device *srcdev,
- struct btrfs_device **device_out);
-int btrfs_balance(struct btrfs_balance_control *bctl,
+int btrfs_balance(struct btrfs_fs_info *fs_info,
+ struct btrfs_balance_control *bctl,
struct btrfs_ioctl_balance_args *bargs);
int btrfs_resume_balance_async(struct btrfs_fs_info *fs_info);
int btrfs_recover_balance(struct btrfs_fs_info *fs_info);
@@ -553,6 +549,8 @@ static inline enum btrfs_raid_types btrfs_bg_flags_to_raid_index(u64 flags)
return BTRFS_RAID_SINGLE; /* BTRFS_BLOCK_GROUP_SINGLE */
}
+const char *get_raid_name(enum btrfs_raid_types type);
+
void btrfs_update_commit_device_size(struct btrfs_fs_info *fs_info);
void btrfs_update_commit_device_bytes_used(struct btrfs_transaction *trans);
diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
index 0daa1e3fe0df..ab0bbe93b398 100644
--- a/fs/cachefiles/namei.c
+++ b/fs/cachefiles/namei.c
@@ -572,6 +572,11 @@ lookup_again:
if (ret < 0)
goto create_error;
+ if (unlikely(d_unhashed(next))) {
+ dput(next);
+ inode_unlock(d_inode(dir));
+ goto lookup_again;
+ }
ASSERT(d_backing_inode(next));
_debug("mkdir -> %p{%p{ino=%lu}}",
@@ -764,6 +769,7 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache,
/* search the current directory for the element name */
inode_lock(d_inode(dir));
+retry:
start = jiffies;
subdir = lookup_one_len(dirname, dir, strlen(dirname));
cachefiles_hist(cachefiles_lookup_histogram, start);
@@ -793,6 +799,10 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache,
if (ret < 0)
goto mkdir_error;
+ if (unlikely(d_unhashed(subdir))) {
+ dput(subdir);
+ goto retry;
+ }
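/*
 * [Editorial note, inferred from the code] On some filesystems
 * vfs_mkdir() can succeed yet leave the dentry negative and unhashed,
 * so the lookup is restarted to obtain a usable dentry instead of
 * trusting the one returned.
 */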
ASSERT(d_backing_inode(subdir));
_debug("mkdir -> %p{%p{ino=%lu}}",
diff --git a/fs/cachefiles/proc.c b/fs/cachefiles/proc.c
index 125b90f6c796..0ce1aa56b67f 100644
--- a/fs/cachefiles/proc.c
+++ b/fs/cachefiles/proc.c
@@ -85,21 +85,6 @@ static const struct seq_operations cachefiles_histogram_ops = {
};
/*
- * open "/proc/fs/cachefiles/XXX" which provide statistics summaries
- */
-static int cachefiles_histogram_open(struct inode *inode, struct file *file)
-{
- return seq_open(file, &cachefiles_histogram_ops);
-}
-
-static const struct file_operations cachefiles_histogram_fops = {
- .open = cachefiles_histogram_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
-/*
* initialise the /proc/fs/cachefiles/ directory
*/
int __init cachefiles_proc_init(void)
@@ -109,8 +94,8 @@ int __init cachefiles_proc_init(void)
if (!proc_mkdir("fs/cachefiles", NULL))
goto error_dir;
- if (!proc_create("fs/cachefiles/histogram", S_IFREG | 0444, NULL,
- &cachefiles_histogram_fops))
+ if (!proc_create_seq("fs/cachefiles/histogram", S_IFREG | 0444, NULL,
+ &cachefiles_histogram_ops))
goto error_histogram;
_leave(" = 0");
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index f85040d73e3d..cf0e45b10121 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -70,69 +70,104 @@ static __le32 ceph_flags_sys2wire(u32 flags)
*/
/*
- * Calculate the length sum of direct io vectors that can
- * be combined into one page vector.
+ * How many pages to get in one call to iov_iter_get_pages(). This
+ * determines the size of the on-stack array used as a buffer.
*/
-static size_t dio_get_pagev_size(const struct iov_iter *it)
+#define ITER_GET_BVECS_PAGES 64
+
+static ssize_t __iter_get_bvecs(struct iov_iter *iter, size_t maxsize,
+ struct bio_vec *bvecs)
{
- const struct iovec *iov = it->iov;
- const struct iovec *iovend = iov + it->nr_segs;
- size_t size;
-
- size = iov->iov_len - it->iov_offset;
- /*
- * An iov can be page vectored when both the current tail
- * and the next base are page aligned.
- */
- while (PAGE_ALIGNED((iov->iov_base + iov->iov_len)) &&
- (++iov < iovend && PAGE_ALIGNED((iov->iov_base)))) {
- size += iov->iov_len;
- }
- dout("dio_get_pagevlen len = %zu\n", size);
- return size;
+ size_t size = 0;
+ int bvec_idx = 0;
+
+ if (maxsize > iov_iter_count(iter))
+ maxsize = iov_iter_count(iter);
+
+ while (size < maxsize) {
+ struct page *pages[ITER_GET_BVECS_PAGES];
+ ssize_t bytes;
+ size_t start;
+ int idx = 0;
+
+ bytes = iov_iter_get_pages(iter, pages, maxsize - size,
+ ITER_GET_BVECS_PAGES, &start);
+ if (bytes < 0)
+ return size ?: bytes;
+
+ iov_iter_advance(iter, bytes);
+ size += bytes;
+
+ for ( ; bytes; idx++, bvec_idx++) {
+ struct bio_vec bv = {
+ .bv_page = pages[idx],
+ .bv_len = min_t(int, bytes, PAGE_SIZE - start),
+ .bv_offset = start,
+ };
+
+ bvecs[bvec_idx] = bv;
+ bytes -= bv.bv_len;
+ start = 0;
+ }
+ }
+
+ return size;
}
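/*
 * [Editorial note] Worked example for the loop above, assuming 4 KiB
 * pages: if iov_iter_get_pages() returns bytes = 10240 with start = 512,
 * the inner loop emits three bvecs of 3584, 4096 and 2560 bytes, and
 * only the first carries a non-zero bv_offset.
 */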
/*
- * Allocate a page vector based on (@it, @nbytes).
- * The return value is the tuple describing a page vector,
- * that is (@pages, @page_align, @num_pages).
+ * iov_iter_get_pages() only considers one iov_iter segment, no matter
+ * what maxsize or maxpages are given. For ITER_BVEC that is a single
+ * page.
+ *
+ * Attempt to get up to @maxsize bytes worth of pages from @iter.
+ * Return the number of bytes in the created bio_vec array, or an error.
*/
-static struct page **
-dio_get_pages_alloc(const struct iov_iter *it, size_t nbytes,
- size_t *page_align, int *num_pages)
+static ssize_t iter_get_bvecs_alloc(struct iov_iter *iter, size_t maxsize,
+ struct bio_vec **bvecs, int *num_bvecs)
{
- struct iov_iter tmp_it = *it;
- size_t align;
- struct page **pages;
- int ret = 0, idx, npages;
+ struct bio_vec *bv;
+ size_t orig_count = iov_iter_count(iter);
+ ssize_t bytes;
+ int npages;
- align = (unsigned long)(it->iov->iov_base + it->iov_offset) &
- (PAGE_SIZE - 1);
- npages = calc_pages_for(align, nbytes);
- pages = kvmalloc(sizeof(*pages) * npages, GFP_KERNEL);
- if (!pages)
- return ERR_PTR(-ENOMEM);
+ iov_iter_truncate(iter, maxsize);
+ npages = iov_iter_npages(iter, INT_MAX);
+ iov_iter_reexpand(iter, orig_count);
- for (idx = 0; idx < npages; ) {
- size_t start;
- ret = iov_iter_get_pages(&tmp_it, pages + idx, nbytes,
- npages - idx, &start);
- if (ret < 0)
- goto fail;
+ /*
+ * __iter_get_bvecs() may populate only part of the array -- zero it
+ * out.
+ */
+ bv = kvmalloc_array(npages, sizeof(*bv), GFP_KERNEL | __GFP_ZERO);
+ if (!bv)
+ return -ENOMEM;
- iov_iter_advance(&tmp_it, ret);
- nbytes -= ret;
- idx += (ret + start + PAGE_SIZE - 1) / PAGE_SIZE;
+ bytes = __iter_get_bvecs(iter, maxsize, bv);
+ if (bytes < 0) {
+ /*
+ * No pages were pinned -- just free the array.
+ */
+ kvfree(bv);
+ return bytes;
}
- BUG_ON(nbytes != 0);
- *num_pages = npages;
- *page_align = align;
- dout("dio_get_pages_alloc: got %d pages align %zu\n", npages, align);
- return pages;
-fail:
- ceph_put_page_vector(pages, idx, false);
- return ERR_PTR(ret);
+ *bvecs = bv;
+ *num_bvecs = npages;
+ return bytes;
+}
+
+static void put_bvecs(struct bio_vec *bvecs, int num_bvecs, bool should_dirty)
+{
+ int i;
+
+ for (i = 0; i < num_bvecs; i++) {
+ if (bvecs[i].bv_page) {
+ if (should_dirty)
+ set_page_dirty_lock(bvecs[i].bv_page);
+ put_page(bvecs[i].bv_page);
+ }
+ }
+ kvfree(bvecs);
}
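/*
 * [Editorial sketch] Minimal usage of the pair above; "iter", "size"
 * and "should_dirty" are assumed to come from the surrounding direct
 * I/O path:
 *
 *	struct bio_vec *bvecs;
 *	int num_bvecs;
 *	ssize_t len;
 *
 *	len = iter_get_bvecs_alloc(iter, size, &bvecs, &num_bvecs);
 *	if (len < 0)
 *		return len;
 *	...attach bvecs to an OSD request and wait for completion...
 *	put_bvecs(bvecs, num_bvecs, should_dirty);
 */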
/*
@@ -746,11 +781,12 @@ static void ceph_aio_complete_req(struct ceph_osd_request *req)
struct inode *inode = req->r_inode;
struct ceph_aio_request *aio_req = req->r_priv;
struct ceph_osd_data *osd_data = osd_req_op_extent_osd_data(req, 0);
- int num_pages = calc_pages_for((u64)osd_data->alignment,
- osd_data->length);
- dout("ceph_aio_complete_req %p rc %d bytes %llu\n",
- inode, rc, osd_data->length);
+ BUG_ON(osd_data->type != CEPH_OSD_DATA_TYPE_BVECS);
+ BUG_ON(!osd_data->num_bvecs);
+
+ dout("ceph_aio_complete_req %p rc %d bytes %u\n",
+ inode, rc, osd_data->bvec_pos.iter.bi_size);
if (rc == -EOLDSNAPC) {
struct ceph_aio_work *aio_work;
@@ -768,9 +804,10 @@ static void ceph_aio_complete_req(struct ceph_osd_request *req)
} else if (!aio_req->write) {
if (rc == -ENOENT)
rc = 0;
- if (rc >= 0 && osd_data->length > rc) {
- int zoff = osd_data->alignment + rc;
- int zlen = osd_data->length - rc;
+ if (rc >= 0 && osd_data->bvec_pos.iter.bi_size > rc) {
+ struct iov_iter i;
+ int zlen = osd_data->bvec_pos.iter.bi_size - rc;
+
/*
* If read is satisfied by single OSD request,
* it can pass EOF. Otherwise read is within
@@ -785,13 +822,16 @@ static void ceph_aio_complete_req(struct ceph_osd_request *req)
aio_req->total_len = rc + zlen;
}
- if (zlen > 0)
- ceph_zero_page_vector_range(zoff, zlen,
- osd_data->pages);
+ iov_iter_bvec(&i, ITER_BVEC, osd_data->bvec_pos.bvecs,
+ osd_data->num_bvecs,
+ osd_data->bvec_pos.iter.bi_size);
+ iov_iter_advance(&i, rc);
+ iov_iter_zero(zlen, &i);
}
}
- ceph_put_page_vector(osd_data->pages, num_pages, aio_req->should_dirty);
+ put_bvecs(osd_data->bvec_pos.bvecs, osd_data->num_bvecs,
+ aio_req->should_dirty);
ceph_osdc_put_request(req);
if (rc < 0)
@@ -879,7 +919,7 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
struct ceph_vino vino;
struct ceph_osd_request *req;
- struct page **pages;
+ struct bio_vec *bvecs;
struct ceph_aio_request *aio_req = NULL;
int num_pages = 0;
int flags;
@@ -914,10 +954,14 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
}
while (iov_iter_count(iter) > 0) {
- u64 size = dio_get_pagev_size(iter);
- size_t start = 0;
+ u64 size = iov_iter_count(iter);
ssize_t len;
+ if (write)
+ size = min_t(u64, size, fsc->mount_options->wsize);
+ else
+ size = min_t(u64, size, fsc->mount_options->rsize);
+
vino = ceph_vino(inode);
req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout,
vino, pos, &size, 0,
@@ -933,18 +977,14 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
break;
}
- if (write)
- size = min_t(u64, size, fsc->mount_options->wsize);
- else
- size = min_t(u64, size, fsc->mount_options->rsize);
-
- len = size;
- pages = dio_get_pages_alloc(iter, len, &start, &num_pages);
- if (IS_ERR(pages)) {
+ len = iter_get_bvecs_alloc(iter, size, &bvecs, &num_pages);
+ if (len < 0) {
ceph_osdc_put_request(req);
- ret = PTR_ERR(pages);
+ ret = len;
break;
}
+ if (len != size)
+ osd_req_op_extent_update(req, 0, len);
/*
* To simplify error handling, allow AIO when IO within i_size
@@ -977,8 +1017,7 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
req->r_mtime = mtime;
}
- osd_req_op_extent_osd_data_pages(req, 0, pages, len, start,
- false, false);
+ osd_req_op_extent_osd_data_bvecs(req, 0, bvecs, num_pages, len);
if (aio_req) {
aio_req->total_len += len;
@@ -991,7 +1030,6 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
list_add_tail(&req->r_unsafe_item, &aio_req->osd_reqs);
pos += len;
- iov_iter_advance(iter, len);
continue;
}
@@ -1004,25 +1042,26 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
if (ret == -ENOENT)
ret = 0;
if (ret >= 0 && ret < len && pos + ret < size) {
+ struct iov_iter i;
int zlen = min_t(size_t, len - ret,
size - pos - ret);
- ceph_zero_page_vector_range(start + ret, zlen,
- pages);
+
+ iov_iter_bvec(&i, ITER_BVEC, bvecs, num_pages,
+ len);
+ iov_iter_advance(&i, ret);
+ iov_iter_zero(zlen, &i);
ret += zlen;
}
if (ret >= 0)
len = ret;
}
- ceph_put_page_vector(pages, num_pages, should_dirty);
-
+ put_bvecs(bvecs, num_pages, should_dirty);
ceph_osdc_put_request(req);
if (ret < 0)
break;
pos += len;
- iov_iter_advance(iter, len);
-
if (!write && pos >= size)
break;
diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig
index 741749a98614..5f132d59dfc2 100644
--- a/fs/cifs/Kconfig
+++ b/fs/cifs/Kconfig
@@ -197,7 +197,7 @@ config CIFS_SMB311
config CIFS_SMB_DIRECT
bool "SMB Direct support (Experimental)"
- depends on CIFS=m && INFINIBAND || CIFS=y && INFINIBAND=y
+ depends on CIFS=m && INFINIBAND && INFINIBAND_ADDR_TRANS || CIFS=y && INFINIBAND=y && INFINIBAND_ADDR_TRANS=y
help
Enables SMB Direct experimental support for SMB 3.0, 3.02 and 3.1.1.
SMB Direct allows transferring SMB packets over RDMA. If unsure,
diff --git a/fs/cifs/Makefile b/fs/cifs/Makefile
index 7e4a1e2f0696..85817991ee68 100644
--- a/fs/cifs/Makefile
+++ b/fs/cifs/Makefile
@@ -1,11 +1,12 @@
# SPDX-License-Identifier: GPL-2.0
#
-# Makefile for Linux CIFS VFS client
+# Makefile for Linux CIFS/SMB2/SMB3 VFS client
#
+ccflags-y += -I$(src) # needed for trace events
obj-$(CONFIG_CIFS) += cifs.o
-cifs-y := cifsfs.o cifssmb.o cifs_debug.o connect.o dir.o file.o inode.o \
- link.o misc.o netmisc.o smbencrypt.o transport.o asn1.o \
+cifs-y := trace.o cifsfs.o cifssmb.o cifs_debug.o connect.o dir.o file.o \
+ inode.o link.o misc.o netmisc.o smbencrypt.o transport.o asn1.o \
cifs_unicode.o nterr.o cifsencrypt.o \
readdir.o ioctl.o sess.o export.o smb1ops.o winucase.o \
smb2ops.o smb2maperror.o smb2transport.o \
diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c
index 9d69ea433330..116146022aa1 100644
--- a/fs/cifs/cifs_debug.c
+++ b/fs/cifs/cifs_debug.c
@@ -42,7 +42,7 @@ cifs_dump_mem(char *label, void *data, int length)
data, length, true);
}
-void cifs_dump_detail(void *buf)
+void cifs_dump_detail(void *buf, struct TCP_Server_Info *server)
{
#ifdef CONFIG_CIFS_DEBUG2
struct smb_hdr *smb = (struct smb_hdr *)buf;
@@ -50,7 +50,8 @@ void cifs_dump_detail(void *buf)
cifs_dbg(VFS, "Cmd: %d Err: 0x%x Flags: 0x%x Flgs2: 0x%x Mid: %d Pid: %d\n",
smb->Command, smb->Status.CifsError,
smb->Flags, smb->Flags2, smb->Mid, smb->Pid);
- cifs_dbg(VFS, "smb buf %p len %u\n", smb, smbCalcSize(smb));
+ cifs_dbg(VFS, "smb buf %p len %u\n", smb,
+ server->ops->calc_smb_size(smb, server));
#endif /* CONFIG_CIFS_DEBUG2 */
}
@@ -83,7 +84,7 @@ void cifs_dump_mids(struct TCP_Server_Info *server)
cifs_dbg(VFS, "IsMult: %d IsEnd: %d\n",
mid_entry->multiRsp, mid_entry->multiEnd);
if (mid_entry->resp_buf) {
- cifs_dump_detail(mid_entry->resp_buf);
+ cifs_dump_detail(mid_entry->resp_buf, server);
cifs_dump_mem("existing buf: ",
mid_entry->resp_buf, 62);
}
@@ -113,6 +114,8 @@ static void cifs_debug_tcon(struct seq_file *m, struct cifs_tcon *tcon)
seq_printf(m, " type: %d ", dev_type);
if (tcon->seal)
seq_printf(m, " Encrypted");
+ if (tcon->nocase)
+ seq_printf(m, " nocase");
if (tcon->unix_ext)
seq_printf(m, " POSIX Extensions");
if (tcon->ses->server->ops->dump_share_caps)
@@ -237,6 +240,10 @@ skip_rdma:
server->credits, server->dialect);
if (server->sign)
seq_printf(m, " signed");
+#ifdef CONFIG_CIFS_SMB311
+ if (server->posix_ext_supported)
+ seq_printf(m, " posix");
+#endif /* 3.1.1 */
i++;
list_for_each(tmp2, &server->smb_ses_list) {
ses = list_entry(tmp2, struct cifs_ses,
@@ -314,18 +321,6 @@ skip_rdma:
return 0;
}
-static int cifs_debug_data_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, cifs_debug_data_proc_show, NULL);
-}
-
-static const struct file_operations cifs_debug_data_proc_fops = {
- .open = cifs_debug_data_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
#ifdef CONFIG_CIFS_STATS
static ssize_t cifs_stats_proc_write(struct file *file,
const char __user *buffer, size_t count, loff_t *ppos)
@@ -497,35 +492,36 @@ cifs_proc_init(void)
if (proc_fs_cifs == NULL)
return;
- proc_create("DebugData", 0, proc_fs_cifs, &cifs_debug_data_proc_fops);
+ proc_create_single("DebugData", 0, proc_fs_cifs,
+ cifs_debug_data_proc_show);
#ifdef CONFIG_CIFS_STATS
- proc_create("Stats", 0, proc_fs_cifs, &cifs_stats_proc_fops);
+ proc_create("Stats", 0644, proc_fs_cifs, &cifs_stats_proc_fops);
#endif /* STATS */
- proc_create("cifsFYI", 0, proc_fs_cifs, &cifsFYI_proc_fops);
- proc_create("traceSMB", 0, proc_fs_cifs, &traceSMB_proc_fops);
- proc_create("LinuxExtensionsEnabled", 0, proc_fs_cifs,
+ proc_create("cifsFYI", 0644, proc_fs_cifs, &cifsFYI_proc_fops);
+ proc_create("traceSMB", 0644, proc_fs_cifs, &traceSMB_proc_fops);
+ proc_create("LinuxExtensionsEnabled", 0644, proc_fs_cifs,
&cifs_linux_ext_proc_fops);
- proc_create("SecurityFlags", 0, proc_fs_cifs,
+ proc_create("SecurityFlags", 0644, proc_fs_cifs,
&cifs_security_flags_proc_fops);
- proc_create("LookupCacheEnabled", 0, proc_fs_cifs,
+ proc_create("LookupCacheEnabled", 0644, proc_fs_cifs,
&cifs_lookup_cache_proc_fops);
#ifdef CONFIG_CIFS_SMB_DIRECT
- proc_create("rdma_readwrite_threshold", 0, proc_fs_cifs,
+ proc_create("rdma_readwrite_threshold", 0644, proc_fs_cifs,
&cifs_rdma_readwrite_threshold_proc_fops);
- proc_create("smbd_max_frmr_depth", 0, proc_fs_cifs,
+ proc_create("smbd_max_frmr_depth", 0644, proc_fs_cifs,
&cifs_smbd_max_frmr_depth_proc_fops);
- proc_create("smbd_keep_alive_interval", 0, proc_fs_cifs,
+ proc_create("smbd_keep_alive_interval", 0644, proc_fs_cifs,
&cifs_smbd_keep_alive_interval_proc_fops);
- proc_create("smbd_max_receive_size", 0, proc_fs_cifs,
+ proc_create("smbd_max_receive_size", 0644, proc_fs_cifs,
&cifs_smbd_max_receive_size_proc_fops);
- proc_create("smbd_max_fragmented_recv_size", 0, proc_fs_cifs,
+ proc_create("smbd_max_fragmented_recv_size", 0644, proc_fs_cifs,
&cifs_smbd_max_fragmented_recv_size_proc_fops);
- proc_create("smbd_max_send_size", 0, proc_fs_cifs,
+ proc_create("smbd_max_send_size", 0644, proc_fs_cifs,
&cifs_smbd_max_send_size_proc_fops);
- proc_create("smbd_send_credit_target", 0, proc_fs_cifs,
+ proc_create("smbd_send_credit_target", 0644, proc_fs_cifs,
&cifs_smbd_send_credit_target_proc_fops);
- proc_create("smbd_receive_credit_max", 0, proc_fs_cifs,
+ proc_create("smbd_receive_credit_max", 0644, proc_fs_cifs,
&cifs_smbd_receive_credit_max_proc_fops);
#endif
}
@@ -583,6 +579,8 @@ static ssize_t cifsFYI_proc_write(struct file *file, const char __user *buffer,
cifsFYI = bv;
else if ((c[0] > '1') && (c[0] <= '9'))
cifsFYI = (int) (c[0] - '0'); /* see cifs_debug.h for meanings */
+ else
+ return -EINVAL;
return count;
}
diff --git a/fs/cifs/cifs_debug.h b/fs/cifs/cifs_debug.h
index 0e74690d11bc..f4f3f0853c6e 100644
--- a/fs/cifs/cifs_debug.h
+++ b/fs/cifs/cifs_debug.h
@@ -23,7 +23,7 @@
#define _H_CIFS_DEBUG
void cifs_dump_mem(char *label, void *data, int length);
-void cifs_dump_detail(void *);
+void cifs_dump_detail(void *buf, struct TCP_Server_Info *ptcp_info);
void cifs_dump_mids(struct TCP_Server_Info *);
extern bool traceSMB; /* flag which enables the function below */
void dump_smb(void *, int);
diff --git a/fs/cifs/cifs_fs_sb.h b/fs/cifs/cifs_fs_sb.h
index 350fa55a1bf7..9731d0d891e7 100644
--- a/fs/cifs/cifs_fs_sb.h
+++ b/fs/cifs/cifs_fs_sb.h
@@ -50,6 +50,7 @@
* root mountable
*/
#define CIFS_MOUNT_UID_FROM_ACL 0x2000000 /* try to get UID via special SID */
+#define CIFS_MOUNT_NO_HANDLE_CACHE 0x4000000 /* disable caching dir handles */
struct cifs_sb_info {
struct rb_root tlink_tree;
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index f715609b13f3..eb7b6573f322 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -58,13 +58,15 @@ bool traceSMB;
bool enable_oplocks = true;
bool linuxExtEnabled = true;
bool lookupCacheEnabled = true;
+bool disable_legacy_dialects; /* false by default */
unsigned int global_secflags = CIFSSEC_DEF;
/* unsigned int ntlmv2_support = 0; */
unsigned int sign_CIFS_PDUs = 1;
static const struct super_operations cifs_super_ops;
unsigned int CIFSMaxBufSize = CIFS_MAX_MSGSIZE;
module_param(CIFSMaxBufSize, uint, 0444);
-MODULE_PARM_DESC(CIFSMaxBufSize, "Network buffer size (not including header). "
+MODULE_PARM_DESC(CIFSMaxBufSize, "Network buffer size (not including header) "
+ "for CIFS requests. "
"Default: 16384 Range: 8192 to 130048");
unsigned int cifs_min_rcv = CIFS_MIN_RCV_POOL;
module_param(cifs_min_rcv, uint, 0444);
@@ -76,11 +78,21 @@ MODULE_PARM_DESC(cifs_min_small, "Small network buffers in pool. Default: 30 "
"Range: 2 to 256");
unsigned int cifs_max_pending = CIFS_MAX_REQ;
module_param(cifs_max_pending, uint, 0444);
-MODULE_PARM_DESC(cifs_max_pending, "Simultaneous requests to server. "
+MODULE_PARM_DESC(cifs_max_pending, "Simultaneous requests to server for "
+ "CIFS/SMB1 dialect (N/A for SMB3) "
"Default: 32767 Range: 2 to 32767.");
module_param(enable_oplocks, bool, 0644);
MODULE_PARM_DESC(enable_oplocks, "Enable or disable oplocks. Default: y/Y/1");
+module_param(disable_legacy_dialects, bool, 0644);
+MODULE_PARM_DESC(disable_legacy_dialects, "To improve security it may be "
+ "helpful to restrict the ability to "
+ "override the default dialects (SMB2.1, "
+ "SMB3 and SMB3.02) on mount with old "
+ "dialects (CIFS/SMB1 and SMB2) since "
+ "vers=1.0 (CIFS/SMB1) and vers=2.0 are weaker"
+ " and less secure. Default: n/N/0");
+
extern mempool_t *cifs_sm_req_poolp;
extern mempool_t *cifs_req_poolp;
extern mempool_t *cifs_mid_poolp;
@@ -469,10 +481,20 @@ cifs_show_options(struct seq_file *s, struct dentry *root)
seq_puts(s, ",persistenthandles");
else if (tcon->use_resilient)
seq_puts(s, ",resilienthandles");
+
+#ifdef CONFIG_CIFS_SMB311
+ if (tcon->posix_extensions)
+ seq_puts(s, ",posix");
+ else if (tcon->unix_ext)
+ seq_puts(s, ",unix");
+ else
+ seq_puts(s, ",nounix");
+#else
if (tcon->unix_ext)
seq_puts(s, ",unix");
else
seq_puts(s, ",nounix");
+#endif /* SMB311 */
if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS)
seq_puts(s, ",posixpaths");
if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID)
@@ -495,6 +517,8 @@ cifs_show_options(struct seq_file *s, struct dentry *root)
seq_puts(s, ",sfu");
if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
seq_puts(s, ",nobrl");
+ if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_HANDLE_CACHE)
+ seq_puts(s, ",nohandlecache");
if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL)
seq_puts(s, ",cifsacl");
if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM)
@@ -897,6 +921,17 @@ struct file_system_type cifs_fs_type = {
/* .fs_flags */
};
MODULE_ALIAS_FS("cifs");
+
+static struct file_system_type smb3_fs_type = {
+ .owner = THIS_MODULE,
+ .name = "smb3",
+ .mount = cifs_do_mount,
+ .kill_sb = cifs_kill_sb,
+ /* .fs_flags */
+};
+MODULE_ALIAS_FS("smb3");
+MODULE_ALIAS("smb3");
+
const struct inode_operations cifs_dir_inode_ops = {
.create = cifs_create,
.atomic_open = cifs_atomic_open,
@@ -1047,6 +1082,18 @@ out:
return rc;
}
+/*
+ * Directory operations under CIFS/SMB2/SMB3 are synchronous, so fsync()
+ * is a dummy operation.
+ */
+static int cifs_dir_fsync(struct file *file, loff_t start, loff_t end, int datasync)
+{
+ cifs_dbg(FYI, "Sync directory - name: %pD datasync: 0x%x\n",
+ file, datasync);
+
+ return 0;
+}
+
static ssize_t cifs_copy_file_range(struct file *src_file, loff_t off,
struct file *dst_file, loff_t destoff,
size_t len, unsigned int flags)
@@ -1181,6 +1228,7 @@ const struct file_operations cifs_dir_ops = {
.copy_file_range = cifs_copy_file_range,
.clone_file_range = cifs_clone_file_range,
.llseek = generic_file_llseek,
+ .fsync = cifs_dir_fsync,
};
static void
@@ -1422,6 +1470,12 @@ init_cifs(void)
if (rc)
goto out_init_cifs_idmap;
+ rc = register_filesystem(&smb3_fs_type);
+ if (rc) {
+ unregister_filesystem(&cifs_fs_type);
+ goto out_init_cifs_idmap;
+ }
+
return 0;
out_init_cifs_idmap:
@@ -1452,8 +1506,9 @@ out_clean_proc:
static void __exit
exit_cifs(void)
{
- cifs_dbg(NOISY, "exit_cifs\n");
+ cifs_dbg(NOISY, "exit_smb3\n");
unregister_filesystem(&cifs_fs_type);
+ unregister_filesystem(&smb3_fs_type);
cifs_dfs_release_automount_timer();
#ifdef CONFIG_CIFS_ACL
exit_cifs_idmap();
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 013ba2aed8d9..5f0231803431 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -149,5 +149,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
extern const struct export_operations cifs_export_ops;
#endif /* CONFIG_CIFS_NFSD_EXPORT */
-#define CIFS_VERSION "2.11"
+#define CIFS_VERSION "2.12"
#endif /* _CIFSFS_H */
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index cb950a5fa078..08d1cdd96701 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -176,6 +176,7 @@ struct smb_rqst {
struct kvec *rq_iov; /* array of kvecs */
unsigned int rq_nvec; /* number of kvecs in array */
struct page **rq_pages; /* pointer to array of page ptrs */
+ unsigned int rq_offset; /* the offset to the 1st page */
unsigned int rq_npages; /* number pages in array */
unsigned int rq_pagesz; /* page size to use */
unsigned int rq_tailsz; /* length of last page */
@@ -244,7 +245,7 @@ struct smb_version_operations {
int (*map_error)(char *, bool);
/* find mid corresponding to the response message */
struct mid_q_entry * (*find_mid)(struct TCP_Server_Info *, char *);
- void (*dump_detail)(void *);
+ void (*dump_detail)(void *buf, struct TCP_Server_Info *ptcp_info);
void (*clear_stats)(struct cifs_tcon *);
void (*print_stats)(struct seq_file *m, struct cifs_tcon *);
void (*dump_share_caps)(struct seq_file *, struct cifs_tcon *);
@@ -372,7 +373,7 @@ struct smb_version_operations {
int (*close_dir)(const unsigned int, struct cifs_tcon *,
struct cifs_fid *);
/* calculate a size of SMB message */
- unsigned int (*calc_smb_size)(void *);
+ unsigned int (*calc_smb_size)(void *buf, struct TCP_Server_Info *ptcpi);
/* check for STATUS_PENDING and process it in a positive case */
bool (*is_status_pending)(char *, struct TCP_Server_Info *, int);
/* check for STATUS_NETWORK_SESSION_EXPIRED */
@@ -417,7 +418,7 @@ struct smb_version_operations {
/* create lease context buffer for CREATE request */
char * (*create_lease_buf)(u8 *, u8);
/* parse lease context buffer and return oplock/epoch info */
- __u8 (*parse_lease_buf)(void *, unsigned int *);
+ __u8 (*parse_lease_buf)(void *buf, unsigned int *epoch, char *lkey);
ssize_t (*copychunk_range)(const unsigned int,
struct cifsFileInfo *src_file,
struct cifsFileInfo *target_file,
@@ -457,7 +458,7 @@ struct smb_version_operations {
struct mid_q_entry **);
enum securityEnum (*select_sectype)(struct TCP_Server_Info *,
enum securityEnum);
-
+ int (*next_header)(char *);
};
struct smb_version_values {
@@ -521,10 +522,12 @@ struct smb_vol {
bool sfu_remap:1; /* remap seven reserved chars ala SFU */
bool posix_paths:1; /* unset to not ask for posix pathnames. */
bool no_linux_ext:1;
+ bool linux_ext:1;
bool sfu_emul:1;
bool nullauth:1; /* attempt to authenticate with null user */
bool nocase:1; /* request case insensitive filenames */
bool nobrl:1; /* disable sending byte range locks to srv */
+ bool nohandlecache:1; /* disable caching dir handles if server has problems */
bool mand_lock:1; /* send mandatory not posix byte range lock reqs */
bool seal:1; /* request transport encryption on share */
bool nodfs:1; /* Do not request DFS, even if available */
@@ -630,7 +633,7 @@ struct TCP_Server_Info {
bool oplocks:1; /* enable oplocks */
unsigned int maxReq; /* Clients should submit no more */
/* than maxReq distinct unanswered SMBs to the server when using */
- /* multiplexed reads or writes */
+ /* multiplexed reads or writes (for SMB1/CIFS only, not SMB2/SMB3) */
unsigned int maxBuf; /* maxBuf specifies the maximum */
/* message size the server can send or receive for non-raw SMBs */
/* maxBuf is returned by SMB NegotiateProtocol so maxBuf is only 0 */
@@ -681,6 +684,7 @@ struct TCP_Server_Info {
__le16 cipher_type;
/* save initital negprot hash */
__u8 preauth_sha_hash[SMB2_PREAUTH_HASH_SIZE];
+ bool posix_ext_supported;
#endif /* 3.1.1 */
struct delayed_work reconnect; /* reconnect workqueue job */
struct mutex reconnect_mutex; /* prevent simultaneous reconnects */
@@ -953,9 +957,13 @@ struct cifs_tcon {
bool print:1; /* set if connection to printer share */
bool retry:1;
bool nocase:1;
+ bool nohandlecache:1; /* can turn off if server has strange resource problems */
bool seal:1; /* transport encryption for this mounted share */
bool unix_ext:1; /* if false disable Linux extensions to CIFS protocol
for this mount even if server would support */
+#ifdef CONFIG_CIFS_SMB311
+ bool posix_extensions; /* if true SMB3.11 posix extensions enabled */
+#endif /* CIFS_311 */
bool local_lease:1; /* check leases (only) on local system not remote */
bool broken_posix_open; /* e.g. Samba server versions < 3.3.2, 3.2.9 */
bool broken_sparse_sup; /* if server or share does not support sparse */
@@ -979,6 +987,9 @@ struct cifs_tcon {
struct fscache_cookie *fscache; /* cookie for share */
#endif
struct list_head pending_opens; /* list of incomplete opens */
+ bool valid_root_fid:1; /* Do we have a usable root fid */
+ struct mutex prfid_mutex; /* prevents reopen race after dead ses */
+ struct cifs_fid *prfid; /* handle to the directory at top of share */
/* BB add field for back pointer to sb struct(s)? */
};
@@ -1071,6 +1082,7 @@ struct cifs_open_parms {
int create_options;
const char *path;
struct cifs_fid *fid;
+ umode_t mode;
bool reconnect:1;
};
@@ -1169,10 +1181,11 @@ struct cifs_readdata {
struct smbd_mr *mr;
#endif
unsigned int pagesz;
+ unsigned int page_offset;
unsigned int tailsz;
unsigned int credits;
unsigned int nr_pages;
- struct page *pages[];
+ struct page **pages;
};
struct cifs_writedata;
@@ -1194,10 +1207,11 @@ struct cifs_writedata {
struct smbd_mr *mr;
#endif
unsigned int pagesz;
+ unsigned int page_offset;
unsigned int tailsz;
unsigned int credits;
unsigned int nr_pages;
- struct page *pages[];
+ struct page **pages;
};
/*
@@ -1692,16 +1706,17 @@ GLOBAL_EXTERN atomic_t smBufAllocCount;
GLOBAL_EXTERN atomic_t midCount;
/* Misc globals */
-GLOBAL_EXTERN bool enable_oplocks; /* enable or disable oplocks */
-GLOBAL_EXTERN bool lookupCacheEnabled;
-GLOBAL_EXTERN unsigned int global_secflags; /* if on, session setup sent
+extern bool enable_oplocks; /* enable or disable oplocks */
+extern bool lookupCacheEnabled;
+extern unsigned int global_secflags; /* if on, session setup sent
with more secure ntlmssp2 challenge/resp */
-GLOBAL_EXTERN unsigned int sign_CIFS_PDUs; /* enable smb packet signing */
-GLOBAL_EXTERN bool linuxExtEnabled;/*enable Linux/Unix CIFS extensions*/
-GLOBAL_EXTERN unsigned int CIFSMaxBufSize; /* max size not including hdr */
-GLOBAL_EXTERN unsigned int cifs_min_rcv; /* min size of big ntwrk buf pool */
-GLOBAL_EXTERN unsigned int cifs_min_small; /* min size of small buf pool */
-GLOBAL_EXTERN unsigned int cifs_max_pending; /* MAX requests at once to server*/
+extern unsigned int sign_CIFS_PDUs; /* enable smb packet signing */
+extern bool linuxExtEnabled;/*enable Linux/Unix CIFS extensions*/
+extern unsigned int CIFSMaxBufSize; /* max size not including hdr */
+extern unsigned int cifs_min_rcv; /* min size of big ntwrk buf pool */
+extern unsigned int cifs_min_small; /* min size of small buf pool */
+extern unsigned int cifs_max_pending; /* MAX requests at once to server*/
+extern bool disable_legacy_dialects; /* forbid vers=1.0 and vers=2.0 mounts */
#ifdef CONFIG_CIFS_ACL
GLOBAL_EXTERN struct rb_root uidtree;
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index 365a414a75e9..7933c5f9c076 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -21,6 +21,7 @@
#ifndef _CIFSPROTO_H
#define _CIFSPROTO_H
#include <linux/nls.h>
+#include "trace.h"
struct statfs;
struct smb_vol;
@@ -47,6 +48,7 @@ extern void _free_xid(unsigned int);
cifs_dbg(FYI, "CIFS VFS: in %s as Xid: %u with uid: %d\n", \
__func__, __xid, \
from_kuid(&init_user_ns, current_fsuid())); \
+ trace_smb3_enter(__xid, __func__); \
__xid; \
})
@@ -54,7 +56,11 @@ extern void _free_xid(unsigned int);
do { \
_free_xid(curr_xid); \
cifs_dbg(FYI, "CIFS VFS: leaving %s (xid = %u) rc = %d\n", \
- __func__, curr_xid, (int)rc); \
+ __func__, curr_xid, (int)rc); \
+ if (rc) \
+ trace_smb3_exit_err(curr_xid, __func__, (int)rc); \
+ else \
+ trace_smb3_exit_done(curr_xid, __func__); \
} while (0)
extern int init_cifs_idmap(void);
extern void exit_cifs_idmap(void);
@@ -124,7 +130,7 @@ extern void cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
unsigned int bytes_written);
extern struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *, bool);
extern struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *, bool);
-extern unsigned int smbCalcSize(void *buf);
+extern unsigned int smbCalcSize(void *buf, struct TCP_Server_Info *server);
extern int decode_negTokenInit(unsigned char *security_blob, int length,
struct TCP_Server_Info *server);
extern int cifs_convert_address(struct sockaddr *dst, const char *src, int len);
@@ -197,7 +203,9 @@ extern void dequeue_mid(struct mid_q_entry *mid, bool malformed);
extern int cifs_read_from_socket(struct TCP_Server_Info *server, char *buf,
unsigned int to_read);
extern int cifs_read_page_from_socket(struct TCP_Server_Info *server,
- struct page *page, unsigned int to_read);
+ struct page *page,
+ unsigned int page_offset,
+ unsigned int to_read);
extern int cifs_setup_cifs_sb(struct smb_vol *pvolume_info,
struct cifs_sb_info *cifs_sb);
extern int cifs_match_super(struct super_block *, void *);
@@ -525,6 +533,8 @@ int cifs_async_writev(struct cifs_writedata *wdata,
void cifs_writev_complete(struct work_struct *work);
struct cifs_writedata *cifs_writedata_alloc(unsigned int nr_pages,
work_func_t complete);
+struct cifs_writedata *cifs_writedata_direct_alloc(struct page **pages,
+ work_func_t complete);
void cifs_writedata_release(struct kref *refcount);
int cifs_query_mf_symlink(unsigned int xid, struct cifs_tcon *tcon,
struct cifs_sb_info *cifs_sb,
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 1529a088383d..5aca336642c0 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -106,6 +106,12 @@ cifs_mark_open_files_invalid(struct cifs_tcon *tcon)
open_file->oplock_break_cancelled = true;
}
spin_unlock(&tcon->open_file_lock);
+
+ mutex_lock(&tcon->prfid_mutex);
+ tcon->valid_root_fid = false;
+ memset(tcon->prfid, 0, sizeof(struct cifs_fid));
+ mutex_unlock(&tcon->prfid_mutex);
+
/*
* BB Add call to invalidate_inodes(sb) for all superblocks mounted
* to this tcon.
@@ -1946,6 +1952,7 @@ cifs_writedata_release(struct kref *refcount)
if (wdata->cfile)
cifsFileInfo_put(wdata->cfile);
+ kvfree(wdata->pages);
kfree(wdata);
}
@@ -2069,12 +2076,22 @@ cifs_writev_complete(struct work_struct *work)
struct cifs_writedata *
cifs_writedata_alloc(unsigned int nr_pages, work_func_t complete)
{
+ struct page **pages =
+ kzalloc(sizeof(struct page *) * nr_pages, GFP_NOFS);
+ if (pages)
+ return cifs_writedata_direct_alloc(pages, complete);
+
+ return NULL;
+}
+
+struct cifs_writedata *
+cifs_writedata_direct_alloc(struct page **pages, work_func_t complete)
+{
struct cifs_writedata *wdata;
- /* writedata + number of page pointers */
- wdata = kzalloc(sizeof(*wdata) +
- sizeof(struct page *) * nr_pages, GFP_NOFS);
+ wdata = kzalloc(sizeof(*wdata), GFP_NOFS);
if (wdata != NULL) {
+ wdata->pages = pages;
kref_init(&wdata->refcount);
INIT_LIST_HEAD(&wdata->list);
init_completion(&wdata->done);
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index a5aa158d535a..e5a2fe7f0dd4 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -61,6 +61,7 @@
#define RFC1001_PORT 139
extern mempool_t *cifs_req_poolp;
+extern bool disable_legacy_dialects;
/* FIXME: should these be tunable? */
#define TLINK_ERROR_EXPIRE (1 * HZ)
@@ -76,9 +77,10 @@ enum {
Opt_mapposix, Opt_nomapposix,
Opt_mapchars, Opt_nomapchars, Opt_sfu,
Opt_nosfu, Opt_nodfs, Opt_posixpaths,
- Opt_noposixpaths, Opt_nounix,
+ Opt_noposixpaths, Opt_nounix, Opt_unix,
Opt_nocase,
Opt_brl, Opt_nobrl,
+ Opt_handlecache, Opt_nohandlecache,
Opt_forcemandatorylock, Opt_setuidfromacl, Opt_setuids,
Opt_nosetuids, Opt_dynperm, Opt_nodynperm,
Opt_nohard, Opt_nosoft,
@@ -144,10 +146,16 @@ static const match_table_t cifs_mount_option_tokens = {
{ Opt_noposixpaths, "noposixpaths" },
{ Opt_nounix, "nounix" },
{ Opt_nounix, "nolinux" },
+ { Opt_nounix, "noposix" },
+ { Opt_unix, "unix" },
+ { Opt_unix, "linux" },
+ { Opt_unix, "posix" },
{ Opt_nocase, "nocase" },
{ Opt_nocase, "ignorecase" },
{ Opt_brl, "brl" },
{ Opt_nobrl, "nobrl" },
+ { Opt_handlecache, "handlecache" },
+ { Opt_nohandlecache, "nohandlecache" },
{ Opt_nobrl, "nolock" },
{ Opt_forcemandatorylock, "forcemandatorylock" },
{ Opt_forcemandatorylock, "forcemand" },
@@ -591,10 +599,11 @@ cifs_read_from_socket(struct TCP_Server_Info *server, char *buf,
int
cifs_read_page_from_socket(struct TCP_Server_Info *server, struct page *page,
- unsigned int to_read)
+ unsigned int page_offset, unsigned int to_read)
{
struct msghdr smb_msg;
- struct bio_vec bv = {.bv_page = page, .bv_len = to_read};
+ struct bio_vec bv = {
+ .bv_page = page, .bv_len = to_read, .bv_offset = page_offset};
iov_iter_bvec(&smb_msg.msg_iter, READ | ITER_BVEC, &bv, 1, to_read);
return cifs_readv_from_socket(server, &smb_msg);
}
@@ -848,6 +857,7 @@ cifs_demultiplex_thread(void *p)
int length;
struct TCP_Server_Info *server = p;
unsigned int pdu_length;
+ unsigned int next_offset;
char *buf = NULL;
struct task_struct *task_to_wake = NULL;
struct mid_q_entry *mid_entry;
@@ -874,24 +884,29 @@ cifs_demultiplex_thread(void *p)
length = cifs_read_from_socket(server, buf, pdu_length);
if (length < 0)
continue;
- server->total_read = length;
+
+ if (server->vals->header_preamble_size == 0)
+ server->total_read = 0;
+ else
+ server->total_read = length;
/*
* The right amount was read from socket - 4 bytes,
* so we can now interpret the length field.
*/
pdu_length = get_rfc1002_length(buf);
- server->pdu_size = pdu_length;
cifs_dbg(FYI, "RFC1002 header 0x%x\n", pdu_length);
if (!is_smb_response(server, buf[0]))
continue;
+next_pdu:
+ server->pdu_size = pdu_length;
/* make sure we have enough to get to the MID */
- if (pdu_length < HEADER_SIZE(server) - 1 -
+ if (server->pdu_size < HEADER_SIZE(server) - 1 -
server->vals->header_preamble_size) {
cifs_dbg(VFS, "SMB response too short (%u bytes)\n",
- pdu_length);
+ server->pdu_size);
cifs_reconnect(server);
wake_up(&server->response_q);
continue;
@@ -906,6 +921,12 @@ cifs_demultiplex_thread(void *p)
continue;
server->total_read += length;
+ if (server->ops->next_header) {
+ next_offset = server->ops->next_header(buf);
+ if (next_offset)
+ server->pdu_size = next_offset;
+ }
+
if (server->ops->is_transform_hdr &&
server->ops->receive_transform &&
server->ops->is_transform_hdr(buf)) {
@@ -948,10 +969,18 @@ cifs_demultiplex_thread(void *p)
HEADER_SIZE(server));
#ifdef CONFIG_CIFS_DEBUG2
if (server->ops->dump_detail)
- server->ops->dump_detail(buf);
+ server->ops->dump_detail(buf, server);
cifs_dump_mids(server);
#endif /* CIFS_DEBUG2 */
-
+ }
+ if (pdu_length > server->pdu_size) {
+ if (!allocate_buffers(server))
+ continue;
+ pdu_length -= server->pdu_size;
+ server->total_read = 0;
+ server->large_buf = false;
+ buf = server->smallbuf;
+ goto next_pdu;
}
} /* end while !EXITING */
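/*
 * [Editorial sketch] A possible ->next_header() for SMB2/SMB3, shown
 * here as an assumption (the real hook is added elsewhere in this patch
 * and must also account for encrypted transform headers): compounded
 * responses chain via the NextCommand offset in the sync header.
 *
 *	static int smb2_next_header(char *buf)
 *	{
 *		struct smb2_sync_hdr *shdr = (struct smb2_sync_hdr *)buf;
 *
 *		return le32_to_cpu(shdr->NextCommand);
 *	}
 */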
@@ -1143,10 +1172,18 @@ cifs_parse_smb_version(char *value, struct smb_vol *vol)
switch (match_token(value, cifs_smb_version_tokens, args)) {
case Smb_1:
+ if (disable_legacy_dialects) {
+ cifs_dbg(VFS, "mount with legacy dialect disabled\n");
+ return 1;
+ }
vol->ops = &smb1_operations;
vol->vals = &smb1_values;
break;
case Smb_20:
+ if (disable_legacy_dialects) {
+ cifs_dbg(VFS, "mount with legacy dialect disabled\n");
+ return 1;
+ }
vol->ops = &smb20_operations;
vol->vals = &smb20_values;
break;
@@ -1426,8 +1463,17 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
vol->posix_paths = 0;
break;
case Opt_nounix:
+ if (vol->linux_ext)
+ cifs_dbg(VFS,
+ "conflicting unix mount options\n");
vol->no_linux_ext = 1;
break;
+ case Opt_unix:
+ if (vol->no_linux_ext)
+ cifs_dbg(VFS,
+ "conflicting unix mount options\n");
+ vol->linux_ext = 1;
+ break;
case Opt_nocase:
vol->nocase = 1;
break;
@@ -1445,6 +1491,12 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
(S_IALLUGO & ~(S_ISUID | S_IXGRP)))
vol->file_mode = S_IALLUGO;
break;
+ case Opt_nohandlecache:
+ vol->nohandlecache = 1;
+ break;
+ case Opt_handlecache:
+ vol->nohandlecache = 0;
+ break;
case Opt_forcemandatorylock:
vol->mand_lock = 1;
break;
@@ -1977,14 +2029,6 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
goto cifs_parse_mount_err;
}
-#ifdef CONFIG_CIFS_SMB_DIRECT
- if (vol->rdma && vol->sign) {
- cifs_dbg(VFS, "Currently SMB direct doesn't support signing."
- " This is being fixed\n");
- goto cifs_parse_mount_err;
- }
-#endif
-
#ifndef CONFIG_KEYS
/* Muliuser mounts require CONFIG_KEYS support */
if (vol->multiuser) {
@@ -2975,6 +3019,13 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb_vol *volume_info)
}
}
+#ifdef CONFIG_CIFS_SMB311
+ if ((volume_info->linux_ext) && (ses->server->posix_ext_supported)) {
+ if (ses->server->vals->protocol_id == SMB311_PROT_ID)
+ tcon->posix_extensions = true;
+ }
+#endif /* 311 */
+
/*
* BB Do we need to wrap session_mutex around this TCon call and Unix
* SetFS as we do on SessSetup and reconnect?
@@ -3030,6 +3081,7 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb_vol *volume_info)
*/
tcon->retry = volume_info->retry;
tcon->nocase = volume_info->nocase;
+ tcon->nohandlecache = volume_info->nohandlecache;
tcon->local_lease = volume_info->local_lease;
INIT_LIST_HEAD(&tcon->pending_opens);
@@ -3588,6 +3640,8 @@ int cifs_setup_cifs_sb(struct smb_vol *pvolume_info,
cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_UNX_EMUL;
if (pvolume_info->nobrl)
cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NO_BRL;
+ if (pvolume_info->nohandlecache)
+ cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NO_HANDLE_CACHE;
if (pvolume_info->nostrictsync)
cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NOSSYNC;
if (pvolume_info->mand_lock)
@@ -3930,6 +3984,12 @@ try_mount_again:
goto remote_path_check;
}
+#ifdef CONFIG_CIFS_SMB311
+ /* if new SMB3.11 POSIX extensions are supported do not remap / and \ */
+ if (tcon->posix_extensions)
+ cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_POSIX_PATHS;
+#endif /* SMB3.11 */
+
/* tell server which Unix caps we support */
if (cap_unix(tcon->ses)) {
/* reset of caps checks mount to see if unix extensions
@@ -4361,6 +4421,7 @@ cifs_construct_tcon(struct cifs_sb_info *cifs_sb, kuid_t fsuid)
vol_info->UNC = master_tcon->treeName;
vol_info->retry = master_tcon->retry;
vol_info->nocase = master_tcon->nocase;
+ vol_info->nohandlecache = master_tcon->nohandlecache;
vol_info->local_lease = master_tcon->local_lease;
vol_info->no_linux_ext = !master_tcon->unix_ext;
vol_info->sectype = master_tcon->ses->sectype;
@@ -4390,8 +4451,14 @@ cifs_construct_tcon(struct cifs_sb_info *cifs_sb, kuid_t fsuid)
goto out;
}
+#ifdef CONFIG_CIFS_SMB311
+ /* if new SMB3.11 POSIX extensions are supported do not remap / and \ */
+ if (tcon->posix_extensions)
+ cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_POSIX_PATHS;
+#endif /* SMB3.11 */
if (cap_unix(ses))
reset_cifs_unix_caps(0, tcon, NULL, vol_info);
+
out:
kfree(vol_info->username);
kzfree(vol_info->password);
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 925844343038..ddae52bd1993 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -369,7 +369,7 @@ cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned int xid,
oparms.path = full_path;
oparms.fid = fid;
oparms.reconnect = false;
-
+ oparms.mode = mode;
rc = server->ops->open(xid, &oparms, oplock, buf);
if (rc) {
cifs_dbg(FYI, "cifs_create returned 0x%x\n", rc);
@@ -780,21 +780,25 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
tlink = cifs_sb_tlink(cifs_sb);
if (IS_ERR(tlink)) {
free_xid(xid);
- return (struct dentry *)tlink;
+ return ERR_CAST(tlink);
}
pTcon = tlink_tcon(tlink);
rc = check_name(direntry, pTcon);
- if (rc)
- goto lookup_out;
+ if (unlikely(rc)) {
+ cifs_put_tlink(tlink);
+ free_xid(xid);
+ return ERR_PTR(rc);
+ }
/* cannot grab the rename sem here since it would deadlock in
cases (such as the beginning of sys_rename itself) in which
we already hold the sb rename sem */
full_path = build_path_from_dentry(direntry);
if (full_path == NULL) {
- rc = -ENOMEM;
- goto lookup_out;
+ cifs_put_tlink(tlink);
+ free_xid(xid);
+ return ERR_PTR(-ENOMEM);
}
if (d_really_is_positive(direntry)) {
@@ -813,29 +817,25 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
parent_dir_inode->i_sb, xid, NULL);
}
- if ((rc == 0) && (newInode != NULL)) {
- d_add(direntry, newInode);
+ if (rc == 0) {
/* since paths are not looked up by component - the parent
directories are presumed to be good here */
renew_parental_timestamps(direntry);
-
} else if (rc == -ENOENT) {
- rc = 0;
cifs_set_time(direntry, jiffies);
- d_add(direntry, NULL);
- /* if it was once a directory (but how can we tell?) we could do
- shrink_dcache_parent(direntry); */
- } else if (rc != -EACCES) {
- cifs_dbg(FYI, "Unexpected lookup error %d\n", rc);
- /* We special case check for Access Denied - since that
- is a common return code */
+ newInode = NULL;
+ } else {
+ if (rc != -EACCES) {
+ cifs_dbg(FYI, "Unexpected lookup error %d\n", rc);
+ /* We special case check for Access Denied - since that
+ is a common return code */
+ }
+ newInode = ERR_PTR(rc);
}
-
-lookup_out:
kfree(full_path);
cifs_put_tlink(tlink);
free_xid(xid);
- return ERR_PTR(rc);
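+ /* d_splice_alias() copes with a NULL or ERR_PTR inode for us */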
+ return d_splice_alias(newInode, direntry);
}
static int
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 23fd430fe74a..87eece6fbd48 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -2880,13 +2880,13 @@ out:
}
static struct cifs_readdata *
-cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
+cifs_readdata_direct_alloc(struct page **pages, work_func_t complete)
{
struct cifs_readdata *rdata;
- rdata = kzalloc(sizeof(*rdata) + (sizeof(struct page *) * nr_pages),
- GFP_KERNEL);
+ rdata = kzalloc(sizeof(*rdata), GFP_KERNEL);
if (rdata != NULL) {
+ rdata->pages = pages;
kref_init(&rdata->refcount);
INIT_LIST_HEAD(&rdata->list);
init_completion(&rdata->done);
@@ -2896,6 +2896,22 @@ cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
return rdata;
}
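+/*
+ * Wrapper that also allocates the page pointer array; callers that
+ * already own a page array use cifs_readdata_direct_alloc() above.
+ */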
+static struct cifs_readdata *
+cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
+{
+ struct page **pages =
+ kzalloc(sizeof(struct page *) * nr_pages, GFP_KERNEL);
+ struct cifs_readdata *ret = NULL;
+
+ if (pages) {
+ ret = cifs_readdata_direct_alloc(pages, complete);
+ if (!ret)
+ kfree(pages);
+ }
+
+ return ret;
+}
+
void
cifs_readdata_release(struct kref *refcount)
{
@@ -2910,6 +2926,7 @@ cifs_readdata_release(struct kref *refcount)
if (rdata->cfile)
cifsFileInfo_put(rdata->cfile);
+ kvfree(rdata->pages);
kfree(rdata);
}
@@ -3009,12 +3026,20 @@ uncached_fill_pages(struct TCP_Server_Info *server,
int result = 0;
unsigned int i;
unsigned int nr_pages = rdata->nr_pages;
+ unsigned int page_offset = rdata->page_offset;
rdata->got_bytes = 0;
rdata->tailsz = PAGE_SIZE;
for (i = 0; i < nr_pages; i++) {
struct page *page = rdata->pages[i];
size_t n;
+ unsigned int segment_size = rdata->pagesz;
+
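+ /* only the first page carries the initial offset; later pages start at 0 */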
+ if (i == 0)
+ segment_size -= page_offset;
+ else
+ page_offset = 0;
+
if (len <= 0) {
/* no need to hold page hostage */
@@ -3023,24 +3048,25 @@ uncached_fill_pages(struct TCP_Server_Info *server,
put_page(page);
continue;
}
+
n = len;
- if (len >= PAGE_SIZE) {
+ if (len >= segment_size)
/* enough data to fill the page */
- n = PAGE_SIZE;
- len -= n;
- } else {
- zero_user(page, len, PAGE_SIZE - len);
+ n = segment_size;
+ else
rdata->tailsz = len;
- len = 0;
- }
+ len -= n;
+
if (iter)
- result = copy_page_from_iter(page, 0, n, iter);
+ result = copy_page_from_iter(
+ page, page_offset, n, iter);
#ifdef CONFIG_CIFS_SMB_DIRECT
else if (rdata->mr)
result = n;
#endif
else
- result = cifs_read_page_from_socket(server, page, n);
+ result = cifs_read_page_from_socket(
+ server, page, page_offset, n);
if (result < 0)
break;
@@ -3113,6 +3139,7 @@ cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
rdata->bytes = cur_len;
rdata->pid = pid;
rdata->pagesz = PAGE_SIZE;
+ rdata->tailsz = PAGE_SIZE;
rdata->read_into_pages = cifs_uncached_read_into_pages;
rdata->copy_into_pages = cifs_uncached_copy_into_pages;
rdata->credits = credits;
@@ -3557,6 +3584,7 @@ readpages_fill_pages(struct TCP_Server_Info *server,
u64 eof;
pgoff_t eof_index;
unsigned int nr_pages = rdata->nr_pages;
+ unsigned int page_offset = rdata->page_offset;
/* determine the eof that the server (probably) has */
eof = CIFS_I(rdata->mapping->host)->server_eof;
@@ -3567,13 +3595,21 @@ readpages_fill_pages(struct TCP_Server_Info *server,
rdata->tailsz = PAGE_SIZE;
for (i = 0; i < nr_pages; i++) {
struct page *page = rdata->pages[i];
- size_t n = PAGE_SIZE;
+ unsigned int to_read = rdata->pagesz;
+ size_t n;
+
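+ /* only the first page is read starting at rdata->page_offset */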
+ if (i == 0)
+ to_read -= page_offset;
+ else
+ page_offset = 0;
+
+ n = to_read;
- if (len >= PAGE_SIZE) {
- len -= PAGE_SIZE;
+ if (len >= to_read) {
+ len -= to_read;
} else if (len > 0) {
/* enough for partial page, fill and zero the rest */
- zero_user(page, len, PAGE_SIZE - len);
+ zero_user(page, len + page_offset, to_read - len);
n = rdata->tailsz = len;
len = 0;
} else if (page->index > eof_index) {
@@ -3605,13 +3641,15 @@ readpages_fill_pages(struct TCP_Server_Info *server,
}
if (iter)
- result = copy_page_from_iter(page, 0, n, iter);
+ result = copy_page_from_iter(
+ page, page_offset, n, iter);
#ifdef CONFIG_CIFS_SMB_DIRECT
else if (rdata->mr)
result = n;
#endif
else
- result = cifs_read_page_from_socket(server, page, n);
+ result = cifs_read_page_from_socket(
+ server, page, page_offset, n);
if (result < 0)
break;
@@ -3790,6 +3828,7 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
rdata->bytes = bytes;
rdata->pid = pid;
rdata->pagesz = PAGE_SIZE;
+ rdata->tailsz = PAGE_SIZE;
rdata->read_into_pages = cifs_readpages_read_into_pages;
rdata->copy_into_pages = cifs_readpages_copy_into_pages;
rdata->credits = credits;
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 3c371f7f5963..745fd7fe8d0e 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -746,7 +746,8 @@ cifs_get_inode_info(struct inode **inode, const char *full_path,
cifs_dbg(FYI, "Getting info on %s\n", full_path);
if ((data == NULL) && (*inode != NULL)) {
- if (CIFS_CACHE_READ(CIFS_I(*inode))) {
+ if (CIFS_CACHE_READ(CIFS_I(*inode)) &&
+ CIFS_I(*inode)->time != 0) {
cifs_dbg(FYI, "No need to revalidate cached inode sizes\n");
goto cgii_exit;
}
@@ -1857,15 +1858,15 @@ cifs_inode_needs_reval(struct inode *inode)
struct cifsInodeInfo *cifs_i = CIFS_I(inode);
struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
+ if (cifs_i->time == 0)
+ return true;
+
if (CIFS_CACHE_READ(cifs_i))
return false;
if (!lookupCacheEnabled)
return true;
- if (cifs_i->time == 0)
- return true;
-
if (!cifs_sb->actimeo)
return true;
@@ -2104,10 +2105,14 @@ static int cifs_truncate_page(struct address_space *mapping, loff_t from)
static void cifs_setsize(struct inode *inode, loff_t offset)
{
+ struct cifsInodeInfo *cifs_i = CIFS_I(inode);
+
spin_lock(&inode->i_lock);
i_size_write(inode, offset);
spin_unlock(&inode->i_lock);
+ /* Cached inode must be refreshed on truncate */
+ cifs_i->time = 0;
truncate_pagecache(inode, offset);
}
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index 460084a8eac5..aba3fc3058da 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -117,6 +117,8 @@ tconInfoAlloc(void)
INIT_LIST_HEAD(&ret_buf->openFileList);
INIT_LIST_HEAD(&ret_buf->tcon_list);
spin_lock_init(&ret_buf->open_file_lock);
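+ /* prfid caches the handle for the share root; see open_shroot() */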
+ mutex_init(&ret_buf->prfid_mutex);
+ ret_buf->prfid = kzalloc(sizeof(struct cifs_fid), GFP_KERNEL);
#ifdef CONFIG_CIFS_STATS
spin_lock_init(&ret_buf->stat_lock);
#endif
@@ -134,6 +136,7 @@ tconInfoFree(struct cifs_tcon *buf_to_free)
atomic_dec(&tconInfoAllocCount);
kfree(buf_to_free->nativeFileSystem);
kzfree(buf_to_free->password);
+ kfree(buf_to_free->prfid);
kfree(buf_to_free);
}
@@ -145,7 +148,7 @@ cifs_buf_get(void)
* The SMB2 header is bigger than the CIFS one, so it is no problem
* to clear a few more bytes for CIFS.
*/
- size_t buf_size = sizeof(struct smb2_hdr);
+ size_t buf_size = sizeof(struct smb2_sync_hdr);
/*
* We could use negotiated size instead of max_msgsize -
@@ -339,7 +342,7 @@ checkSMB(char *buf, unsigned int total_read, struct TCP_Server_Info *server)
/* otherwise, there is enough to get to the BCC */
if (check_smb_hdr(smb))
return -EIO;
- clc_len = smbCalcSize(smb);
+ clc_len = smbCalcSize(smb, server);
if (4 + rfclen != total_read) {
cifs_dbg(VFS, "Length read does not match RFC1001 length %d\n",
diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c
index cc88f4f0325e..d7ad0dfe4e68 100644
--- a/fs/cifs/netmisc.c
+++ b/fs/cifs/netmisc.c
@@ -903,7 +903,7 @@ map_smb_to_linux_error(char *buf, bool logErr)
* portion, the number of word parameters and the data portion of the message
*/
unsigned int
-smbCalcSize(void *buf)
+smbCalcSize(void *buf, struct TCP_Server_Info *server)
{
struct smb_hdr *ptr = (struct smb_hdr *)buf;
return (sizeof(struct smb_hdr) + (2 * ptr->WordCount) +
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index a27fc8791551..eeab81c9452f 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -650,7 +650,8 @@ find_cifs_entry(const unsigned int xid, struct cifs_tcon *tcon, loff_t pos,
char *cur_ent;
char *end_of_smb = cfile->srch_inf.ntwrk_buf_start +
server->ops->calc_smb_size(
- cfile->srch_inf.ntwrk_buf_start);
+ cfile->srch_inf.ntwrk_buf_start,
+ server);
cur_ent = cfile->srch_inf.srch_entries_start;
first_entry_in_buffer = cfile->srch_inf.index_of_last_entry
@@ -831,7 +832,8 @@ int cifs_readdir(struct file *file, struct dir_context *ctx)
cifs_dbg(FYI, "loop through %d times filling dir for net buf %p\n",
num_to_fill, cifsFile->srch_inf.ntwrk_buf_start);
max_len = tcon->ses->server->ops->calc_smb_size(
- cifsFile->srch_inf.ntwrk_buf_start);
+ cifsFile->srch_inf.ntwrk_buf_start,
+ tcon->ses->server);
end_of_smb = cifsFile->srch_inf.ntwrk_buf_start + max_len;
tmp_buf = kmalloc(UNICODE_NAME_MAX, GFP_KERNEL);
diff --git a/fs/cifs/smb2glob.h b/fs/cifs/smb2glob.h
index 401a5d856636..0ffa18094335 100644
--- a/fs/cifs/smb2glob.h
+++ b/fs/cifs/smb2glob.h
@@ -61,9 +61,4 @@
/* Maximum buffer size value we can send with 1 credit */
#define SMB2_MAX_BUFFER_SIZE 65536
-static inline struct smb2_sync_hdr *get_sync_hdr(void *buf)
-{
- return &(((struct smb2_hdr *)buf)->sync_hdr);
-}
-
#endif /* _SMB2_GLOB_H */
diff --git a/fs/cifs/smb2inode.c b/fs/cifs/smb2inode.c
index 1238cd3552f9..a6e786e39248 100644
--- a/fs/cifs/smb2inode.c
+++ b/fs/cifs/smb2inode.c
@@ -44,26 +44,38 @@ smb2_open_op_close(const unsigned int xid, struct cifs_tcon *tcon,
__u32 create_options, void *data, int command)
{
int rc, tmprc = 0;
- __le16 *utf16_path;
+ __le16 *utf16_path = NULL;
__u8 oplock = SMB2_OPLOCK_LEVEL_NONE;
struct cifs_open_parms oparms;
struct cifs_fid fid;
+ bool use_cached_root_handle = false;
- utf16_path = cifs_convert_path_to_utf16(full_path, cifs_sb);
- if (!utf16_path)
- return -ENOMEM;
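+ /*
+ * An attribute query on the share root can reuse the cached root
+ * handle instead of a fresh open/close pair.
+ */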
+ if ((strcmp(full_path, "") == 0) && (create_options == 0) &&
+ (desired_access == FILE_READ_ATTRIBUTES) &&
+ (create_disposition == FILE_OPEN) &&
+ (tcon->nohandlecache == false)) {
+ rc = open_shroot(xid, tcon, &fid);
+ if (rc == 0)
+ use_cached_root_handle = true;
+ }
+
+ if (use_cached_root_handle == false) {
+ utf16_path = cifs_convert_path_to_utf16(full_path, cifs_sb);
+ if (!utf16_path)
+ return -ENOMEM;
- oparms.tcon = tcon;
- oparms.desired_access = desired_access;
- oparms.disposition = create_disposition;
- oparms.create_options = create_options;
- oparms.fid = &fid;
- oparms.reconnect = false;
+ oparms.tcon = tcon;
+ oparms.desired_access = desired_access;
+ oparms.disposition = create_disposition;
+ oparms.create_options = create_options;
+ oparms.fid = &fid;
+ oparms.reconnect = false;
- rc = SMB2_open(xid, &oparms, utf16_path, &oplock, NULL, NULL);
- if (rc) {
- kfree(utf16_path);
- return rc;
+ rc = SMB2_open(xid, &oparms, utf16_path, &oplock, NULL, NULL);
+ if (rc) {
+ kfree(utf16_path);
+ return rc;
+ }
}
switch (command) {
@@ -107,7 +119,8 @@ smb2_open_op_close(const unsigned int xid, struct cifs_tcon *tcon,
break;
}
- rc = SMB2_close(xid, tcon, fid.persistent_fid, fid.volatile_fid);
+ if (use_cached_root_handle == false)
+ rc = SMB2_close(xid, tcon, fid.persistent_fid, fid.volatile_fid);
if (tmprc)
rc = tmprc;
kfree(utf16_path);
diff --git a/fs/cifs/smb2maperror.c b/fs/cifs/smb2maperror.c
index 3bfc9c990724..20a2d304c603 100644
--- a/fs/cifs/smb2maperror.c
+++ b/fs/cifs/smb2maperror.c
@@ -27,6 +27,7 @@
#include "smb2proto.h"
#include "smb2status.h"
#include "smb2glob.h"
+#include "trace.h"
struct status_to_posix_error {
__le32 smb2_status;
@@ -2450,13 +2451,16 @@ smb2_print_status(__le32 status)
int
map_smb2_to_linux_error(char *buf, bool log_err)
{
- struct smb2_sync_hdr *shdr = get_sync_hdr(buf);
+ struct smb2_sync_hdr *shdr = (struct smb2_sync_hdr *)buf;
unsigned int i;
int rc = -EIO;
__le32 smb2err = shdr->Status;
- if (smb2err == 0)
+ if (smb2err == 0) {
+ trace_smb3_cmd_done(shdr->TreeId, shdr->SessionId,
+ le16_to_cpu(shdr->Command), le64_to_cpu(shdr->MessageId));
return 0;
+ }
/* mask facility */
if (log_err && (smb2err != STATUS_MORE_PROCESSING_REQUIRED) &&
@@ -2478,5 +2482,8 @@ map_smb2_to_linux_error(char *buf, bool log_err)
cifs_dbg(FYI, "Mapping SMB2 status code 0x%08x to POSIX err %d\n",
__le32_to_cpu(smb2err), rc);
+ trace_smb3_cmd_err(shdr->TreeId, shdr->SessionId,
+ le16_to_cpu(shdr->Command),
+ le64_to_cpu(shdr->MessageId), le32_to_cpu(smb2err), rc);
return rc;
}
diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c
index 68ea8491c160..cb5728e3d87d 100644
--- a/fs/cifs/smb2misc.c
+++ b/fs/cifs/smb2misc.c
@@ -94,8 +94,8 @@ static const __le16 smb2_rsp_struct_sizes[NUMBER_OF_SMB2_COMMANDS] = {
};
#ifdef CONFIG_CIFS_SMB311
-static __u32 get_neg_ctxt_len(struct smb2_hdr *hdr, __u32 len, __u32 non_ctxlen,
- size_t hdr_preamble_size)
+static __u32 get_neg_ctxt_len(struct smb2_sync_hdr *hdr, __u32 len,
+ __u32 non_ctxlen)
{
__u16 neg_count;
__u32 nc_offset, size_of_pad_before_neg_ctxts;
@@ -109,12 +109,11 @@ static __u32 get_neg_ctxt_len(struct smb2_hdr *hdr, __u32 len, __u32 non_ctxlen,
/* Make sure that negotiate contexts start after gss security blob */
nc_offset = le32_to_cpu(pneg_rsp->NegotiateContextOffset);
- if (nc_offset < non_ctxlen - hdr_preamble_size /* RFC1001 len */) {
+ if (nc_offset < non_ctxlen) {
printk_once(KERN_WARNING "invalid negotiate context offset\n");
return 0;
}
- size_of_pad_before_neg_ctxts = nc_offset -
- (non_ctxlen - hdr_preamble_size);
+ size_of_pad_before_neg_ctxts = nc_offset - non_ctxlen;
/* Verify that at least minimal negotiate contexts fit within frame */
if (len < nc_offset + (neg_count * sizeof(struct smb2_neg_context))) {
@@ -131,25 +130,20 @@ static __u32 get_neg_ctxt_len(struct smb2_hdr *hdr, __u32 len, __u32 non_ctxlen,
#endif /* CIFS_SMB311 */
int
-smb2_check_message(char *buf, unsigned int length, struct TCP_Server_Info *srvr)
+smb2_check_message(char *buf, unsigned int len, struct TCP_Server_Info *srvr)
{
- struct smb2_pdu *pdu = (struct smb2_pdu *)buf;
- struct smb2_hdr *hdr = &pdu->hdr;
- struct smb2_sync_hdr *shdr = get_sync_hdr(buf);
+ struct smb2_sync_hdr *shdr = (struct smb2_sync_hdr *)buf;
+ struct smb2_sync_pdu *pdu = (struct smb2_sync_pdu *)shdr;
__u64 mid;
- __u32 len = get_rfc1002_length(buf);
__u32 clc_len; /* calculated length */
int command;
-
- /* BB disable following printk later */
- cifs_dbg(FYI, "%s length: 0x%x, smb_buf_length: 0x%x\n",
- __func__, length, len);
+ int pdu_size = sizeof(struct smb2_sync_pdu);
+ int hdr_size = sizeof(struct smb2_sync_hdr);
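+ /* len is now the SMB2 frame length, with no RFC1001 preamble included */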
/*
* Add function to do table lookup of StructureSize by command
* ie Validate the wct via smb2_struct_sizes table above
*/
-
if (shdr->ProtocolId == SMB2_TRANSFORM_PROTO_NUM) {
struct smb2_transform_hdr *thdr =
(struct smb2_transform_hdr *)buf;
@@ -173,8 +167,8 @@ smb2_check_message(char *buf, unsigned int length, struct TCP_Server_Info *srvr)
}
mid = le64_to_cpu(shdr->MessageId);
- if (length < sizeof(struct smb2_pdu)) {
- if ((length >= sizeof(struct smb2_hdr))
+ if (len < pdu_size) {
+ if ((len >= hdr_size)
&& (shdr->Status != 0)) {
pdu->StructureSize2 = 0;
/*
@@ -187,8 +181,7 @@ smb2_check_message(char *buf, unsigned int length, struct TCP_Server_Info *srvr)
}
return 1;
}
- if (len > CIFSMaxBufSize + MAX_SMB2_HDR_SIZE -
- srvr->vals->header_preamble_size) {
+ if (len > CIFSMaxBufSize + MAX_SMB2_HDR_SIZE) {
cifs_dbg(VFS, "SMB length greater than maximum, mid=%llu\n",
mid);
return 1;
@@ -227,44 +220,38 @@ smb2_check_message(char *buf, unsigned int length, struct TCP_Server_Info *srvr)
}
}
- if (srvr->vals->header_preamble_size + len != length) {
- cifs_dbg(VFS, "Total length %u RFC1002 length %zu mismatch mid %llu\n",
- length, srvr->vals->header_preamble_size + len, mid);
- return 1;
- }
-
- clc_len = smb2_calc_size(hdr);
+ clc_len = smb2_calc_size(buf, srvr);
#ifdef CONFIG_CIFS_SMB311
if (shdr->Command == SMB2_NEGOTIATE)
- clc_len += get_neg_ctxt_len(hdr, len, clc_len,
- srvr->vals->header_preamble_size);
+ clc_len += get_neg_ctxt_len(shdr, len, clc_len);
#endif /* SMB311 */
- if (srvr->vals->header_preamble_size + len != clc_len) {
- cifs_dbg(FYI, "Calculated size %u length %zu mismatch mid %llu\n",
- clc_len, srvr->vals->header_preamble_size + len, mid);
+ if (len != clc_len) {
+ cifs_dbg(FYI, "Calculated size %u length %u mismatch mid %llu\n",
+ clc_len, len, mid);
/* create failed on symlink */
if (command == SMB2_CREATE_HE &&
shdr->Status == STATUS_STOPPED_ON_SYMLINK)
return 0;
/* Windows 7 server returns 24 bytes more */
- if (clc_len + 24 - srvr->vals->header_preamble_size == len && command == SMB2_OPLOCK_BREAK_HE)
+ if (clc_len + 24 == len && command == SMB2_OPLOCK_BREAK_HE)
return 0;
/* server can return one byte more due to implied bcc[0] */
- if (clc_len == srvr->vals->header_preamble_size + len + 1)
+ if (clc_len == len + 1)
return 0;
/*
* MacOS server pads after SMB2.1 write response with 3 bytes
* of junk. Other servers match RFC1001 len to actual
* SMB2/SMB3 frame length (header + smb2 response specific data)
+ * Some windows servers do too when compounding is used.
* Log the server error (once), but allow it and continue
* since the frame is parseable.
*/
- if (clc_len < srvr->vals->header_preamble_size /* RFC1001 header size */ + len) {
+ if (clc_len < len) {
printk_once(KERN_WARNING
- "SMB2 server sent bad RFC1001 len %d not %zu\n",
- len, clc_len - srvr->vals->header_preamble_size);
+ "SMB2 server sent bad RFC1001 len %d not %d\n",
+ len, clc_len);
return 0;
}
@@ -305,15 +292,14 @@ static const bool has_smb2_data_area[NUMBER_OF_SMB2_COMMANDS] = {
* area and the offset to it (from the beginning of the smb) are also returned.
*/
char *
-smb2_get_data_area_len(int *off, int *len, struct smb2_hdr *hdr)
+smb2_get_data_area_len(int *off, int *len, struct smb2_sync_hdr *shdr)
{
- struct smb2_sync_hdr *shdr = get_sync_hdr(hdr);
*off = 0;
*len = 0;
/* error responses do not have data area */
if (shdr->Status && shdr->Status != STATUS_MORE_PROCESSING_REQUIRED &&
- (((struct smb2_err_rsp *)hdr)->StructureSize) ==
+ (((struct smb2_err_rsp *)shdr)->StructureSize) ==
SMB2_ERROR_STRUCTURE_SIZE2)
return NULL;
@@ -325,42 +311,44 @@ smb2_get_data_area_len(int *off, int *len, struct smb2_hdr *hdr)
switch (shdr->Command) {
case SMB2_NEGOTIATE:
*off = le16_to_cpu(
- ((struct smb2_negotiate_rsp *)hdr)->SecurityBufferOffset);
+ ((struct smb2_negotiate_rsp *)shdr)->SecurityBufferOffset);
*len = le16_to_cpu(
- ((struct smb2_negotiate_rsp *)hdr)->SecurityBufferLength);
+ ((struct smb2_negotiate_rsp *)shdr)->SecurityBufferLength);
break;
case SMB2_SESSION_SETUP:
*off = le16_to_cpu(
- ((struct smb2_sess_setup_rsp *)hdr)->SecurityBufferOffset);
+ ((struct smb2_sess_setup_rsp *)shdr)->SecurityBufferOffset);
*len = le16_to_cpu(
- ((struct smb2_sess_setup_rsp *)hdr)->SecurityBufferLength);
+ ((struct smb2_sess_setup_rsp *)shdr)->SecurityBufferLength);
break;
case SMB2_CREATE:
*off = le32_to_cpu(
- ((struct smb2_create_rsp *)hdr)->CreateContextsOffset);
+ ((struct smb2_create_rsp *)shdr)->CreateContextsOffset);
*len = le32_to_cpu(
- ((struct smb2_create_rsp *)hdr)->CreateContextsLength);
+ ((struct smb2_create_rsp *)shdr)->CreateContextsLength);
break;
case SMB2_QUERY_INFO:
*off = le16_to_cpu(
- ((struct smb2_query_info_rsp *)hdr)->OutputBufferOffset);
+ ((struct smb2_query_info_rsp *)shdr)->OutputBufferOffset);
*len = le32_to_cpu(
- ((struct smb2_query_info_rsp *)hdr)->OutputBufferLength);
+ ((struct smb2_query_info_rsp *)shdr)->OutputBufferLength);
break;
case SMB2_READ:
- *off = ((struct smb2_read_rsp *)hdr)->DataOffset;
- *len = le32_to_cpu(((struct smb2_read_rsp *)hdr)->DataLength);
+ /* TODO: is this a bug? (DataOffset in the read response is a single byte) */
+ *off = ((struct smb2_read_rsp *)shdr)->DataOffset;
+ *len = le32_to_cpu(((struct smb2_read_rsp *)shdr)->DataLength);
break;
case SMB2_QUERY_DIRECTORY:
*off = le16_to_cpu(
- ((struct smb2_query_directory_rsp *)hdr)->OutputBufferOffset);
+ ((struct smb2_query_directory_rsp *)shdr)->OutputBufferOffset);
*len = le32_to_cpu(
- ((struct smb2_query_directory_rsp *)hdr)->OutputBufferLength);
+ ((struct smb2_query_directory_rsp *)shdr)->OutputBufferLength);
break;
case SMB2_IOCTL:
*off = le32_to_cpu(
- ((struct smb2_ioctl_rsp *)hdr)->OutputOffset);
- *len = le32_to_cpu(((struct smb2_ioctl_rsp *)hdr)->OutputCount);
+ ((struct smb2_ioctl_rsp *)shdr)->OutputOffset);
+ *len = le32_to_cpu(
+ ((struct smb2_ioctl_rsp *)shdr)->OutputCount);
break;
case SMB2_CHANGE_NOTIFY:
default:
@@ -403,15 +391,14 @@ smb2_get_data_area_len(int *off, int *len, struct smb2_hdr *hdr)
* portion, the number of word parameters and the data portion of the message.
*/
unsigned int
-smb2_calc_size(void *buf)
+smb2_calc_size(void *buf, struct TCP_Server_Info *srvr)
{
- struct smb2_pdu *pdu = (struct smb2_pdu *)buf;
- struct smb2_hdr *hdr = &pdu->hdr;
- struct smb2_sync_hdr *shdr = get_sync_hdr(hdr);
+ struct smb2_sync_pdu *pdu = (struct smb2_sync_pdu *)buf;
+ struct smb2_sync_hdr *shdr = &pdu->sync_hdr;
int offset; /* the offset from the beginning of SMB to data area */
int data_length; /* the length of the variable length data area */
/* Structure Size has already been checked to make sure it is 64 */
- int len = 4 + le16_to_cpu(shdr->StructureSize);
+ int len = le16_to_cpu(shdr->StructureSize);
/*
* StructureSize2, ie length of fixed parameter area has already
@@ -422,7 +409,7 @@ smb2_calc_size(void *buf)
if (has_smb2_data_area[le16_to_cpu(shdr->Command)] == false)
goto calc_size_exit;
- smb2_get_data_area_len(&offset, &data_length, hdr);
+ smb2_get_data_area_len(&offset, &data_length, shdr);
cifs_dbg(FYI, "SMB2 data length %d offset %d\n", data_length, offset);
if (data_length > 0) {
@@ -430,15 +417,14 @@ smb2_calc_size(void *buf)
* Check to make sure that data area begins after fixed area,
* Note that last byte of the fixed area is part of data area
* for some commands, typically those with odd StructureSize,
- * so we must add one to the calculation (and 4 to account for
- * the size of the RFC1001 hdr.
+ * so we must add one to the calculation.
*/
- if (offset + 4 + 1 < len) {
+ if (offset + 1 < len) {
cifs_dbg(VFS, "data area offset %d overlaps SMB2 header %d\n",
- offset + 4 + 1, len);
+ offset + 1, len);
data_length = 0;
} else {
- len = 4 + offset + data_length;
+ len = offset + data_length;
}
}
calc_size_exit:
@@ -465,8 +451,14 @@ cifs_convert_path_to_utf16(const char *from, struct cifs_sb_info *cifs_sb)
/* Windows doesn't allow paths beginning with \ */
if (from[0] == '\\')
start_of_path = from + 1;
+#ifdef CONFIG_CIFS_SMB311
+ /* SMB3.11 POSIX extension paths do not include a leading slash */
+ else if (cifs_sb_master_tcon(cifs_sb)->posix_extensions)
+ start_of_path = from + 1;
+#endif /* 311 */
else
start_of_path = from;
+
to = cifs_strndup_to_utf16(start_of_path, PATH_MAX, &len,
cifs_sb->local_nls, map_type);
return to;
@@ -621,7 +613,7 @@ smb2_is_valid_lease_break(char *buffer)
bool
smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *server)
{
- struct smb2_oplock_break_rsp *rsp = (struct smb2_oplock_break_rsp *)buffer;
+ struct smb2_oplock_break *rsp = (struct smb2_oplock_break *)buffer;
struct list_head *tmp, *tmp1, *tmp2;
struct cifs_ses *ses;
struct cifs_tcon *tcon;
@@ -630,7 +622,7 @@ smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *server)
cifs_dbg(FYI, "Checking for oplock break\n");
- if (rsp->hdr.sync_hdr.Command != SMB2_OPLOCK_BREAK)
+ if (rsp->sync_hdr.Command != SMB2_OPLOCK_BREAK)
return false;
if (rsp->StructureSize !=
@@ -721,7 +713,7 @@ smb2_cancelled_close_fid(struct work_struct *work)
int
smb2_handle_cancelled_mid(char *buffer, struct TCP_Server_Info *server)
{
- struct smb2_sync_hdr *sync_hdr = get_sync_hdr(buffer);
+ struct smb2_sync_hdr *sync_hdr = (struct smb2_sync_hdr *)buffer;
struct smb2_create_rsp *rsp = (struct smb2_create_rsp *)buffer;
struct cifs_tcon *tcon;
struct close_cancelled_open *cancelled;
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index b76b85881dcc..950d0ab2cc61 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -123,7 +123,7 @@ smb2_get_credits_field(struct TCP_Server_Info *server, const int optype)
static unsigned int
smb2_get_credits(struct mid_q_entry *mid)
{
- struct smb2_sync_hdr *shdr = get_sync_hdr(mid->resp_buf);
+ struct smb2_sync_hdr *shdr = (struct smb2_sync_hdr *)mid->resp_buf;
return le16_to_cpu(shdr->CreditRequest);
}
@@ -190,7 +190,7 @@ static struct mid_q_entry *
smb2_find_mid(struct TCP_Server_Info *server, char *buf)
{
struct mid_q_entry *mid;
- struct smb2_sync_hdr *shdr = get_sync_hdr(buf);
+ struct smb2_sync_hdr *shdr = (struct smb2_sync_hdr *)buf;
__u64 wire_mid = le64_to_cpu(shdr->MessageId);
if (shdr->ProtocolId == SMB2_TRANSFORM_PROTO_NUM) {
@@ -212,15 +212,16 @@ smb2_find_mid(struct TCP_Server_Info *server, char *buf)
}
static void
-smb2_dump_detail(void *buf)
+smb2_dump_detail(void *buf, struct TCP_Server_Info *server)
{
#ifdef CONFIG_CIFS_DEBUG2
- struct smb2_sync_hdr *shdr = get_sync_hdr(buf);
+ struct smb2_sync_hdr *shdr = (struct smb2_sync_hdr *)buf;
cifs_dbg(VFS, "Cmd: %d Err: 0x%x Flags: 0x%x Mid: %llu Pid: %d\n",
shdr->Command, shdr->Status, shdr->Flags, shdr->MessageId,
shdr->ProcessId);
- cifs_dbg(VFS, "smb buf %p len %u\n", buf, smb2_calc_size(buf));
+ cifs_dbg(VFS, "smb buf %p len %u\n", buf,
+ server->ops->calc_smb_size(buf, server));
#endif
}
@@ -322,6 +323,40 @@ SMB3_request_interfaces(const unsigned int xid, struct cifs_tcon *tcon)
}
#endif /* STATS2 */
+/*
+ * Open the directory at the root of a share
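+ * and cache the returned fid in the tcon so that later callers can
+ * reuse the handle instead of opening the root again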
+ */
+int open_shroot(unsigned int xid, struct cifs_tcon *tcon, struct cifs_fid *pfid)
+{
+ struct cifs_open_parms oparams;
+ int rc;
+ __le16 srch_path = 0; /* null path - an open of the top of the share */
+ u8 oplock = SMB2_OPLOCK_LEVEL_NONE;
+
+ mutex_lock(&tcon->prfid_mutex);
+ if (tcon->valid_root_fid) {
+ cifs_dbg(FYI, "found a cached root file handle\n");
+ memcpy(pfid, tcon->prfid, sizeof(struct cifs_fid));
+ mutex_unlock(&tcon->prfid_mutex);
+ return 0;
+ }
+
+ oparams.tcon = tcon;
+ oparams.create_options = 0;
+ oparams.desired_access = FILE_READ_ATTRIBUTES;
+ oparams.disposition = FILE_OPEN;
+ oparams.fid = pfid;
+ oparams.reconnect = false;
+
+ rc = SMB2_open(xid, &oparams, &srch_path, &oplock, NULL, NULL);
+ if (rc == 0) {
+ memcpy(tcon->prfid, pfid, sizeof(struct cifs_fid));
+ tcon->valid_root_fid = true;
+ }
+ mutex_unlock(&tcon->prfid_mutex);
+ return rc;
+}
+
static void
smb3_qfs_tcon(const unsigned int xid, struct cifs_tcon *tcon)
{
@@ -330,6 +365,7 @@ smb3_qfs_tcon(const unsigned int xid, struct cifs_tcon *tcon)
u8 oplock = SMB2_OPLOCK_LEVEL_NONE;
struct cifs_open_parms oparms;
struct cifs_fid fid;
+ bool no_cached_open = tcon->nohandlecache;
oparms.tcon = tcon;
oparms.desired_access = FILE_READ_ATTRIBUTES;
@@ -338,7 +374,11 @@ smb3_qfs_tcon(const unsigned int xid, struct cifs_tcon *tcon)
oparms.fid = &fid;
oparms.reconnect = false;
- rc = SMB2_open(xid, &oparms, &srch_path, &oplock, NULL, NULL);
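+ /* with handle caching enabled, reuse the cached root fid */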
+ if (no_cached_open)
+ rc = SMB2_open(xid, &oparms, &srch_path, &oplock, NULL, NULL);
+ else
+ rc = open_shroot(xid, tcon, &fid);
+
if (rc)
return;
@@ -352,7 +392,8 @@ smb3_qfs_tcon(const unsigned int xid, struct cifs_tcon *tcon)
FS_DEVICE_INFORMATION);
SMB2_QFS_attr(xid, tcon, fid.persistent_fid, fid.volatile_fid,
FS_SECTOR_SIZE_INFORMATION); /* SMB3 specific */
- SMB2_close(xid, tcon, fid.persistent_fid, fid.volatile_fid);
+ if (no_cached_open)
+ SMB2_close(xid, tcon, fid.persistent_fid, fid.volatile_fid);
return;
}
@@ -394,6 +435,9 @@ smb2_is_path_accessible(const unsigned int xid, struct cifs_tcon *tcon,
struct cifs_open_parms oparms;
struct cifs_fid fid;
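+ /* an empty path with a valid cached root fid is known to be accessible */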
+ if ((*full_path == 0) && tcon->valid_root_fid)
+ return 0;
+
utf16_path = cifs_convert_path_to_utf16(full_path, cifs_sb);
if (!utf16_path)
return -ENOMEM;
@@ -589,9 +633,15 @@ smb2_query_eas(const unsigned int xid, struct cifs_tcon *tcon,
SMB2_close(xid, tcon, fid.persistent_fid, fid.volatile_fid);
+ /*
+ * If ea_name is NULL (listxattr) and there are no EAs, return 0 as it's
+ * not an error. Otherwise, the specified ea_name was not found.
+ */
if (!rc)
rc = move_smb2_ea_to_cifs(ea_data, buf_size, smb2_data,
SMB2_MAX_EA_BUF, ea_name);
+ else if (!ea_name && rc == -ENODATA)
+ rc = 0;
kfree(smb2_data);
return rc;
@@ -698,9 +748,11 @@ smb2_dump_share_caps(struct seq_file *m, struct cifs_tcon *tcon)
seq_puts(m, " TRIM-support,");
seq_printf(m, "\tShare Flags: 0x%x", tcon->share_flags);
+ seq_printf(m, "\n\ttid: 0x%x", tcon->tid);
if (tcon->perf_sector_size)
seq_printf(m, "\tOptimal sector size: 0x%x",
tcon->perf_sector_size);
+ seq_printf(m, "\tMaximal Access: 0x%x", tcon->maximal_access);
}
static void
@@ -1251,7 +1303,7 @@ smb2_close_dir(const unsigned int xid, struct cifs_tcon *tcon,
static bool
smb2_is_status_pending(char *buf, struct TCP_Server_Info *server, int length)
{
- struct smb2_sync_hdr *shdr = get_sync_hdr(buf);
+ struct smb2_sync_hdr *shdr = (struct smb2_sync_hdr *)buf;
if (shdr->Status != STATUS_PENDING)
return false;
@@ -1269,12 +1321,13 @@ smb2_is_status_pending(char *buf, struct TCP_Server_Info *server, int length)
static bool
smb2_is_session_expired(char *buf)
{
- struct smb2_sync_hdr *shdr = get_sync_hdr(buf);
+ struct smb2_sync_hdr *shdr = (struct smb2_sync_hdr *)buf;
- if (shdr->Status != STATUS_NETWORK_SESSION_EXPIRED)
+ if (shdr->Status != STATUS_NETWORK_SESSION_EXPIRED &&
+ shdr->Status != STATUS_USER_SESSION_DELETED)
return false;
- cifs_dbg(FYI, "Session expired\n");
+ cifs_dbg(FYI, "Session expired or deleted\n");
return true;
}
@@ -1468,8 +1521,6 @@ smb2_query_symlink(const unsigned int xid, struct cifs_tcon *tcon,
unsigned int sub_offset;
unsigned int print_len;
unsigned int print_offset;
- struct cifs_ses *ses = tcon->ses;
- struct TCP_Server_Info *server = ses->server;
cifs_dbg(FYI, "%s: path: %s\n", __func__, full_path);
@@ -1493,7 +1544,7 @@ smb2_query_symlink(const unsigned int xid, struct cifs_tcon *tcon,
err_buf = err_iov.iov_base;
if (le32_to_cpu(err_buf->ByteCount) < sizeof(struct smb2_symlink_err_rsp) ||
- err_iov.iov_len + server->vals->header_preamble_size < SMB2_SYMLINK_STRUCT_SIZE) {
+ err_iov.iov_len < SMB2_SYMLINK_STRUCT_SIZE) {
kfree(utf16_path);
return -ENOENT;
}
@@ -1506,14 +1557,13 @@ smb2_query_symlink(const unsigned int xid, struct cifs_tcon *tcon,
print_len = le16_to_cpu(symlink->PrintNameLength);
print_offset = le16_to_cpu(symlink->PrintNameOffset);
- if (err_iov.iov_len + server->vals->header_preamble_size <
- SMB2_SYMLINK_STRUCT_SIZE + sub_offset + sub_len) {
+ if (err_iov.iov_len < SMB2_SYMLINK_STRUCT_SIZE + sub_offset + sub_len) {
kfree(utf16_path);
return -ENOENT;
}
- if (err_iov.iov_len + server->vals->header_preamble_size <
- SMB2_SYMLINK_STRUCT_SIZE + print_offset + print_len) {
+ if (err_iov.iov_len <
+ SMB2_SYMLINK_STRUCT_SIZE + print_offset + print_len) {
kfree(utf16_path);
return -ENOENT;
}
@@ -1587,8 +1637,11 @@ get_smb2_acl_by_path(struct cifs_sb_info *cifs_sb,
oparms.create_options = 0;
utf16_path = cifs_convert_path_to_utf16(path, cifs_sb);
- if (!utf16_path)
- return ERR_PTR(-ENOMEM);
+ if (!utf16_path) {
+ rc = -ENOMEM;
+ free_xid(xid);
+ return ERR_PTR(rc);
+ }
oparms.tcon = tcon;
oparms.desired_access = READ_CONTROL;
@@ -1646,8 +1699,11 @@ set_smb2_acl(struct cifs_ntsd *pnntsd, __u32 acllen,
access_flags = WRITE_DAC;
utf16_path = cifs_convert_path_to_utf16(path, cifs_sb);
- if (!utf16_path)
- return -ENOMEM;
+ if (!utf16_path) {
+ rc = -ENOMEM;
+ free_xid(xid);
+ return rc;
+ }
oparms.tcon = tcon;
oparms.desired_access = access_flags;
@@ -1707,15 +1763,21 @@ static long smb3_zero_range(struct file *file, struct cifs_tcon *tcon,
/* if file not oplocked can't be sure whether asking to extend size */
if (!CIFS_CACHE_READ(cifsi))
- if (keep_size == false)
- return -EOPNOTSUPP;
+ if (keep_size == false) {
+ rc = -EOPNOTSUPP;
+ free_xid(xid);
+ return rc;
+ }
/*
* Must check if file sparse since fallocate -z (zero range) assumes
* non-sparse allocation
*/
- if (!(cifsi->cifsAttrs & FILE_ATTRIBUTE_SPARSE_FILE))
- return -EOPNOTSUPP;
+ if (!(cifsi->cifsAttrs & FILE_ATTRIBUTE_SPARSE_FILE)) {
+ rc = -EOPNOTSUPP;
+ free_xid(xid);
+ return rc;
+ }
/*
* need to make sure we are not asked to extend the file since the SMB3
@@ -1724,8 +1786,11 @@ static long smb3_zero_range(struct file *file, struct cifs_tcon *tcon,
* which for a non sparse file would zero the newly extended range
*/
if (keep_size == false)
- if (i_size_read(inode) < offset + len)
- return -EOPNOTSUPP;
+ if (i_size_read(inode) < offset + len) {
+ rc = -EOPNOTSUPP;
+ free_xid(xid);
+ return rc;
+ }
cifs_dbg(FYI, "offset %lld len %lld", offset, len);
@@ -1758,8 +1823,11 @@ static long smb3_punch_hole(struct file *file, struct cifs_tcon *tcon,
/* Need to make file sparse, if not already, before freeing range. */
/* Consider adding equivalent for compressed since it could also work */
- if (!smb2_set_sparse(xid, tcon, cfile, inode, set_sparse))
- return -EOPNOTSUPP;
+ if (!smb2_set_sparse(xid, tcon, cfile, inode, set_sparse)) {
+ rc = -EOPNOTSUPP;
+ free_xid(xid);
+ return rc;
+ }
cifs_dbg(FYI, "offset %lld len %lld", offset, len);
@@ -1790,8 +1858,10 @@ static long smb3_simple_falloc(struct file *file, struct cifs_tcon *tcon,
/* if file not oplocked can't be sure whether asking to extend size */
if (!CIFS_CACHE_READ(cifsi))
- if (keep_size == false)
- return -EOPNOTSUPP;
+ if (keep_size == false) {
+ free_xid(xid);
+ return rc;
+ }
/*
* Files are non-sparse by default so falloc may be a no-op
@@ -1800,14 +1870,16 @@ static long smb3_simple_falloc(struct file *file, struct cifs_tcon *tcon,
*/
if ((cifsi->cifsAttrs & FILE_ATTRIBUTE_SPARSE_FILE) == 0) {
if (keep_size == true)
- return 0;
+ rc = 0;
/* check if extending file */
else if (i_size_read(inode) >= off + len)
/* not extending file and already not sparse */
- return 0;
+ rc = 0;
/* BB: in future add else clause to extend file */
else
- return -EOPNOTSUPP;
+ rc = -EOPNOTSUPP;
+ free_xid(xid);
+ return rc;
}
if ((keep_size == true) || (i_size_read(inode) >= off + len)) {
@@ -1819,8 +1891,11 @@ static long smb3_simple_falloc(struct file *file, struct cifs_tcon *tcon,
* ie potentially making a few extra pages at the beginning
* or end of the file non-sparse via set_sparse is harmless.
*/
- if ((off > 8192) || (off + len + 8192 < i_size_read(inode)))
- return -EOPNOTSUPP;
+ if ((off > 8192) || (off + len + 8192 < i_size_read(inode))) {
+ rc = -EOPNOTSUPP;
+ free_xid(xid);
+ return rc;
+ }
rc = smb2_set_sparse(xid, tcon, cfile, inode, false);
}
@@ -2029,7 +2104,7 @@ smb3_create_lease_buf(u8 *lease_key, u8 oplock)
}
static __u8
-smb2_parse_lease_buf(void *buf, unsigned int *epoch)
+smb2_parse_lease_buf(void *buf, unsigned int *epoch, char *lease_key)
{
struct create_lease *lc = (struct create_lease *)buf;
@@ -2040,13 +2115,16 @@ smb2_parse_lease_buf(void *buf, unsigned int *epoch)
}
static __u8
-smb3_parse_lease_buf(void *buf, unsigned int *epoch)
+smb3_parse_lease_buf(void *buf, unsigned int *epoch, char *lease_key)
{
struct create_lease_v2 *lc = (struct create_lease_v2 *)buf;
*epoch = le16_to_cpu(lc->lcontext.Epoch);
if (lc->lcontext.LeaseFlags & SMB2_LEASE_FLAG_BREAK_IN_PROGRESS)
return SMB2_OPLOCK_LEVEL_NOCHANGE;
+ if (lease_key)
+ memcpy(lease_key, &lc->lcontext.LeaseKeyLow,
+ SMB2_LEASE_KEY_SIZE);
return le32_to_cpu(lc->lcontext.LeaseState);
}
@@ -2064,12 +2142,11 @@ smb2_dir_needs_close(struct cifsFileInfo *cfile)
}
static void
-fill_transform_hdr(struct TCP_Server_Info *server,
- struct smb2_transform_hdr *tr_hdr, struct smb_rqst *old_rq)
+fill_transform_hdr(struct smb2_transform_hdr *tr_hdr, unsigned int orig_len,
+ struct smb_rqst *old_rq)
{
struct smb2_sync_hdr *shdr =
(struct smb2_sync_hdr *)old_rq->rq_iov[1].iov_base;
- unsigned int orig_len = get_rfc1002_length(old_rq->rq_iov[0].iov_base);
memset(tr_hdr, 0, sizeof(struct smb2_transform_hdr));
tr_hdr->ProtocolId = SMB2_TRANSFORM_PROTO_NUM;
@@ -2077,8 +2154,6 @@ fill_transform_hdr(struct TCP_Server_Info *server,
tr_hdr->Flags = cpu_to_le16(0x01);
get_random_bytes(&tr_hdr->Nonce, SMB3_AES128CMM_NONCE);
memcpy(&tr_hdr->SessionId, &shdr->SessionId, 8);
- inc_rfc1001_len(tr_hdr, sizeof(struct smb2_transform_hdr) - server->vals->header_preamble_size);
- inc_rfc1001_len(tr_hdr, orig_len);
}
/* We can not use the normal sg_set_buf() as we will sometimes pass a
@@ -2090,11 +2165,16 @@ static inline void smb2_sg_set_buf(struct scatterlist *sg, const void *buf,
sg_set_page(sg, virt_to_page(buf), buflen, offset_in_page(buf));
}
+/* Assumes:
+ * rqst->rq_iov[0] is rfc1002 length
+ * rqst->rq_iov[1] is the transform header
+ * rqst->rq_iov[2+] data to be encrypted/decrypted
+ */
static struct scatterlist *
init_sg(struct smb_rqst *rqst, u8 *sign)
{
- unsigned int sg_len = rqst->rq_nvec + rqst->rq_npages + 1;
- unsigned int assoc_data_len = sizeof(struct smb2_transform_hdr) - 24;
+ unsigned int sg_len = rqst->rq_nvec + rqst->rq_npages;
+ unsigned int assoc_data_len = sizeof(struct smb2_transform_hdr) - 20;
struct scatterlist *sg;
unsigned int i;
unsigned int j;
@@ -2104,10 +2184,10 @@ init_sg(struct smb_rqst *rqst, u8 *sign)
return NULL;
sg_init_table(sg, sg_len);
- smb2_sg_set_buf(&sg[0], rqst->rq_iov[0].iov_base + 24, assoc_data_len);
- for (i = 1; i < rqst->rq_nvec; i++)
- smb2_sg_set_buf(&sg[i], rqst->rq_iov[i].iov_base,
- rqst->rq_iov[i].iov_len);
+ smb2_sg_set_buf(&sg[0], rqst->rq_iov[1].iov_base + 20, assoc_data_len);
+ for (i = 1; i < rqst->rq_nvec - 1; i++)
+ smb2_sg_set_buf(&sg[i], rqst->rq_iov[i+1].iov_base,
+ rqst->rq_iov[i+1].iov_len);
for (j = 0; i < sg_len - 1; i++, j++) {
unsigned int len = (j < rqst->rq_npages - 1) ? rqst->rq_pagesz
: rqst->rq_tailsz;
@@ -2139,9 +2219,10 @@ smb2_get_enc_key(struct TCP_Server_Info *server, __u64 ses_id, int enc, u8 *key)
}
/*
* Encrypt or decrypt @rqst message. @rqst has the following format:
- * iov[0] - transform header (associate data),
- * iov[1-N] and pages - data to encrypt.
- * On success return encrypted data in iov[1-N] and pages, leave iov[0]
+ * iov[0] - rfc1002 length
+ * iov[1] - transform header (associate data),
+ * iov[2-N] and pages - data to encrypt.
+ * On success return encrypted data in iov[2-N] and pages, leave iov[0-1]
* untouched.
*/
static int
@@ -2149,7 +2230,7 @@ crypt_message(struct TCP_Server_Info *server, struct smb_rqst *rqst, int enc)
{
struct smb2_transform_hdr *tr_hdr =
- (struct smb2_transform_hdr *)rqst->rq_iov[0].iov_base;
+ (struct smb2_transform_hdr *)rqst->rq_iov[1].iov_base;
- unsigned int assoc_data_len = sizeof(struct smb2_transform_hdr) - 20 - server->vals->header_preamble_size;
+ unsigned int assoc_data_len = sizeof(struct smb2_transform_hdr) - 20;
int rc = 0;
struct scatterlist *sg;
u8 sign[SMB2_SIGNATURE_SIZE] = {};
@@ -2236,6 +2317,10 @@ free_req:
return rc;
}
+/*
+ * This is called from smb_send_rqst. At this point we have the rfc1002
+ * header as the first element in the vector.
+ */
static int
smb3_init_transform_rq(struct TCP_Server_Info *server, struct smb_rqst *new_rq,
struct smb_rqst *old_rq)
@@ -2244,6 +2329,7 @@ smb3_init_transform_rq(struct TCP_Server_Info *server, struct smb_rqst *new_rq,
struct page **pages;
struct smb2_transform_hdr *tr_hdr;
unsigned int npages = old_rq->rq_npages;
+ unsigned int orig_len = get_rfc1002_length(old_rq->rq_iov[0].iov_base);
int i;
int rc = -ENOMEM;
@@ -2262,24 +2348,34 @@ smb3_init_transform_rq(struct TCP_Server_Info *server, struct smb_rqst *new_rq,
goto err_free_pages;
}
- iov = kmalloc_array(old_rq->rq_nvec, sizeof(struct kvec), GFP_KERNEL);
+ /* Make space for one extra iov to hold the transform header */
+ iov = kmalloc_array(old_rq->rq_nvec + 1, sizeof(struct kvec),
+ GFP_KERNEL);
if (!iov)
goto err_free_pages;
/* copy all iovs from the old except the 1st one (rfc1002 length) */
- memcpy(&iov[1], &old_rq->rq_iov[1],
+ memcpy(&iov[2], &old_rq->rq_iov[1],
sizeof(struct kvec) * (old_rq->rq_nvec - 1));
+ /* copy the rfc1002 iov */
+ iov[0].iov_base = old_rq->rq_iov[0].iov_base;
+ iov[0].iov_len = old_rq->rq_iov[0].iov_len;
+
new_rq->rq_iov = iov;
- new_rq->rq_nvec = old_rq->rq_nvec;
+ new_rq->rq_nvec = old_rq->rq_nvec + 1;
tr_hdr = kmalloc(sizeof(struct smb2_transform_hdr), GFP_KERNEL);
if (!tr_hdr)
goto err_free_iov;
- /* fill the 1st iov with a transform header */
- fill_transform_hdr(server, tr_hdr, old_rq);
- new_rq->rq_iov[0].iov_base = tr_hdr;
- new_rq->rq_iov[0].iov_len = sizeof(struct smb2_transform_hdr);
+ /* fill the 2nd iov with a transform header */
+ fill_transform_hdr(tr_hdr, orig_len, old_rq);
+ new_rq->rq_iov[1].iov_base = tr_hdr;
+ new_rq->rq_iov[1].iov_len = sizeof(struct smb2_transform_hdr);
+
+ /* Update rfc1002 header */
+ inc_rfc1001_len(new_rq->rq_iov[0].iov_base,
+ sizeof(struct smb2_transform_hdr));
/* copy pages from the old */
for (i = 0; i < npages; i++) {
@@ -2319,7 +2415,7 @@ smb3_free_transform_rq(struct smb_rqst *rqst)
put_page(rqst->rq_pages[i]);
kfree(rqst->rq_pages);
/* free transform header */
- kfree(rqst->rq_iov[0].iov_base);
+ kfree(rqst->rq_iov[1].iov_base);
kfree(rqst->rq_iov);
}
@@ -2336,18 +2432,19 @@ decrypt_raw_data(struct TCP_Server_Info *server, char *buf,
unsigned int buf_data_size, struct page **pages,
unsigned int npages, unsigned int page_data_size)
{
- struct kvec iov[2];
+ struct kvec iov[3];
struct smb_rqst rqst = {NULL};
- struct smb2_hdr *hdr;
int rc;
- iov[0].iov_base = buf;
- iov[0].iov_len = sizeof(struct smb2_transform_hdr);
- iov[1].iov_base = buf + sizeof(struct smb2_transform_hdr);
- iov[1].iov_len = buf_data_size;
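+ /* iov[0] is the (empty) rfc1002 length slot that crypt_message() expects */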
+ iov[0].iov_base = NULL;
+ iov[0].iov_len = 0;
+ iov[1].iov_base = buf;
+ iov[1].iov_len = sizeof(struct smb2_transform_hdr);
+ iov[2].iov_base = buf + sizeof(struct smb2_transform_hdr);
+ iov[2].iov_len = buf_data_size;
rqst.rq_iov = iov;
- rqst.rq_nvec = 2;
+ rqst.rq_nvec = 3;
rqst.rq_pages = pages;
rqst.rq_npages = npages;
rqst.rq_pagesz = PAGE_SIZE;
@@ -2359,10 +2456,9 @@ decrypt_raw_data(struct TCP_Server_Info *server, char *buf,
if (rc)
return rc;
- memmove(buf + server->vals->header_preamble_size, iov[1].iov_base, buf_data_size);
- hdr = (struct smb2_hdr *)buf;
- hdr->smb2_buf_length = cpu_to_be32(buf_data_size + page_data_size);
- server->total_read = buf_data_size + page_data_size + server->vals->header_preamble_size;
+ memmove(buf, iov[2].iov_base, buf_data_size);
+
+ server->total_read = buf_data_size + page_data_size;
return rc;
}
@@ -2387,7 +2483,7 @@ read_data_into_pages(struct TCP_Server_Info *server, struct page **pages,
zero_user(page, len, PAGE_SIZE - len);
len = 0;
}
- length = cifs_read_page_from_socket(server, page, n);
+ length = cifs_read_page_from_socket(server, page, 0, n);
if (length < 0)
return length;
server->total_read += length;
@@ -2435,7 +2531,7 @@ handle_read_data(struct TCP_Server_Info *server, struct mid_q_entry *mid,
unsigned int cur_page_idx;
unsigned int pad_len;
struct cifs_readdata *rdata = mid->callback_data;
- struct smb2_sync_hdr *shdr = get_sync_hdr(buf);
+ struct smb2_sync_hdr *shdr = (struct smb2_sync_hdr *)buf;
struct bio_vec *bvec = NULL;
struct iov_iter iter;
struct kvec iov;
@@ -2466,7 +2562,7 @@ handle_read_data(struct TCP_Server_Info *server, struct mid_q_entry *mid,
return 0;
}
- data_offset = server->ops->read_data_offset(buf) + server->vals->header_preamble_size;
+ data_offset = server->ops->read_data_offset(buf);
#ifdef CONFIG_CIFS_SMB_DIRECT
use_rdma_mr = rdata->mr;
#endif
@@ -2562,12 +2658,11 @@ receive_encrypted_read(struct TCP_Server_Info *server, struct mid_q_entry **mid)
unsigned int npages;
struct page **pages;
unsigned int len;
- unsigned int buflen = server->pdu_size + server->vals->header_preamble_size;
+ unsigned int buflen = server->pdu_size;
int rc;
int i = 0;
- len = min_t(unsigned int, buflen, server->vals->read_rsp_size -
- server->vals->header_preamble_size +
+ len = min_t(unsigned int, buflen, server->vals->read_rsp_size +
sizeof(struct smb2_transform_hdr)) - HEADER_SIZE(server) + 1;
rc = cifs_read_from_socket(server, buf + HEADER_SIZE(server) - 1, len);
@@ -2575,8 +2670,7 @@ receive_encrypted_read(struct TCP_Server_Info *server, struct mid_q_entry **mid)
return rc;
server->total_read += rc;
- len = le32_to_cpu(tr_hdr->OriginalMessageSize) +
- server->vals->header_preamble_size -
+ len = le32_to_cpu(tr_hdr->OriginalMessageSize) -
server->vals->read_rsp_size;
npages = DIV_ROUND_UP(len, PAGE_SIZE);
@@ -2603,8 +2697,7 @@ receive_encrypted_read(struct TCP_Server_Info *server, struct mid_q_entry **mid)
if (rc)
goto free_pages;
- rc = decrypt_raw_data(server, buf, server->vals->read_rsp_size -
- server->vals->header_preamble_size,
+ rc = decrypt_raw_data(server, buf, server->vals->read_rsp_size,
pages, npages, len);
if (rc)
goto free_pages;
@@ -2641,7 +2734,7 @@ receive_encrypted_standard(struct TCP_Server_Info *server,
struct mid_q_entry *mid_entry;
/* switch to large buffer if too big for a small one */
- if (pdu_length + server->vals->header_preamble_size > MAX_CIFS_SMALL_BUFFER_SIZE) {
+ if (pdu_length > MAX_CIFS_SMALL_BUFFER_SIZE) {
server->large_buf = true;
memcpy(server->bigbuf, buf, server->total_read);
buf = server->bigbuf;
@@ -2649,13 +2742,12 @@ receive_encrypted_standard(struct TCP_Server_Info *server,
/* now read the rest */
length = cifs_read_from_socket(server, buf + HEADER_SIZE(server) - 1,
- pdu_length - HEADER_SIZE(server) + 1 +
- server->vals->header_preamble_size);
+ pdu_length - HEADER_SIZE(server) + 1);
if (length < 0)
return length;
server->total_read += length;
- buf_size = pdu_length + server->vals->header_preamble_size - sizeof(struct smb2_transform_hdr);
+ buf_size = pdu_length - sizeof(struct smb2_transform_hdr);
length = decrypt_raw_data(server, buf, buf_size, NULL, 0, 0);
if (length)
return length;
@@ -2684,7 +2776,7 @@ smb3_receive_transform(struct TCP_Server_Info *server, struct mid_q_entry **mid)
struct smb2_transform_hdr *tr_hdr = (struct smb2_transform_hdr *)buf;
unsigned int orig_len = le32_to_cpu(tr_hdr->OriginalMessageSize);
- if (pdu_length + server->vals->header_preamble_size < sizeof(struct smb2_transform_hdr) +
+ if (pdu_length < sizeof(struct smb2_transform_hdr) +
sizeof(struct smb2_sync_hdr)) {
cifs_dbg(VFS, "Transform message is too small (%u)\n",
pdu_length);
@@ -2693,14 +2785,14 @@ smb3_receive_transform(struct TCP_Server_Info *server, struct mid_q_entry **mid)
return -ECONNABORTED;
}
- if (pdu_length + server->vals->header_preamble_size < orig_len + sizeof(struct smb2_transform_hdr)) {
+ if (pdu_length < orig_len + sizeof(struct smb2_transform_hdr)) {
cifs_dbg(VFS, "Transform message is broken\n");
cifs_reconnect(server);
wake_up(&server->response_q);
return -ECONNABORTED;
}
- if (pdu_length + server->vals->header_preamble_size > CIFSMaxBufSize + MAX_HEADER_SIZE(server))
+ if (pdu_length > CIFSMaxBufSize + MAX_HEADER_SIZE(server))
return receive_encrypted_read(server, mid);
return receive_encrypted_standard(server, mid);
@@ -2711,11 +2803,23 @@ smb3_handle_read_data(struct TCP_Server_Info *server, struct mid_q_entry *mid)
{
char *buf = server->large_buf ? server->bigbuf : server->smallbuf;
- return handle_read_data(server, mid, buf, server->pdu_size +
- server->vals->header_preamble_size,
+ return handle_read_data(server, mid, buf, server->pdu_size,
NULL, 0, 0);
}
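+/*
+ * Return the total length of this PDU so the demultiplex thread can
+ * locate the start of the next compounded (or encrypted) frame in the
+ * receive buffer.
+ */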
+static int
+smb2_next_header(char *buf)
+{
+ struct smb2_sync_hdr *hdr = (struct smb2_sync_hdr *)buf;
+ struct smb2_transform_hdr *t_hdr = (struct smb2_transform_hdr *)buf;
+
+ if (hdr->ProtocolId == SMB2_TRANSFORM_PROTO_NUM)
+ return sizeof(struct smb2_transform_hdr) +
+ le32_to_cpu(t_hdr->OriginalMessageSize);
+
+ return le32_to_cpu(hdr->NextCommand);
+}
+
struct smb_version_operations smb20_operations = {
.compare_fids = smb2_compare_fids,
.setup_request = smb2_setup_request,
@@ -2807,6 +2911,7 @@ struct smb_version_operations smb20_operations = {
.get_acl_by_fid = get_smb2_acl_by_fid,
.set_acl = set_smb2_acl,
#endif /* CIFS_ACL */
+ .next_header = smb2_next_header,
};
struct smb_version_operations smb21_operations = {
@@ -2901,6 +3006,7 @@ struct smb_version_operations smb21_operations = {
.get_acl_by_fid = get_smb2_acl_by_fid,
.set_acl = set_smb2_acl,
#endif /* CIFS_ACL */
+ .next_header = smb2_next_header,
};
struct smb_version_operations smb30_operations = {
@@ -3005,6 +3111,7 @@ struct smb_version_operations smb30_operations = {
.get_acl_by_fid = get_smb2_acl_by_fid,
.set_acl = set_smb2_acl,
#endif /* CIFS_ACL */
+ .next_header = smb2_next_header,
};
#ifdef CONFIG_CIFS_SMB311
@@ -3105,6 +3212,7 @@ struct smb_version_operations smb311_operations = {
.query_all_EAs = smb2_query_eas,
.set_EA = smb2_set_ea,
#endif /* CIFS_XATTR */
+ .next_header = smb2_next_header,
};
#endif /* CIFS_SMB311 */
@@ -3116,8 +3224,8 @@ struct smb_version_values smb20_values = {
.exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE_LOCK,
.shared_lock_type = SMB2_LOCKFLAG_SHARED_LOCK,
.unlock_lock_type = SMB2_LOCKFLAG_UNLOCK,
- .header_size = sizeof(struct smb2_hdr),
- .header_preamble_size = 4,
+ .header_size = sizeof(struct smb2_sync_hdr),
+ .header_preamble_size = 0,
.max_header_size = MAX_SMB2_HDR_SIZE,
.read_rsp_size = sizeof(struct smb2_read_rsp) - 1,
.lock_cmd = SMB2_LOCK,
@@ -3137,8 +3245,8 @@ struct smb_version_values smb21_values = {
.exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE_LOCK,
.shared_lock_type = SMB2_LOCKFLAG_SHARED_LOCK,
.unlock_lock_type = SMB2_LOCKFLAG_UNLOCK,
- .header_size = sizeof(struct smb2_hdr),
- .header_preamble_size = 4,
+ .header_size = sizeof(struct smb2_sync_hdr),
+ .header_preamble_size = 0,
.max_header_size = MAX_SMB2_HDR_SIZE,
.read_rsp_size = sizeof(struct smb2_read_rsp) - 1,
.lock_cmd = SMB2_LOCK,
@@ -3158,8 +3266,8 @@ struct smb_version_values smb3any_values = {
.exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE_LOCK,
.shared_lock_type = SMB2_LOCKFLAG_SHARED_LOCK,
.unlock_lock_type = SMB2_LOCKFLAG_UNLOCK,
- .header_size = sizeof(struct smb2_hdr),
- .header_preamble_size = 4,
+ .header_size = sizeof(struct smb2_sync_hdr),
+ .header_preamble_size = 0,
.max_header_size = MAX_SMB2_HDR_SIZE,
.read_rsp_size = sizeof(struct smb2_read_rsp) - 1,
.lock_cmd = SMB2_LOCK,
@@ -3179,8 +3287,8 @@ struct smb_version_values smbdefault_values = {
.exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE_LOCK,
.shared_lock_type = SMB2_LOCKFLAG_SHARED_LOCK,
.unlock_lock_type = SMB2_LOCKFLAG_UNLOCK,
- .header_size = sizeof(struct smb2_hdr),
- .header_preamble_size = 4,
+ .header_size = sizeof(struct smb2_sync_hdr),
+ .header_preamble_size = 0,
.max_header_size = MAX_SMB2_HDR_SIZE,
.read_rsp_size = sizeof(struct smb2_read_rsp) - 1,
.lock_cmd = SMB2_LOCK,
@@ -3200,8 +3308,8 @@ struct smb_version_values smb30_values = {
.exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE_LOCK,
.shared_lock_type = SMB2_LOCKFLAG_SHARED_LOCK,
.unlock_lock_type = SMB2_LOCKFLAG_UNLOCK,
- .header_size = sizeof(struct smb2_hdr),
- .header_preamble_size = 4,
+ .header_size = sizeof(struct smb2_sync_hdr),
+ .header_preamble_size = 0,
.max_header_size = MAX_SMB2_HDR_SIZE,
.read_rsp_size = sizeof(struct smb2_read_rsp) - 1,
.lock_cmd = SMB2_LOCK,
@@ -3221,8 +3329,8 @@ struct smb_version_values smb302_values = {
.exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE_LOCK,
.shared_lock_type = SMB2_LOCKFLAG_SHARED_LOCK,
.unlock_lock_type = SMB2_LOCKFLAG_UNLOCK,
- .header_size = sizeof(struct smb2_hdr),
- .header_preamble_size = 4,
+ .header_size = sizeof(struct smb2_sync_hdr),
+ .header_preamble_size = 0,
.max_header_size = MAX_SMB2_HDR_SIZE,
.read_rsp_size = sizeof(struct smb2_read_rsp) - 1,
.lock_cmd = SMB2_LOCK,
@@ -3243,8 +3351,8 @@ struct smb_version_values smb311_values = {
.exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE_LOCK,
.shared_lock_type = SMB2_LOCKFLAG_SHARED_LOCK,
.unlock_lock_type = SMB2_LOCKFLAG_UNLOCK,
- .header_size = sizeof(struct smb2_hdr),
- .header_preamble_size = 4,
+ .header_size = sizeof(struct smb2_sync_hdr),
+ .header_preamble_size = 0,
.max_header_size = MAX_SMB2_HDR_SIZE,
.read_rsp_size = sizeof(struct smb2_read_rsp) - 1,
.lock_cmd = SMB2_LOCK,
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 60db51bae0e3..281fbc1dc720 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -49,6 +49,7 @@
#include "cifspdu.h"
#include "cifs_spnego.h"
#include "smbdirect.h"
+#include "trace.h"
/*
* The following table defines the expected "StructureSize" of SMB2 requests
@@ -79,7 +80,7 @@ static const int smb2_req_struct_sizes[NUMBER_OF_SMB2_COMMANDS] = {
/* SMB2_OPLOCK_BREAK */ 24 /* BB this is 36 for LEASE_BREAK variant */
};
-static int encryption_required(const struct cifs_tcon *tcon)
+static int smb3_encryption_required(const struct cifs_tcon *tcon)
{
if (!tcon)
return 0;
@@ -145,7 +146,7 @@ smb2_hdr_assemble(struct smb2_sync_hdr *shdr, __le16 smb2_cmd,
shdr->Flags |= SMB2_FLAGS_DFS_OPERATIONS; */
if (tcon->ses && tcon->ses->server && tcon->ses->server->sign &&
- !encryption_required(tcon))
+ !smb3_encryption_required(tcon))
shdr->Flags |= SMB2_FLAGS_SIGNED;
out:
return;
@@ -367,6 +368,7 @@ smb2_plain_req_init(__le16 smb2_command, struct cifs_tcon *tcon,
#define SMB2_PREAUTH_INTEGRITY_CAPABILITIES cpu_to_le16(1)
#define SMB2_ENCRYPTION_CAPABILITIES cpu_to_le16(2)
+#define SMB2_POSIX_EXTENSIONS_AVAILABLE cpu_to_le16(0x100)
static void
build_preauth_ctxt(struct smb2_preauth_neg_context *pneg_ctxt)
@@ -390,21 +392,35 @@ build_encrypt_ctxt(struct smb2_encryption_neg_context *pneg_ctxt)
}
static void
+build_posix_ctxt(struct smb2_posix_neg_context *pneg_ctxt)
+{
+ pneg_ctxt->ContextType = SMB2_POSIX_EXTENSIONS_AVAILABLE;
+ pneg_ctxt->DataLength = cpu_to_le16(POSIX_CTXT_DATA_LEN);
+}
+
+static void
assemble_neg_contexts(struct smb2_negotiate_req *req,
unsigned int *total_len)
{
char *pneg_ctxt = (char *)req + OFFSET_OF_NEG_CONTEXT;
+ unsigned int ctxt_len;
+ *total_len += 2; /* add 2 so the first context is 8 byte aligned */
build_preauth_ctxt((struct smb2_preauth_neg_context *)pneg_ctxt);
- /* Add 2 to size to round to 8 byte boundary */
+ ctxt_len = DIV_ROUND_UP(sizeof(struct smb2_preauth_neg_context), 8) * 8;
+ *total_len += ctxt_len;
+ pneg_ctxt += ctxt_len;
- pneg_ctxt += 2 + sizeof(struct smb2_preauth_neg_context);
build_encrypt_ctxt((struct smb2_encryption_neg_context *)pneg_ctxt);
- req->NegotiateContextOffset = cpu_to_le32(OFFSET_OF_NEG_CONTEXT);
- req->NegotiateContextCount = cpu_to_le16(2);
+ ctxt_len = DIV_ROUND_UP(sizeof(struct smb2_encryption_neg_context), 8) * 8;
+ *total_len += ctxt_len;
+ pneg_ctxt += ctxt_len;
+
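+ /* advertise SMB3.11 POSIX extensions support to the server */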
+ build_posix_ctxt((struct smb2_posix_neg_context *)pneg_ctxt);
+ *total_len += sizeof(struct smb2_posix_neg_context);
- *total_len += 4 + sizeof(struct smb2_preauth_neg_context)
- + sizeof(struct smb2_encryption_neg_context);
+ req->NegotiateContextOffset = cpu_to_le32(OFFSET_OF_NEG_CONTEXT);
+ req->NegotiateContextCount = cpu_to_le16(3);
}
static void decode_preauth_context(struct smb2_preauth_neg_context *ctxt)
@@ -449,12 +465,12 @@ static int decode_encrypt_ctx(struct TCP_Server_Info *server,
}
static int smb311_decode_neg_context(struct smb2_negotiate_rsp *rsp,
- struct TCP_Server_Info *server)
+ struct TCP_Server_Info *server,
+ unsigned int len_of_smb)
{
struct smb2_neg_context *pctx;
unsigned int offset = le32_to_cpu(rsp->NegotiateContextOffset);
unsigned int ctxt_cnt = le16_to_cpu(rsp->NegotiateContextCount);
- unsigned int len_of_smb = be32_to_cpu(rsp->hdr.smb2_buf_length);
unsigned int len_of_ctxts, i;
int rc = 0;
@@ -475,8 +491,7 @@ static int smb311_decode_neg_context(struct smb2_negotiate_rsp *rsp,
if (len_of_ctxts < sizeof(struct smb2_neg_context))
break;
- pctx = (struct smb2_neg_context *)(offset +
- server->vals->header_preamble_size + (char *)rsp);
+ pctx = (struct smb2_neg_context *)(offset + (char *)rsp);
clen = le16_to_cpu(pctx->DataLength);
if (clen > len_of_ctxts)
break;
@@ -487,6 +502,8 @@ static int smb311_decode_neg_context(struct smb2_negotiate_rsp *rsp,
else if (pctx->ContextType == SMB2_ENCRYPTION_CAPABILITIES)
rc = decode_encrypt_ctx(server,
(struct smb2_encryption_neg_context *)pctx);
+ else if (pctx->ContextType == SMB2_POSIX_EXTENSIONS_AVAILABLE)
+ server->posix_ext_supported = true;
else
cifs_dbg(VFS, "unknown negcontext of type %d ignored\n",
le16_to_cpu(pctx->ContextType));
@@ -501,6 +518,64 @@ static int smb311_decode_neg_context(struct smb2_negotiate_rsp *rsp,
return rc;
}
+static struct create_posix *
+create_posix_buf(umode_t mode)
+{
+ struct create_posix *buf;
+
+ buf = kzalloc(sizeof(struct create_posix),
+ GFP_KERNEL);
+ if (!buf)
+ return NULL;
+
+ buf->ccontext.DataOffset =
+ cpu_to_le16(offsetof(struct create_posix, Mode));
+ buf->ccontext.DataLength = cpu_to_le32(4);
+ buf->ccontext.NameOffset =
+ cpu_to_le16(offsetof(struct create_posix, Name));
+ buf->ccontext.NameLength = cpu_to_le16(16);
+
+ /* SMB2_CREATE_TAG_POSIX is "0x93AD25509CB411E7B42383DE968BCD7C" */
+ buf->Name[0] = 0x93;
+ buf->Name[1] = 0xAD;
+ buf->Name[2] = 0x25;
+ buf->Name[3] = 0x50;
+ buf->Name[4] = 0x9C;
+ buf->Name[5] = 0xB4;
+ buf->Name[6] = 0x11;
+ buf->Name[7] = 0xE7;
+ buf->Name[8] = 0xB4;
+ buf->Name[9] = 0x23;
+ buf->Name[10] = 0x83;
+ buf->Name[11] = 0xDE;
+ buf->Name[12] = 0x96;
+ buf->Name[13] = 0x8B;
+ buf->Name[14] = 0xCD;
+ buf->Name[15] = 0x7C;
+ buf->Mode = cpu_to_le32(mode);
+ cifs_dbg(FYI, "mode on posix create 0%o\n", mode);
+ return buf;
+}
+
+static int
+add_posix_context(struct kvec *iov, unsigned int *num_iovec, umode_t mode)
+{
+ struct smb2_create_req *req = iov[0].iov_base;
+ unsigned int num = *num_iovec;
+
+ iov[num].iov_base = create_posix_buf(mode);
+ if (iov[num].iov_base == NULL)
+ return -ENOMEM;
+ iov[num].iov_len = sizeof(struct create_posix);
+ if (!req->CreateContextsOffset)
+ req->CreateContextsOffset = cpu_to_le32(
+ sizeof(struct smb2_create_req) +
+ iov[num - 1].iov_len);
+ le32_add_cpu(&req->CreateContextsLength, sizeof(struct create_posix));
+ *num_iovec = num + 1;
+ return 0;
+}
+
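The sixteen byte-at-a-time stores in create_posix_buf() could equally be written as a memcpy() from a static table; a sketch of that alternative, using the same bytes as SMB2_CREATE_TAG_POSIX above (illustrative only, names hypothetical):

#include <string.h>

static const unsigned char smb2_posix_tag[16] = {
	0x93, 0xAD, 0x25, 0x50, 0x9C, 0xB4, 0x11, 0xE7,
	0xB4, 0x23, 0x83, 0xDE, 0x96, 0x8B, 0xCD, 0x7C
};

static void set_posix_create_tag(unsigned char name[16])
{
	/* one copy instead of sixteen assignments */
	memcpy(name, smb2_posix_tag, sizeof(smb2_posix_tag));
}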
#else
static void assemble_neg_contexts(struct smb2_negotiate_req *req,
unsigned int *total_len)
@@ -691,7 +766,7 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses)
server->capabilities |= SMB2_NT_FIND | SMB2_LARGE_FILES;
security_blob = smb2_get_data_area_len(&blob_offset, &blob_length,
- &rsp->hdr);
+ (struct smb2_sync_hdr *)rsp);
/*
* See MS-SMB2 section 2.2.4: if no blob, client picks default which
* for us will be
@@ -718,7 +793,8 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses)
#ifdef CONFIG_CIFS_SMB311
if (rsp->DialectRevision == cpu_to_le16(SMB311_PROT_ID)) {
if (rsp->NegotiateContextCount)
- rc = smb311_decode_neg_context(rsp, server);
+ rc = smb311_decode_neg_context(rsp, server,
+ rsp_iov.iov_len);
else
cifs_dbg(VFS, "Missing expected negotiate contexts\n");
}
@@ -730,19 +806,14 @@ neg_exit:
int smb3_validate_negotiate(const unsigned int xid, struct cifs_tcon *tcon)
{
- int rc = 0;
- struct validate_negotiate_info_req vneg_inbuf;
+ int rc;
+ struct validate_negotiate_info_req *pneg_inbuf;
struct validate_negotiate_info_rsp *pneg_rsp = NULL;
u32 rsplen;
u32 inbuflen; /* max of 4 dialects */
cifs_dbg(FYI, "validate negotiate\n");
-#ifdef CONFIG_CIFS_SMB_DIRECT
- if (tcon->ses->server->rdma)
- return 0;
-#endif
-
/* In SMB3.11 preauth integrity supersedes validate negotiate */
if (tcon->ses->server->dialect == SMB311_PROT_ID)
return 0;
@@ -765,63 +836,69 @@ int smb3_validate_negotiate(const unsigned int xid, struct cifs_tcon *tcon)
if (tcon->ses->session_flags & SMB2_SESSION_FLAG_IS_NULL)
cifs_dbg(VFS, "Unexpected null user (anonymous) auth flag sent by server\n");
- vneg_inbuf.Capabilities =
+ pneg_inbuf = kmalloc(sizeof(*pneg_inbuf), GFP_NOFS);
+ if (!pneg_inbuf)
+ return -ENOMEM;
+
+ pneg_inbuf->Capabilities =
cpu_to_le32(tcon->ses->server->vals->req_capabilities);
- memcpy(vneg_inbuf.Guid, tcon->ses->server->client_guid,
+ memcpy(pneg_inbuf->Guid, tcon->ses->server->client_guid,
SMB2_CLIENT_GUID_SIZE);
if (tcon->ses->sign)
- vneg_inbuf.SecurityMode =
+ pneg_inbuf->SecurityMode =
cpu_to_le16(SMB2_NEGOTIATE_SIGNING_REQUIRED);
else if (global_secflags & CIFSSEC_MAY_SIGN)
- vneg_inbuf.SecurityMode =
+ pneg_inbuf->SecurityMode =
cpu_to_le16(SMB2_NEGOTIATE_SIGNING_ENABLED);
else
- vneg_inbuf.SecurityMode = 0;
+ pneg_inbuf->SecurityMode = 0;
if (strcmp(tcon->ses->server->vals->version_string,
SMB3ANY_VERSION_STRING) == 0) {
- vneg_inbuf.Dialects[0] = cpu_to_le16(SMB30_PROT_ID);
- vneg_inbuf.Dialects[1] = cpu_to_le16(SMB302_PROT_ID);
- vneg_inbuf.DialectCount = cpu_to_le16(2);
+ pneg_inbuf->Dialects[0] = cpu_to_le16(SMB30_PROT_ID);
+ pneg_inbuf->Dialects[1] = cpu_to_le16(SMB302_PROT_ID);
+ pneg_inbuf->DialectCount = cpu_to_le16(2);
/* structure is big enough for 3 dialects, sending only 2 */
- inbuflen = sizeof(struct validate_negotiate_info_req) - 2;
+ inbuflen = sizeof(*pneg_inbuf) -
+ sizeof(pneg_inbuf->Dialects[0]);
} else if (strcmp(tcon->ses->server->vals->version_string,
SMBDEFAULT_VERSION_STRING) == 0) {
- vneg_inbuf.Dialects[0] = cpu_to_le16(SMB21_PROT_ID);
- vneg_inbuf.Dialects[1] = cpu_to_le16(SMB30_PROT_ID);
- vneg_inbuf.Dialects[2] = cpu_to_le16(SMB302_PROT_ID);
- vneg_inbuf.DialectCount = cpu_to_le16(3);
+ pneg_inbuf->Dialects[0] = cpu_to_le16(SMB21_PROT_ID);
+ pneg_inbuf->Dialects[1] = cpu_to_le16(SMB30_PROT_ID);
+ pneg_inbuf->Dialects[2] = cpu_to_le16(SMB302_PROT_ID);
+ pneg_inbuf->DialectCount = cpu_to_le16(3);
/* structure is big enough for 3 dialects */
- inbuflen = sizeof(struct validate_negotiate_info_req);
+ inbuflen = sizeof(*pneg_inbuf);
} else {
/* otherwise specific dialect was requested */
- vneg_inbuf.Dialects[0] =
+ pneg_inbuf->Dialects[0] =
cpu_to_le16(tcon->ses->server->vals->protocol_id);
- vneg_inbuf.DialectCount = cpu_to_le16(1);
+ pneg_inbuf->DialectCount = cpu_to_le16(1);
/* structure is big enough for 3 dialects, sending only 1 */
- inbuflen = sizeof(struct validate_negotiate_info_req) - 4;
+ inbuflen = sizeof(*pneg_inbuf) -
+ sizeof(pneg_inbuf->Dialects[0]) * 2;
}
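The three inbuflen cases above are pure tail-trimming arithmetic: assuming the request ends in a 3-entry __le16 Dialects array, as the comments state, sending k dialects trims (3 - k) * sizeof(__le16) bytes. A userspace check of that arithmetic (illustrative only):

#include <stdint.h>
#include <stdio.h>

struct vneg_req_tail { uint16_t Dialects[3]; };	/* tail of the request */

int main(void)
{
	size_t full = sizeof(struct vneg_req_tail);
	int k;

	/* k = 1 trims 4 bytes, k = 2 trims 2, k = 3 trims none */
	for (k = 1; k <= 3; k++)
		printf("%d dialect(s): trim %zu bytes\n",
		       k, full - k * sizeof(uint16_t));
	return 0;
}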
rc = SMB2_ioctl(xid, tcon, NO_FILE_ID, NO_FILE_ID,
FSCTL_VALIDATE_NEGOTIATE_INFO, true /* is_fsctl */,
- (char *)&vneg_inbuf, sizeof(struct validate_negotiate_info_req),
- (char **)&pneg_rsp, &rsplen);
+ (char *)pneg_inbuf, inbuflen, (char **)&pneg_rsp, &rsplen);
if (rc != 0) {
cifs_dbg(VFS, "validate protocol negotiate failed: %d\n", rc);
- return -EIO;
+ rc = -EIO;
+ goto out_free_inbuf;
}
- if (rsplen != sizeof(struct validate_negotiate_info_rsp)) {
+ rc = -EIO;
+ if (rsplen != sizeof(*pneg_rsp)) {
cifs_dbg(VFS, "invalid protocol negotiate response size: %d\n",
rsplen);
/* relax check since Mac returns max bufsize allowed on ioctl */
- if ((rsplen > CIFSMaxBufSize)
- || (rsplen < sizeof(struct validate_negotiate_info_rsp)))
- goto err_rsp_free;
+ if (rsplen > CIFSMaxBufSize || rsplen < sizeof(*pneg_rsp))
+ goto out_free_rsp;
}
/* check validate negotiate info response matches what we got earlier */
@@ -838,15 +915,17 @@ int smb3_validate_negotiate(const unsigned int xid, struct cifs_tcon *tcon)
goto vneg_out;
/* validate negotiate successful */
+ rc = 0;
cifs_dbg(FYI, "validate negotiate info successful\n");
- kfree(pneg_rsp);
- return 0;
+ goto out_free_rsp;
vneg_out:
cifs_dbg(VFS, "protocol revalidation - security settings mismatch\n");
-err_rsp_free:
+out_free_rsp:
kfree(pneg_rsp);
- return -EIO;
+out_free_inbuf:
+ kfree(pneg_inbuf);
+ return rc;
}
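Two things happen in this rework: the request buffer moves from the stack to kmalloc (which is presumably why the SMB Direct early-return is dropped, since RDMA needs a DMA-able buffer rather than stack memory), and every exit funnels through labels that free resources in reverse allocation order. A self-contained userspace sketch of that unwind pattern, with hypothetical names:

#include <stdlib.h>

static char *issue_request(const char *inbuf)
{
	(void)inbuf;			/* a real caller would send this */
	return calloc(1, 64);		/* pretend we received a response */
}

static int validate_example(void)
{
	int rc;
	char *inbuf, *rsp;

	inbuf = malloc(64);		/* stands in for kmalloc(GFP_NOFS) */
	if (!inbuf)
		return -1;

	rsp = issue_request(inbuf);
	rc = -1;			/* assume failure until validated */
	if (!rsp)
		goto out_free_inbuf;

	/* ... validate the response fields here ... */
	rc = 0;

	free(rsp);			/* out_free_rsp: */
out_free_inbuf:
	free(inbuf);			/* freed last, allocated first */
	return rc;
}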
enum securityEnum
@@ -1051,7 +1130,7 @@ SMB2_auth_kerberos(struct SMB2_sess_data *sess_data)
goto out_put_spnego_key;
rsp = (struct smb2_sess_setup_rsp *)sess_data->iov[0].iov_base;
- ses->Suid = rsp->hdr.sync_hdr.SessionId;
+ ses->Suid = rsp->sync_hdr.SessionId;
ses->session_flags = le16_to_cpu(rsp->SessionFlags);
@@ -1127,13 +1206,13 @@ SMB2_sess_auth_rawntlmssp_negotiate(struct SMB2_sess_data *sess_data)
/* If true, rc here is expected and not an error */
if (sess_data->buf0_type != CIFS_NO_BUFFER &&
- rsp->hdr.sync_hdr.Status == STATUS_MORE_PROCESSING_REQUIRED)
+ rsp->sync_hdr.Status == STATUS_MORE_PROCESSING_REQUIRED)
rc = 0;
if (rc)
goto out;
- if (offsetof(struct smb2_sess_setup_rsp, Buffer) - ses->server->vals->header_preamble_size !=
+ if (offsetof(struct smb2_sess_setup_rsp, Buffer) !=
le16_to_cpu(rsp->SecurityBufferOffset)) {
cifs_dbg(VFS, "Invalid security buffer offset %d\n",
le16_to_cpu(rsp->SecurityBufferOffset));
@@ -1148,7 +1227,7 @@ SMB2_sess_auth_rawntlmssp_negotiate(struct SMB2_sess_data *sess_data)
cifs_dbg(FYI, "rawntlmssp session setup challenge phase\n");
- ses->Suid = rsp->hdr.sync_hdr.SessionId;
+ ses->Suid = rsp->sync_hdr.SessionId;
ses->session_flags = le16_to_cpu(rsp->SessionFlags);
out:
@@ -1206,7 +1285,7 @@ SMB2_sess_auth_rawntlmssp_authenticate(struct SMB2_sess_data *sess_data)
rsp = (struct smb2_sess_setup_rsp *)sess_data->iov[0].iov_base;
- ses->Suid = rsp->hdr.sync_hdr.SessionId;
+ ses->Suid = rsp->sync_hdr.SessionId;
ses->session_flags = le16_to_cpu(rsp->SessionFlags);
rc = SMB2_sess_establish_session(sess_data);
@@ -1273,6 +1352,7 @@ SMB2_sess_setup(const unsigned int xid, struct cifs_ses *ses,
sess_data->ses = ses;
sess_data->buf0_type = CIFS_NO_BUFFER;
sess_data->nls_cp = (struct nls_table *) nls_cp;
+ sess_data->previous_session = ses->Suid;
#ifdef CONFIG_CIFS_SMB311
/*
@@ -1400,7 +1480,7 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree,
return rc;
}
- if (encryption_required(tcon))
+ if (smb3_encryption_required(tcon))
flags |= CIFS_TRANSFORM_REQ;
iov[0].iov_base = (char *)req;
@@ -1416,7 +1496,7 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree,
/* 3.11 tcon req must be signed if not encrypted. See MS-SMB2 3.2.4.1.1 */
if ((ses->server->dialect == SMB311_PROT_ID) &&
- !encryption_required(tcon))
+ !smb3_encryption_required(tcon))
req->sync_hdr.Flags |= SMB2_FLAGS_SIGNED;
rc = smb2_send_recv(xid, ses, iov, 2, &resp_buftype, flags, &rsp_iov);
@@ -1454,7 +1534,7 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree,
tcon->maximal_access = le32_to_cpu(rsp->MaximalAccess);
tcon->tidStatus = CifsGood;
tcon->need_reconnect = false;
- tcon->tid = rsp->hdr.sync_hdr.TreeId;
+ tcon->tid = rsp->sync_hdr.TreeId;
strlcpy(tcon->treeName, tree, sizeof(tcon->treeName));
if ((rsp->Capabilities & SMB2_SHARE_CAP_DFS) &&
@@ -1474,7 +1554,7 @@ tcon_exit:
return rc;
tcon_error_exit:
- if (rsp && rsp->hdr.sync_hdr.Status == STATUS_BAD_NETWORK_NAME) {
+ if (rsp && rsp->sync_hdr.Status == STATUS_BAD_NETWORK_NAME) {
cifs_dbg(VFS, "BAD_NETWORK_NAME: %s\n", tree);
}
goto tcon_exit;
@@ -1505,7 +1585,7 @@ SMB2_tdis(const unsigned int xid, struct cifs_tcon *tcon)
if (rc)
return rc;
- if (encryption_required(tcon))
+ if (smb3_encryption_required(tcon))
flags |= CIFS_TRANSFORM_REQ;
flags |= CIFS_NO_RESP;
@@ -1572,7 +1652,7 @@ create_reconnect_durable_buf(struct cifs_fid *fid)
static __u8
parse_lease_state(struct TCP_Server_Info *server, struct smb2_create_rsp *rsp,
- unsigned int *epoch)
+ unsigned int *epoch, char *lease_key)
{
char *data_offset;
struct create_context *cc;
@@ -1580,14 +1660,15 @@ parse_lease_state(struct TCP_Server_Info *server, struct smb2_create_rsp *rsp,
unsigned int remaining;
char *name;
- data_offset = (char *)rsp + server->vals->header_preamble_size + le32_to_cpu(rsp->CreateContextsOffset);
+ data_offset = (char *)rsp + le32_to_cpu(rsp->CreateContextsOffset);
remaining = le32_to_cpu(rsp->CreateContextsLength);
cc = (struct create_context *)data_offset;
while (remaining >= sizeof(struct create_context)) {
name = le16_to_cpu(cc->NameOffset) + (char *)cc;
if (le16_to_cpu(cc->NameLength) == 4 &&
strncmp(name, "RqLs", 4) == 0)
- return server->ops->parse_lease_buf(cc, epoch);
+ return server->ops->parse_lease_buf(cc, epoch,
+ lease_key);
next = le32_to_cpu(cc->Next);
if (!next)
@@ -1815,7 +1896,7 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path,
struct TCP_Server_Info *server;
struct cifs_tcon *tcon = oparms->tcon;
struct cifs_ses *ses = tcon->ses;
- struct kvec iov[4];
+ struct kvec iov[5]; /* make sure at least one for each open context */
struct kvec rsp_iov = {NULL, 0};
int resp_buftype;
int uni_path_len;
@@ -1824,7 +1905,7 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path,
int rc = 0;
unsigned int n_iov = 2;
__u32 file_attributes = 0;
- char *dhc_buf = NULL, *lc_buf = NULL;
+ char *dhc_buf = NULL, *lc_buf = NULL, *pc_buf = NULL;
int flags = 0;
unsigned int total_len;
@@ -1840,7 +1921,7 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path,
if (rc)
return rc;
- if (encryption_required(tcon))
+ if (smb3_encryption_required(tcon))
flags |= CIFS_TRANSFORM_REQ;
if (oparms->create_options & CREATE_OPTION_READONLY)
@@ -1941,6 +2022,27 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path,
dhc_buf = iov[n_iov-1].iov_base;
}
+#ifdef CONFIG_CIFS_SMB311
+ if (tcon->posix_extensions) {
+ if (n_iov > 2) {
+ struct create_context *ccontext =
+ (struct create_context *)iov[n_iov-1].iov_base;
+ ccontext->Next =
+ cpu_to_le32(iov[n_iov-1].iov_len);
+ }
+
+ rc = add_posix_context(iov, &n_iov, oparms->mode);
+ if (rc) {
+ cifs_small_buf_release(req);
+ kfree(copy_path);
+ kfree(lc_buf);
+ kfree(dhc_buf);
+ return rc;
+ }
+ pc_buf = iov[n_iov-1].iov_base;
+ }
+#endif /* SMB311 */
+
rc = smb2_send_recv(xid, ses, iov, n_iov, &resp_buftype, flags,
&rsp_iov);
cifs_small_buf_release(req);
@@ -1953,8 +2055,13 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path,
resp_buftype = CIFS_NO_BUFFER;
rsp = NULL;
}
+ trace_smb3_open_err(xid, tcon->tid, ses->Suid,
+ oparms->create_options, oparms->desired_access, rc);
goto creat_exit;
- }
+ } else
+ trace_smb3_open_done(xid, rsp->PersistentFileId, tcon->tid,
+ ses->Suid, oparms->create_options,
+ oparms->desired_access);
oparms->fid->persistent_fid = rsp->PersistentFileId;
oparms->fid->volatile_fid = rsp->VolatileFileId;
@@ -1969,13 +2076,15 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path,
}
if (rsp->OplockLevel == SMB2_OPLOCK_LEVEL_LEASE)
- *oplock = parse_lease_state(server, rsp, &oparms->fid->epoch);
+ *oplock = parse_lease_state(server, rsp, &oparms->fid->epoch,
+ oparms->fid->lease_key);
else
*oplock = rsp->OplockLevel;
creat_exit:
kfree(copy_path);
kfree(lc_buf);
kfree(dhc_buf);
+ kfree(pc_buf);
free_rsp_buf(resp_buftype, rsp);
return rc;
}
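SMB2 create contexts form a singly linked chain inside the buffer: each context's Next field holds the byte offset of the following context and 0 marks the last one, which is why the posix block above patches the previous context's Next before appending. A standalone sketch of the walk parse_lease_state() performs over such a chain (header layout simplified; illustrative only):

#include <stdint.h>

struct cc_hdr {
	uint32_t next;		/* offset to next context; 0 = last */
	/* name/data offsets and lengths follow in the real header */
};

static void walk_create_contexts(char *buf, unsigned int remaining)
{
	struct cc_hdr *cc = (struct cc_hdr *)buf;

	while (remaining >= sizeof(*cc)) {
		/* ... match the 4- or 16-byte context name here ... */
		if (!cc->next)
			break;		/* end of chain */
		remaining -= cc->next;
		cc = (struct cc_hdr *)((char *)cc + cc->next);
	}
}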
@@ -1991,7 +2100,6 @@ SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
{
struct smb2_ioctl_req *req;
struct smb2_ioctl_rsp *rsp;
- struct smb2_sync_hdr *shdr;
struct cifs_ses *ses;
struct kvec iov[2];
struct kvec rsp_iov;
@@ -2022,7 +2130,7 @@ SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
if (rc)
return rc;
- if (encryption_required(tcon))
+ if (smb3_encryption_required(tcon))
flags |= CIFS_TRANSFORM_REQ;
req->CtlCode = cpu_to_le32(opcode);
@@ -2085,6 +2193,10 @@ SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
cifs_small_buf_release(req);
rsp = (struct smb2_ioctl_rsp *)rsp_iov.iov_base;
+ if (rc != 0)
+ trace_smb3_fsctl_err(xid, persistent_fid, tcon->tid,
+ ses->Suid, 0, opcode, rc);
+
if ((rc != 0) && (rc != -EINVAL)) {
cifs_stats_fail_inc(tcon, SMB2_IOCTL_HE);
goto ioctl_exit;
@@ -2112,7 +2224,7 @@ SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
goto ioctl_exit;
}
- if (get_rfc1002_length(rsp) < le32_to_cpu(rsp->OutputOffset) + *plen) {
+ if (rsp_iov.iov_len < le32_to_cpu(rsp->OutputOffset) + *plen) {
cifs_dbg(VFS, "Malformed ioctl resp: len %d offset %d\n", *plen,
le32_to_cpu(rsp->OutputOffset));
*plen = 0;
@@ -2126,8 +2238,7 @@ SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
goto ioctl_exit;
}
- shdr = get_sync_hdr(rsp);
- memcpy(*out_data, (char *)shdr + le32_to_cpu(rsp->OutputOffset), *plen);
+ memcpy(*out_data, (char *)rsp + le32_to_cpu(rsp->OutputOffset), *plen);
ioctl_exit:
free_rsp_buf(resp_buftype, rsp);
return rc;
@@ -2159,8 +2270,8 @@ SMB2_set_compression(const unsigned int xid, struct cifs_tcon *tcon,
}
int
-SMB2_close(const unsigned int xid, struct cifs_tcon *tcon,
- u64 persistent_fid, u64 volatile_fid)
+SMB2_close_flags(const unsigned int xid, struct cifs_tcon *tcon,
+ u64 persistent_fid, u64 volatile_fid, int flags)
{
struct smb2_close_req *req;
struct smb2_close_rsp *rsp;
@@ -2169,7 +2280,6 @@ SMB2_close(const unsigned int xid, struct cifs_tcon *tcon,
struct kvec rsp_iov;
int resp_buftype;
int rc = 0;
- int flags = 0;
unsigned int total_len;
cifs_dbg(FYI, "Close\n");
@@ -2181,7 +2291,7 @@ SMB2_close(const unsigned int xid, struct cifs_tcon *tcon,
if (rc)
return rc;
- if (encryption_required(tcon))
+ if (smb3_encryption_required(tcon))
flags |= CIFS_TRANSFORM_REQ;
req->PersistentFileId = persistent_fid;
@@ -2196,6 +2306,8 @@ SMB2_close(const unsigned int xid, struct cifs_tcon *tcon,
if (rc != 0) {
cifs_stats_fail_inc(tcon, SMB2_CLOSE_HE);
+ trace_smb3_close_err(xid, persistent_fid, tcon->tid, ses->Suid,
+ rc);
goto close_exit;
}
@@ -2206,14 +2318,20 @@ close_exit:
return rc;
}
+int
+SMB2_close(const unsigned int xid, struct cifs_tcon *tcon,
+ u64 persistent_fid, u64 volatile_fid)
+{
+ return SMB2_close_flags(xid, tcon, persistent_fid, volatile_fid, 0);
+}
+
static int
-validate_iov(struct TCP_Server_Info *server,
- unsigned int offset, unsigned int buffer_length,
+validate_iov(unsigned int offset, unsigned int buffer_length,
struct kvec *iov, unsigned int min_buf_size)
{
unsigned int smb_len = iov->iov_len;
- char *end_of_smb = smb_len + server->vals->header_preamble_size + (char *)iov->iov_base;
- char *begin_of_buf = server->vals->header_preamble_size + offset + (char *)iov->iov_base;
+ char *end_of_smb = smb_len + (char *)iov->iov_base;
+ char *begin_of_buf = offset + (char *)iov->iov_base;
char *end_of_buf = begin_of_buf + buffer_length;
@@ -2243,18 +2361,17 @@ validate_iov(struct TCP_Server_Info *server,
* Caller must free buffer.
*/
static int
-validate_and_copy_iov(struct TCP_Server_Info *server,
- unsigned int offset, unsigned int buffer_length,
+validate_and_copy_iov(unsigned int offset, unsigned int buffer_length,
struct kvec *iov, unsigned int minbufsize,
char *data)
{
- char *begin_of_buf = server->vals->header_preamble_size + offset + (char *)(iov->iov_base);
+ char *begin_of_buf = offset + (char *)iov->iov_base;
int rc;
if (!data)
return -EINVAL;
- rc = validate_iov(server, offset, buffer_length, iov, minbufsize);
+ rc = validate_iov(offset, buffer_length, iov, minbufsize);
if (rc)
return rc;
@@ -2289,7 +2406,7 @@ query_info(const unsigned int xid, struct cifs_tcon *tcon,
if (rc)
return rc;
- if (encryption_required(tcon))
+ if (smb3_encryption_required(tcon))
flags |= CIFS_TRANSFORM_REQ;
req->InfoType = info_type;
@@ -2315,6 +2432,8 @@ query_info(const unsigned int xid, struct cifs_tcon *tcon,
if (rc) {
cifs_stats_fail_inc(tcon, SMB2_QUERY_INFO_HE);
+ trace_smb3_query_info_err(xid, persistent_fid, tcon->tid,
+ ses->Suid, info_class, (__u32)info_type, rc);
goto qinf_exit;
}
@@ -2332,8 +2451,7 @@ query_info(const unsigned int xid, struct cifs_tcon *tcon,
}
}
- rc = validate_and_copy_iov(ses->server,
- le16_to_cpu(rsp->OutputBufferOffset),
+ rc = validate_and_copy_iov(le16_to_cpu(rsp->OutputBufferOffset),
le32_to_cpu(rsp->OutputBufferLength),
&rsp_iov, min_len, *data);
@@ -2404,7 +2522,7 @@ smb2_echo_callback(struct mid_q_entry *mid)
unsigned int credits_received = 1;
if (mid->mid_state == MID_RESPONSE_RECEIVED)
- credits_received = le16_to_cpu(rsp->hdr.sync_hdr.CreditRequest);
+ credits_received = le16_to_cpu(rsp->sync_hdr.CreditRequest);
DeleteMidQEntry(mid);
add_credits(server, credits_received, CIFS_ECHO_OP);
@@ -2533,7 +2651,7 @@ SMB2_flush(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
if (rc)
return rc;
- if (encryption_required(tcon))
+ if (smb3_encryption_required(tcon))
flags |= CIFS_TRANSFORM_REQ;
req->PersistentFileId = persistent_fid;
@@ -2545,8 +2663,11 @@ SMB2_flush(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
rc = smb2_send_recv(xid, ses, iov, 1, &resp_buftype, flags, &rsp_iov);
cifs_small_buf_release(req);
- if (rc != 0)
+ if (rc != 0) {
cifs_stats_fail_inc(tcon, SMB2_FLUSH_HE);
+ trace_smb3_flush_err(xid, persistent_fid, tcon->tid, ses->Suid,
+ rc);
+ }
free_rsp_buf(resp_buftype, rsp_iov.iov_base);
return rc;
@@ -2655,11 +2776,12 @@ smb2_readv_callback(struct mid_q_entry *mid)
struct cifs_tcon *tcon = tlink_tcon(rdata->cfile->tlink);
struct TCP_Server_Info *server = tcon->ses->server;
struct smb2_sync_hdr *shdr =
- (struct smb2_sync_hdr *)rdata->iov[1].iov_base;
+ (struct smb2_sync_hdr *)rdata->iov[0].iov_base;
unsigned int credits_received = 1;
struct smb_rqst rqst = { .rq_iov = rdata->iov,
.rq_nvec = 2,
.rq_pages = rdata->pages,
+ .rq_offset = rdata->page_offset,
.rq_npages = rdata->nr_pages,
.rq_pagesz = rdata->pagesz,
.rq_tailsz = rdata->tailsz };
@@ -2757,7 +2879,7 @@ smb2_async_readv(struct cifs_readdata *rdata)
return rc;
}
- if (encryption_required(io_parms.tcon))
+ if (smb3_encryption_required(io_parms.tcon))
flags |= CIFS_TRANSFORM_REQ;
req_len = cpu_to_be32(total_len);
@@ -2788,7 +2910,13 @@ smb2_async_readv(struct cifs_readdata *rdata)
if (rc) {
kref_put(&rdata->refcount, cifs_readdata_release);
cifs_stats_fail_inc(io_parms.tcon, SMB2_READ_HE);
- }
+ trace_smb3_read_err(0 /* xid */, io_parms.persistent_fid,
+ io_parms.tcon->tid, io_parms.tcon->ses->Suid,
+ io_parms.offset, io_parms.length, rc);
+ } else
+ trace_smb3_read_done(0 /* xid */, io_parms.persistent_fid,
+ io_parms.tcon->tid, io_parms.tcon->ses->Suid,
+ io_parms.offset, io_parms.length);
cifs_small_buf_release(buf);
return rc;
@@ -2801,7 +2929,6 @@ SMB2_read(const unsigned int xid, struct cifs_io_parms *io_parms,
int resp_buftype, rc = -EACCES;
struct smb2_read_plain_req *req = NULL;
struct smb2_read_rsp *rsp = NULL;
- struct smb2_sync_hdr *shdr;
struct kvec iov[1];
struct kvec rsp_iov;
unsigned int total_len;
@@ -2813,7 +2940,7 @@ SMB2_read(const unsigned int xid, struct cifs_io_parms *io_parms,
if (rc)
return rc;
- if (encryption_required(io_parms->tcon))
+ if (smb3_encryption_required(io_parms->tcon))
flags |= CIFS_TRANSFORM_REQ;
iov[0].iov_base = (char *)req;
@@ -2829,9 +2956,15 @@ SMB2_read(const unsigned int xid, struct cifs_io_parms *io_parms,
cifs_stats_fail_inc(io_parms->tcon, SMB2_READ_HE);
cifs_dbg(VFS, "Send error in read = %d\n", rc);
}
+ trace_smb3_read_err(xid, req->PersistentFileId,
+ io_parms->tcon->tid, ses->Suid,
+ io_parms->offset, io_parms->length, rc);
free_rsp_buf(resp_buftype, rsp_iov.iov_base);
return rc == -ENODATA ? 0 : rc;
- }
+ } else
+ trace_smb3_read_done(xid, req->PersistentFileId,
+ io_parms->tcon->tid, ses->Suid,
+ io_parms->offset, io_parms->length);
*nbytes = le32_to_cpu(rsp->DataLength);
if ((*nbytes > CIFS_MAX_MSGSIZE) ||
@@ -2842,10 +2975,8 @@ SMB2_read(const unsigned int xid, struct cifs_io_parms *io_parms,
*nbytes = 0;
}
- shdr = get_sync_hdr(rsp);
-
if (*buf) {
- memcpy(*buf, (char *)shdr + rsp->DataOffset, *nbytes);
+ memcpy(*buf, (char *)rsp + rsp->DataOffset, *nbytes);
free_rsp_buf(resp_buftype, rsp_iov.iov_base);
} else if (resp_buftype != CIFS_NO_BUFFER) {
*buf = rsp_iov.iov_base;
@@ -2872,7 +3003,7 @@ smb2_writev_callback(struct mid_q_entry *mid)
switch (mid->mid_state) {
case MID_RESPONSE_RECEIVED:
- credits_received = le16_to_cpu(rsp->hdr.sync_hdr.CreditRequest);
+ credits_received = le16_to_cpu(rsp->sync_hdr.CreditRequest);
wdata->result = smb2_check_receive(mid, tcon->ses->server, 0);
if (wdata->result != 0)
break;
@@ -2949,7 +3080,7 @@ smb2_async_writev(struct cifs_writedata *wdata,
goto async_writev_out;
}
- if (encryption_required(tcon))
+ if (smb3_encryption_required(tcon))
flags |= CIFS_TRANSFORM_REQ;
shdr = (struct smb2_sync_hdr *)req;
@@ -3010,6 +3141,7 @@ smb2_async_writev(struct cifs_writedata *wdata,
rqst.rq_iov = iov;
rqst.rq_nvec = 2;
rqst.rq_pages = wdata->pages;
+ rqst.rq_offset = wdata->page_offset;
rqst.rq_npages = wdata->nr_pages;
rqst.rq_pagesz = wdata->pagesz;
rqst.rq_tailsz = wdata->tailsz;
@@ -3047,9 +3179,15 @@ smb2_async_writev(struct cifs_writedata *wdata,
wdata, flags);
if (rc) {
+ trace_smb3_write_err(0 /* no xid */, req->PersistentFileId,
+ tcon->tid, tcon->ses->Suid, wdata->offset,
+ wdata->bytes, rc);
kref_put(&wdata->refcount, release);
cifs_stats_fail_inc(tcon, SMB2_WRITE_HE);
- }
+ } else
+ trace_smb3_write_done(0 /* no xid */, req->PersistentFileId,
+ tcon->tid, tcon->ses->Suid, wdata->offset,
+ wdata->bytes);
async_writev_out:
cifs_small_buf_release(req);
@@ -3087,7 +3225,7 @@ SMB2_write(const unsigned int xid, struct cifs_io_parms *io_parms,
if (io_parms->tcon->ses->server == NULL)
return -ECONNABORTED;
- if (encryption_required(io_parms->tcon))
+ if (smb3_encryption_required(io_parms->tcon))
flags |= CIFS_TRANSFORM_REQ;
req->sync_hdr.ProcessId = cpu_to_le32(io_parms->pid);
@@ -3113,10 +3251,19 @@ SMB2_write(const unsigned int xid, struct cifs_io_parms *io_parms,
rsp = (struct smb2_write_rsp *)rsp_iov.iov_base;
if (rc) {
+ trace_smb3_write_err(xid, req->PersistentFileId,
+ io_parms->tcon->tid,
+ io_parms->tcon->ses->Suid,
+ io_parms->offset, io_parms->length, rc);
cifs_stats_fail_inc(io_parms->tcon, SMB2_WRITE_HE);
cifs_dbg(VFS, "Send error in write = %d\n", rc);
- } else
+ } else {
*nbytes = le32_to_cpu(rsp->DataLength);
+ trace_smb3_write_done(xid, req->PersistentFileId,
+ io_parms->tcon->tid,
+ io_parms->tcon->ses->Suid,
+ io_parms->offset, *nbytes);
+ }
free_rsp_buf(resp_buftype, rsp);
return rc;
@@ -3197,7 +3344,7 @@ SMB2_query_directory(const unsigned int xid, struct cifs_tcon *tcon,
if (rc)
return rc;
- if (encryption_required(tcon))
+ if (smb3_encryption_required(tcon))
flags |= CIFS_TRANSFORM_REQ;
switch (srch_inf->info_level) {
@@ -3248,7 +3395,7 @@ SMB2_query_directory(const unsigned int xid, struct cifs_tcon *tcon,
if (rc) {
if (rc == -ENODATA &&
- rsp->hdr.sync_hdr.Status == STATUS_NO_MORE_FILES) {
+ rsp->sync_hdr.Status == STATUS_NO_MORE_FILES) {
srch_inf->endOfSearch = true;
rc = 0;
}
@@ -3256,8 +3403,7 @@ SMB2_query_directory(const unsigned int xid, struct cifs_tcon *tcon,
goto qdir_exit;
}
- rc = validate_iov(server,
- le16_to_cpu(rsp->OutputBufferOffset),
+ rc = validate_iov(le16_to_cpu(rsp->OutputBufferOffset),
le32_to_cpu(rsp->OutputBufferLength), &rsp_iov,
info_buf_size);
if (rc)
@@ -3272,10 +3418,9 @@ SMB2_query_directory(const unsigned int xid, struct cifs_tcon *tcon,
cifs_buf_release(srch_inf->ntwrk_buf_start);
}
srch_inf->ntwrk_buf_start = (char *)rsp;
- srch_inf->srch_entries_start = srch_inf->last_entry = 4 /* rfclen */ +
- (char *)&rsp->hdr + le16_to_cpu(rsp->OutputBufferOffset);
- /* 4 for rfc1002 length field */
- end_of_smb = get_rfc1002_length(rsp) + 4 + (char *)&rsp->hdr;
+ srch_inf->srch_entries_start = srch_inf->last_entry =
+ (char *)rsp + le16_to_cpu(rsp->OutputBufferOffset);
+ end_of_smb = rsp_iov.iov_len + (char *)rsp;
srch_inf->entries_in_buffer =
num_entries(srch_inf->srch_entries_start, end_of_smb,
&srch_inf->last_entry, info_buf_size);
@@ -3330,7 +3475,7 @@ send_set_info(const unsigned int xid, struct cifs_tcon *tcon,
return rc;
}
- if (encryption_required(tcon))
+ if (smb3_encryption_required(tcon))
flags |= CIFS_TRANSFORM_REQ;
req->sync_hdr.ProcessId = cpu_to_le32(pid);
@@ -3363,8 +3508,11 @@ send_set_info(const unsigned int xid, struct cifs_tcon *tcon,
cifs_small_buf_release(req);
rsp = (struct smb2_set_info_rsp *)rsp_iov.iov_base;
- if (rc != 0)
+ if (rc != 0) {
cifs_stats_fail_inc(tcon, SMB2_SET_INFO_HE);
+ trace_smb3_set_info_err(xid, persistent_fid, tcon->tid,
+ ses->Suid, info_class, (__u32)info_type, rc);
+ }
free_rsp_buf(resp_buftype, rsp);
kfree(iov);
@@ -3511,7 +3659,7 @@ SMB2_oplock_break(const unsigned int xid, struct cifs_tcon *tcon,
__u8 oplock_level)
{
int rc;
- struct smb2_oplock_break_req *req = NULL;
+ struct smb2_oplock_break *req = NULL;
struct cifs_ses *ses = tcon->ses;
int flags = CIFS_OBREAK_OP;
unsigned int total_len;
@@ -3525,7 +3673,7 @@ SMB2_oplock_break(const unsigned int xid, struct cifs_tcon *tcon,
if (rc)
return rc;
- if (encryption_required(tcon))
+ if (smb3_encryption_required(tcon))
flags |= CIFS_TRANSFORM_REQ;
req->VolatileFid = volatile_fid;
@@ -3590,7 +3738,7 @@ build_qfs_info_req(struct kvec *iov, struct cifs_tcon *tcon, int level,
req->InputBufferOffset =
cpu_to_le16(sizeof(struct smb2_query_info_req) - 1);
req->OutputBufferLength = cpu_to_le32(
- outbuf_len + sizeof(struct smb2_query_info_rsp) - 1 - server->vals->header_preamble_size);
+ outbuf_len + sizeof(struct smb2_query_info_rsp) - 1);
iov->iov_base = (char *)req;
iov->iov_len = total_len;
@@ -3607,7 +3755,6 @@ SMB2_QFS_info(const unsigned int xid, struct cifs_tcon *tcon,
int rc = 0;
int resp_buftype;
struct cifs_ses *ses = tcon->ses;
- struct TCP_Server_Info *server = ses->server;
struct smb2_fs_full_size_info *info = NULL;
int flags = 0;
@@ -3617,7 +3764,7 @@ SMB2_QFS_info(const unsigned int xid, struct cifs_tcon *tcon,
if (rc)
return rc;
- if (encryption_required(tcon))
+ if (smb3_encryption_required(tcon))
flags |= CIFS_TRANSFORM_REQ;
rc = smb2_send_recv(xid, ses, &iov, 1, &resp_buftype, flags, &rsp_iov);
@@ -3628,10 +3775,9 @@ SMB2_QFS_info(const unsigned int xid, struct cifs_tcon *tcon,
}
rsp = (struct smb2_query_info_rsp *)rsp_iov.iov_base;
- info = (struct smb2_fs_full_size_info *)(server->vals->header_preamble_size +
- le16_to_cpu(rsp->OutputBufferOffset) + (char *)&rsp->hdr);
- rc = validate_iov(server,
- le16_to_cpu(rsp->OutputBufferOffset),
+ info = (struct smb2_fs_full_size_info *)(
+ le16_to_cpu(rsp->OutputBufferOffset) + (char *)rsp);
+ rc = validate_iov(le16_to_cpu(rsp->OutputBufferOffset),
le32_to_cpu(rsp->OutputBufferLength), &rsp_iov,
sizeof(struct smb2_fs_full_size_info));
if (!rc)
@@ -3652,7 +3798,6 @@ SMB2_QFS_attr(const unsigned int xid, struct cifs_tcon *tcon,
int rc = 0;
int resp_buftype, max_len, min_len;
struct cifs_ses *ses = tcon->ses;
- struct TCP_Server_Info *server = ses->server;
unsigned int rsp_len, offset;
int flags = 0;
@@ -3675,7 +3820,7 @@ SMB2_QFS_attr(const unsigned int xid, struct cifs_tcon *tcon,
if (rc)
return rc;
- if (encryption_required(tcon))
+ if (smb3_encryption_required(tcon))
flags |= CIFS_TRANSFORM_REQ;
rc = smb2_send_recv(xid, ses, &iov, 1, &resp_buftype, flags, &rsp_iov);
@@ -3688,20 +3833,20 @@ SMB2_QFS_attr(const unsigned int xid, struct cifs_tcon *tcon,
rsp_len = le32_to_cpu(rsp->OutputBufferLength);
offset = le16_to_cpu(rsp->OutputBufferOffset);
- rc = validate_iov(server, offset, rsp_len, &rsp_iov, min_len);
+ rc = validate_iov(offset, rsp_len, &rsp_iov, min_len);
if (rc)
goto qfsattr_exit;
if (level == FS_ATTRIBUTE_INFORMATION)
- memcpy(&tcon->fsAttrInfo, server->vals->header_preamble_size + offset
- + (char *)&rsp->hdr, min_t(unsigned int,
+ memcpy(&tcon->fsAttrInfo, offset
+ + (char *)rsp, min_t(unsigned int,
rsp_len, max_len));
else if (level == FS_DEVICE_INFORMATION)
- memcpy(&tcon->fsDevInfo, server->vals->header_preamble_size + offset
- + (char *)&rsp->hdr, sizeof(FILE_SYSTEM_DEVICE_INFO));
+ memcpy(&tcon->fsDevInfo, offset
+ + (char *)rsp, sizeof(FILE_SYSTEM_DEVICE_INFO));
else if (level == FS_SECTOR_SIZE_INFORMATION) {
struct smb3_fs_ss_info *ss_info = (struct smb3_fs_ss_info *)
- (server->vals->header_preamble_size + offset + (char *)&rsp->hdr);
+ (offset + (char *)rsp);
tcon->ss_flags = le32_to_cpu(ss_info->Flags);
tcon->perf_sector_size =
le32_to_cpu(ss_info->PhysicalBytesPerSectorForPerf);
@@ -3732,7 +3877,7 @@ smb2_lockv(const unsigned int xid, struct cifs_tcon *tcon,
if (rc)
return rc;
- if (encryption_required(tcon))
+ if (smb3_encryption_required(tcon))
flags |= CIFS_TRANSFORM_REQ;
req->sync_hdr.ProcessId = cpu_to_le32(pid);
@@ -3755,6 +3900,8 @@ smb2_lockv(const unsigned int xid, struct cifs_tcon *tcon,
if (rc) {
cifs_dbg(FYI, "Send error in smb2_lockv = %d\n", rc);
cifs_stats_fail_inc(tcon, SMB2_LOCK_HE);
+ trace_smb3_lock_err(xid, persist_fid, tcon->tid,
+ tcon->ses->Suid, rc);
}
return rc;
@@ -3796,7 +3943,7 @@ SMB2_lease_break(const unsigned int xid, struct cifs_tcon *tcon,
if (rc)
return rc;
- if (encryption_required(tcon))
+ if (smb3_encryption_required(tcon))
flags |= CIFS_TRANSFORM_REQ;
req->sync_hdr.CreditRequest = cpu_to_le16(1);
diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h
index d28f358022c5..a345560001ce 100644
--- a/fs/cifs/smb2pdu.h
+++ b/fs/cifs/smb2pdu.h
@@ -122,25 +122,10 @@ struct smb2_sync_pdu {
__le16 StructureSize2; /* size of wct area (varies, request specific) */
} __packed;
-struct smb2_hdr {
- __be32 smb2_buf_length; /* big endian on wire */
- /* length is only two or three bytes - with */
- /* one or two byte type preceding it that MBZ */
- struct smb2_sync_hdr sync_hdr;
-} __packed;
-
-struct smb2_pdu {
- struct smb2_hdr hdr;
- __le16 StructureSize2; /* size of wct area (varies, request specific) */
-} __packed;
-
#define SMB3_AES128CMM_NONCE 11
#define SMB3_AES128GCM_NONCE 12
struct smb2_transform_hdr {
- __be32 smb2_buf_length; /* big endian on wire */
- /* length is only two or three bytes - with
- one or two byte type preceding it that MBZ */
__le32 ProtocolId; /* 0xFD 'S' 'M' 'B' */
__u8 Signature[16];
__u8 Nonce[16];
@@ -171,7 +156,7 @@ struct smb2_transform_hdr {
#define SMB2_ERROR_STRUCTURE_SIZE2 cpu_to_le16(9)
struct smb2_err_rsp {
- struct smb2_hdr hdr;
+ struct smb2_sync_hdr sync_hdr;
__le16 StructureSize;
__le16 Reserved; /* MBZ */
__le32 ByteCount; /* even if zero, at least one byte follows */
@@ -300,8 +285,16 @@ struct smb2_encryption_neg_context {
__le16 Ciphers[1]; /* Ciphers[0] since only one used now */
} __packed;
+#define POSIX_CTXT_DATA_LEN 8
+struct smb2_posix_neg_context {
+ __le16 ContextType; /* 0x100 */
+ __le16 DataLength;
+ __le32 Reserved;
+ __le64 Reserved1; /* In case needed for future (eg version or caps) */
+} __packed;
+
struct smb2_negotiate_rsp {
- struct smb2_hdr hdr;
+ struct smb2_sync_hdr sync_hdr;
__le16 StructureSize; /* Must be 65 */
__le16 SecurityMode;
__le16 DialectRevision;
@@ -341,7 +334,7 @@ struct smb2_sess_setup_req {
#define SMB2_SESSION_FLAG_IS_NULL 0x0002
#define SMB2_SESSION_FLAG_ENCRYPT_DATA 0x0004
struct smb2_sess_setup_rsp {
- struct smb2_hdr hdr;
+ struct smb2_sync_hdr sync_hdr;
__le16 StructureSize; /* Must be 9 */
__le16 SessionFlags;
__le16 SecurityBufferOffset;
@@ -356,7 +349,7 @@ struct smb2_logoff_req {
} __packed;
struct smb2_logoff_rsp {
- struct smb2_hdr hdr;
+ struct smb2_sync_hdr sync_hdr;
__le16 StructureSize; /* Must be 4 */
__le16 Reserved;
} __packed;
@@ -452,7 +445,7 @@ struct smb2_tree_connect_req_extension {
} __packed;
struct smb2_tree_connect_rsp {
- struct smb2_hdr hdr;
+ struct smb2_sync_hdr sync_hdr;
__le16 StructureSize; /* Must be 16 */
__u8 ShareType; /* see below */
__u8 Reserved;
@@ -503,7 +496,7 @@ struct smb2_tree_disconnect_req {
} __packed;
struct smb2_tree_disconnect_rsp {
- struct smb2_hdr hdr;
+ struct smb2_sync_hdr sync_hdr;
__le16 StructureSize; /* Must be 4 */
__le16 Reserved;
} __packed;
@@ -615,7 +608,9 @@ struct smb2_tree_disconnect_rsp {
#define SMB2_CREATE_DURABLE_HANDLE_REQUEST_V2 "DH2Q"
#define SMB2_CREATE_DURABLE_HANDLE_RECONNECT_V2 "DH2C"
#define SMB2_CREATE_APP_INSTANCE_ID 0x45BCA66AEFA7F74A9008FA462E144D74
-#define SVHDX_OPEN_DEVICE_CONTEXT 0x83CE6F1AD851E0986E34401CC9BCFCE9
+#define SVHDX_OPEN_DEVICE_CONTEX 0x9CCBCF9E04C1E643980E158DA1F6EC83
+#define SMB2_CREATE_TAG_POSIX 0x93AD25509CB411E7B42383DE968BCD7C
+
struct smb2_create_req {
struct smb2_sync_hdr sync_hdr;
@@ -638,7 +633,7 @@ struct smb2_create_req {
} __packed;
struct smb2_create_rsp {
- struct smb2_hdr hdr;
+ struct smb2_sync_hdr sync_hdr;
__le16 StructureSize; /* Must be 89 */
__u8 OplockLevel;
__u8 Reserved;
@@ -727,6 +722,13 @@ struct create_durable {
} Data;
} __packed;
+struct create_posix {
+ struct create_context ccontext;
+ __u8 Name[16];
+ __le32 Mode;
+ __u32 Reserved;
+} __packed;
+
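The offsets create_posix_buf() publishes for this context are plain offsetof() values into the packed layout, so header and payload stay consistent by construction: with the usual 16-byte create_context header, Name lands at offset 16 (NameOffset, NameLength 16) and Mode at 32 (DataOffset, DataLength 4). A userspace sketch checking that arithmetic, assuming that header layout (illustrative only):

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

struct cc_hdr {			/* approximation of struct create_context */
	uint32_t Next;
	uint16_t NameOffset, NameLength, Reserved, DataOffset;
	uint32_t DataLength;
} __attribute__((packed));

struct posix_cc {
	struct cc_hdr ccontext;
	uint8_t  Name[16];
	uint32_t Mode;
	uint32_t Reserved;
} __attribute__((packed));

int main(void)
{
	assert(offsetof(struct posix_cc, Name) == 16);	/* NameOffset */
	assert(offsetof(struct posix_cc, Mode) == 32);	/* DataOffset */
	return 0;
}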
/* See MS-SMB2 2.2.13.2.11 */
/* Flags */
#define SMB2_DHANDLE_FLAG_PERSISTENT 0x00000002
@@ -894,7 +896,7 @@ struct smb2_ioctl_req {
} __packed;
struct smb2_ioctl_rsp {
- struct smb2_hdr hdr;
+ struct smb2_sync_hdr sync_hdr;
__le16 StructureSize; /* Must be 57 */
__u16 Reserved;
__le32 CtlCode;
@@ -921,7 +923,7 @@ struct smb2_close_req {
} __packed;
struct smb2_close_rsp {
- struct smb2_hdr hdr;
+ struct smb2_sync_hdr sync_hdr;
__le16 StructureSize; /* 60 */
__le16 Flags;
__le32 Reserved;
@@ -944,7 +946,7 @@ struct smb2_flush_req {
} __packed;
struct smb2_flush_rsp {
- struct smb2_hdr hdr;
+ struct smb2_sync_hdr sync_hdr;
__le16 StructureSize;
__le16 Reserved;
} __packed;
@@ -976,7 +978,7 @@ struct smb2_read_plain_req {
} __packed;
struct smb2_read_rsp {
- struct smb2_hdr hdr;
+ struct smb2_sync_hdr sync_hdr;
__le16 StructureSize; /* Must be 17 */
__u8 DataOffset;
__u8 Reserved;
@@ -1007,7 +1009,7 @@ struct smb2_write_req {
} __packed;
struct smb2_write_rsp {
- struct smb2_hdr hdr;
+ struct smb2_sync_hdr sync_hdr;
__le16 StructureSize; /* Must be 17 */
__u8 DataOffset;
__u8 Reserved;
@@ -1041,7 +1043,7 @@ struct smb2_lock_req {
} __packed;
struct smb2_lock_rsp {
- struct smb2_hdr hdr;
+ struct smb2_sync_hdr sync_hdr;
__le16 StructureSize; /* Must be 4 */
__le16 Reserved;
} __packed;
@@ -1053,7 +1055,7 @@ struct smb2_echo_req {
} __packed;
struct smb2_echo_rsp {
- struct smb2_hdr hdr;
+ struct smb2_sync_hdr sync_hdr;
__le16 StructureSize; /* Must be 4 */
__u16 Reserved;
} __packed;
@@ -1079,7 +1081,7 @@ struct smb2_query_directory_req {
} __packed;
struct smb2_query_directory_rsp {
- struct smb2_hdr hdr;
+ struct smb2_sync_hdr sync_hdr;
__le16 StructureSize; /* Must be 9 */
__le16 OutputBufferOffset;
__le32 OutputBufferLength;
@@ -1128,7 +1130,7 @@ struct smb2_query_info_req {
} __packed;
struct smb2_query_info_rsp {
- struct smb2_hdr hdr;
+ struct smb2_sync_hdr sync_hdr;
__le16 StructureSize; /* Must be 9 */
__le16 OutputBufferOffset;
__le32 OutputBufferLength;
@@ -1150,12 +1152,11 @@ struct smb2_set_info_req {
} __packed;
struct smb2_set_info_rsp {
- struct smb2_hdr hdr;
+ struct smb2_sync_hdr sync_hdr;
__le16 StructureSize; /* Must be 2 */
} __packed;
-/* oplock break without an rfc1002 header */
-struct smb2_oplock_break_req {
+struct smb2_oplock_break {
struct smb2_sync_hdr sync_hdr;
__le16 StructureSize; /* Must be 24 */
__u8 OplockLevel;
@@ -1165,21 +1166,10 @@ struct smb2_oplock_break_req {
__u64 VolatileFid;
} __packed;
-/* oplock break with an rfc1002 header */
-struct smb2_oplock_break_rsp {
- struct smb2_hdr hdr;
- __le16 StructureSize; /* Must be 24 */
- __u8 OplockLevel;
- __u8 Reserved;
- __le32 Reserved2;
- __u64 PersistentFid;
- __u64 VolatileFid;
-} __packed;
-
#define SMB2_NOTIFY_BREAK_LEASE_FLAG_ACK_REQUIRED cpu_to_le32(0x01)
struct smb2_lease_break {
- struct smb2_hdr hdr;
+ struct smb2_sync_hdr sync_hdr;
__le16 StructureSize; /* Must be 44 */
__le16 Reserved;
__le32 Flags;
diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h
index 8ba24a95db71..908555b1c6b5 100644
--- a/fs/cifs/smb2proto.h
+++ b/fs/cifs/smb2proto.h
@@ -36,8 +36,9 @@ struct smb_rqst;
extern int map_smb2_to_linux_error(char *buf, bool log_err);
extern int smb2_check_message(char *buf, unsigned int length,
struct TCP_Server_Info *server);
-extern unsigned int smb2_calc_size(void *buf);
-extern char *smb2_get_data_area_len(int *off, int *len, struct smb2_hdr *hdr);
+extern unsigned int smb2_calc_size(void *buf, struct TCP_Server_Info *server);
+extern char *smb2_get_data_area_len(int *off, int *len,
+ struct smb2_sync_hdr *shdr);
extern __le16 *cifs_convert_path_to_utf16(const char *from,
struct cifs_sb_info *cifs_sb);
@@ -65,6 +66,8 @@ extern struct cifs_ses *smb2_find_smb_ses(struct TCP_Server_Info *server,
extern int smb3_handle_read_data(struct TCP_Server_Info *server,
struct mid_q_entry *mid);
+extern int open_shroot(unsigned int xid, struct cifs_tcon *tcon,
+ struct cifs_fid *pfid);
extern void move_smb2_info_to_cifs(FILE_ALL_INFO *dst,
struct smb2_file_all_info *src);
extern int smb2_query_path_info(const unsigned int xid, struct cifs_tcon *tcon,
@@ -129,6 +132,8 @@ extern int SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon,
char **out_data, u32 *plen /* returned data len */);
extern int SMB2_close(const unsigned int xid, struct cifs_tcon *tcon,
u64 persistent_file_id, u64 volatile_file_id);
+extern int SMB2_close_flags(const unsigned int xid, struct cifs_tcon *tcon,
+ u64 persistent_fid, u64 volatile_fid, int flags);
extern int SMB2_flush(const unsigned int xid, struct cifs_tcon *tcon,
u64 persistent_file_id, u64 volatile_file_id);
extern int SMB2_query_eas(const unsigned int xid, struct cifs_tcon *tcon,
diff --git a/fs/cifs/smb2transport.c b/fs/cifs/smb2transport.c
index 8806f3f76c1d..2c671123a6bf 100644
--- a/fs/cifs/smb2transport.c
+++ b/fs/cifs/smb2transport.c
@@ -480,7 +480,7 @@ smb2_verify_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server)
unsigned int rc;
char server_response_sig[16];
struct smb2_sync_hdr *shdr =
- (struct smb2_sync_hdr *)rqst->rq_iov[1].iov_base;
+ (struct smb2_sync_hdr *)rqst->rq_iov[0].iov_base;
if ((shdr->Command == SMB2_NEGOTIATE) ||
(shdr->Command == SMB2_SESSION_SETUP) ||
@@ -605,14 +605,12 @@ smb2_check_receive(struct mid_q_entry *mid, struct TCP_Server_Info *server,
bool log_error)
{
unsigned int len = mid->resp_buf_size;
- struct kvec iov[2];
+ struct kvec iov[1];
struct smb_rqst rqst = { .rq_iov = iov,
- .rq_nvec = 2 };
+ .rq_nvec = 1 };
iov[0].iov_base = (char *)mid->resp_buf;
- iov[0].iov_len = 4;
- iov[1].iov_base = (char *)mid->resp_buf + 4;
- iov[1].iov_len = len;
+ iov[0].iov_len = len;
dump_smb(mid->resp_buf, min_t(u32, 80, len));
/* convert the length into a more usable form */
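With the 4-byte RFC1002 length no longer carried in the response buffer, one kvec covering the whole SMB2 frame replaces the old { 4-byte length, body } pair. A userspace analogue of the new iov setup, using struct iovec in place of the kernel's kvec (illustrative only):

#include <stddef.h>
#include <sys/uio.h>

static void fill_resp_iov(struct iovec *iov, char *resp_buf, size_t len)
{
	iov[0].iov_base = resp_buf;	/* frame now starts at the sync header */
	iov[0].iov_len  = len;		/* mid->resp_buf_size in the patch */
}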
diff --git a/fs/cifs/trace.c b/fs/cifs/trace.c
new file mode 100644
index 000000000000..bd4a546feec1
--- /dev/null
+++ b/fs/cifs/trace.c
@@ -0,0 +1,18 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2018, Microsoft Corporation.
+ *
+ * Author(s): Steve French <stfrench@microsoft.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
+ * the GNU General Public License for more details.
+ */
+#define CREATE_TRACE_POINTS
+#include "trace.h"
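trace.c follows the standard kernel tracepoint convention: exactly one translation unit defines CREATE_TRACE_POINTS before including the event header, so <trace/define_trace.h> expands the DECLARE/DEFINE macros in trace.h into the actual tracepoint objects there, while every other includer sees declarations only. A sketch of a call site, kernel context assumed (the event name is from trace.h below; the surrounding function is hypothetical):

#include "trace.h"	/* no CREATE_TRACE_POINTS here: declarations only */

static void example_close_failure(unsigned int xid, __u64 fid, __u32 tid,
				  __u64 sesid, int rc)
{
	/* compiles to a no-op branch unless the event is enabled */
	trace_smb3_close_err(xid, fid, tid, sesid, rc);
}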
diff --git a/fs/cifs/trace.h b/fs/cifs/trace.h
new file mode 100644
index 000000000000..61e74d455d90
--- /dev/null
+++ b/fs/cifs/trace.h
@@ -0,0 +1,429 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2018, Microsoft Corporation.
+ *
+ * Author(s): Steve French <stfrench@microsoft.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
+ * the GNU General Public License for more details.
+ */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM cifs
+
+#if !defined(_CIFS_TRACE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _CIFS_TRACE_H
+
+#include <linux/tracepoint.h>
+
+/* For logging errors in read or write */
+DECLARE_EVENT_CLASS(smb3_rw_err_class,
+ TP_PROTO(unsigned int xid,
+ __u64 fid,
+ __u32 tid,
+ __u64 sesid,
+ __u64 offset,
+ __u32 len,
+ int rc),
+ TP_ARGS(xid, fid, tid, sesid, offset, len, rc),
+ TP_STRUCT__entry(
+ __field(unsigned int, xid)
+ __field(__u64, fid)
+ __field(__u32, tid)
+ __field(__u64, sesid)
+ __field(__u64, offset)
+ __field(__u32, len)
+ __field(int, rc)
+ ),
+ TP_fast_assign(
+ __entry->xid = xid;
+ __entry->fid = fid;
+ __entry->tid = tid;
+ __entry->sesid = sesid;
+ __entry->offset = offset;
+ __entry->len = len;
+ __entry->rc = rc;
+ ),
+ TP_printk("\txid=%u sid=0x%llx tid=0x%x fid=0x%llx offset=0x%llx len=0x%x rc=%d",
+ __entry->xid, __entry->sesid, __entry->tid, __entry->fid,
+ __entry->offset, __entry->len, __entry->rc)
+)
+
+#define DEFINE_SMB3_RW_ERR_EVENT(name) \
+DEFINE_EVENT(smb3_rw_err_class, smb3_##name, \
+ TP_PROTO(unsigned int xid, \
+ __u64 fid, \
+ __u32 tid, \
+ __u64 sesid, \
+ __u64 offset, \
+ __u32 len, \
+ int rc), \
+ TP_ARGS(xid, fid, tid, sesid, offset, len, rc))
+
+DEFINE_SMB3_RW_ERR_EVENT(write_err);
+DEFINE_SMB3_RW_ERR_EVENT(read_err);
+
+
+/* For logging successful read or write */
+DECLARE_EVENT_CLASS(smb3_rw_done_class,
+ TP_PROTO(unsigned int xid,
+ __u64 fid,
+ __u32 tid,
+ __u64 sesid,
+ __u64 offset,
+ __u32 len),
+ TP_ARGS(xid, fid, tid, sesid, offset, len),
+ TP_STRUCT__entry(
+ __field(unsigned int, xid)
+ __field(__u64, fid)
+ __field(__u32, tid)
+ __field(__u64, sesid)
+ __field(__u64, offset)
+ __field(__u32, len)
+ ),
+ TP_fast_assign(
+ __entry->xid = xid;
+ __entry->fid = fid;
+ __entry->tid = tid;
+ __entry->sesid = sesid;
+ __entry->offset = offset;
+ __entry->len = len;
+ ),
+ TP_printk("xid=%u sid=0x%llx tid=0x%x fid=0x%llx offset=0x%llx len=0x%x",
+ __entry->xid, __entry->sesid, __entry->tid, __entry->fid,
+ __entry->offset, __entry->len)
+)
+
+#define DEFINE_SMB3_RW_DONE_EVENT(name) \
+DEFINE_EVENT(smb3_rw_done_class, smb3_##name, \
+ TP_PROTO(unsigned int xid, \
+ __u64 fid, \
+ __u32 tid, \
+ __u64 sesid, \
+ __u64 offset, \
+ __u32 len), \
+ TP_ARGS(xid, fid, tid, sesid, offset, len))
+
+DEFINE_SMB3_RW_DONE_EVENT(write_done);
+DEFINE_SMB3_RW_DONE_EVENT(read_done);
+
+/*
+ * For handle based calls other than read and write, and get/set info
+ */
+DECLARE_EVENT_CLASS(smb3_fd_err_class,
+ TP_PROTO(unsigned int xid,
+ __u64 fid,
+ __u32 tid,
+ __u64 sesid,
+ int rc),
+ TP_ARGS(xid, fid, tid, sesid, rc),
+ TP_STRUCT__entry(
+ __field(unsigned int, xid)
+ __field(__u64, fid)
+ __field(__u32, tid)
+ __field(__u64, sesid)
+ __field(int, rc)
+ ),
+ TP_fast_assign(
+ __entry->xid = xid;
+ __entry->fid = fid;
+ __entry->tid = tid;
+ __entry->sesid = sesid;
+ __entry->rc = rc;
+ ),
+ TP_printk("\txid=%u sid=0x%llx tid=0x%x fid=0x%llx rc=%d",
+ __entry->xid, __entry->sesid, __entry->tid, __entry->fid,
+ __entry->rc)
+)
+
+#define DEFINE_SMB3_FD_ERR_EVENT(name) \
+DEFINE_EVENT(smb3_fd_err_class, smb3_##name, \
+ TP_PROTO(unsigned int xid, \
+ __u64 fid, \
+ __u32 tid, \
+ __u64 sesid, \
+ int rc), \
+ TP_ARGS(xid, fid, tid, sesid, rc))
+
+DEFINE_SMB3_FD_ERR_EVENT(flush_err);
+DEFINE_SMB3_FD_ERR_EVENT(lock_err);
+DEFINE_SMB3_FD_ERR_EVENT(close_err);
+
+/*
+ * For handle based query/set info calls
+ */
+DECLARE_EVENT_CLASS(smb3_inf_err_class,
+ TP_PROTO(unsigned int xid,
+ __u64 fid,
+ __u32 tid,
+ __u64 sesid,
+ __u8 infclass,
+ __u32 type,
+ int rc),
+ TP_ARGS(xid, fid, tid, sesid, infclass, type, rc),
+ TP_STRUCT__entry(
+ __field(unsigned int, xid)
+ __field(__u64, fid)
+ __field(__u32, tid)
+ __field(__u64, sesid)
+ __field(__u8, infclass)
+ __field(__u32, type)
+ __field(int, rc)
+ ),
+ TP_fast_assign(
+ __entry->xid = xid;
+ __entry->fid = fid;
+ __entry->tid = tid;
+ __entry->sesid = sesid;
+ __entry->infclass = infclass;
+ __entry->type = type;
+ __entry->rc = rc;
+ ),
+ TP_printk("xid=%u sid=0x%llx tid=0x%x fid=0x%llx class=%u type=0x%x rc=%d",
+ __entry->xid, __entry->sesid, __entry->tid, __entry->fid,
+ __entry->infclass, __entry->type, __entry->rc)
+)
+
+#define DEFINE_SMB3_INF_ERR_EVENT(name) \
+DEFINE_EVENT(smb3_inf_err_class, smb3_##name, \
+ TP_PROTO(unsigned int xid, \
+ __u64 fid, \
+ __u32 tid, \
+ __u64 sesid, \
+ __u8 infclass, \
+ __u32 type, \
+ int rc), \
+ TP_ARGS(xid, fid, tid, sesid, infclass, type, rc))
+
+DEFINE_SMB3_INF_ERR_EVENT(query_info_err);
+DEFINE_SMB3_INF_ERR_EVENT(set_info_err);
+DEFINE_SMB3_INF_ERR_EVENT(fsctl_err);
+
+/*
+ * For logging SMB3 Status code and Command for responses which return errors
+ */
+DECLARE_EVENT_CLASS(smb3_cmd_err_class,
+ TP_PROTO(__u32 tid,
+ __u64 sesid,
+ __u16 cmd,
+ __u64 mid,
+ __u32 status,
+ int rc),
+ TP_ARGS(tid, sesid, cmd, mid, status, rc),
+ TP_STRUCT__entry(
+ __field(__u32, tid)
+ __field(__u64, sesid)
+ __field(__u16, cmd)
+ __field(__u64, mid)
+ __field(__u32, status)
+ __field(int, rc)
+ ),
+ TP_fast_assign(
+ __entry->tid = tid;
+ __entry->sesid = sesid;
+ __entry->cmd = cmd;
+ __entry->mid = mid;
+ __entry->status = status;
+ __entry->rc = rc;
+ ),
+ TP_printk("\tsid=0x%llx tid=0x%x cmd=%u mid=%llu status=0x%x rc=%d",
+ __entry->sesid, __entry->tid, __entry->cmd, __entry->mid,
+ __entry->status, __entry->rc)
+)
+
+#define DEFINE_SMB3_CMD_ERR_EVENT(name) \
+DEFINE_EVENT(smb3_cmd_err_class, smb3_##name, \
+ TP_PROTO(__u32 tid, \
+ __u64 sesid, \
+ __u16 cmd, \
+ __u64 mid, \
+ __u32 status, \
+ int rc), \
+ TP_ARGS(tid, sesid, cmd, mid, status, rc))
+
+DEFINE_SMB3_CMD_ERR_EVENT(cmd_err);
+
+DECLARE_EVENT_CLASS(smb3_cmd_done_class,
+ TP_PROTO(__u32 tid,
+ __u64 sesid,
+ __u16 cmd,
+ __u64 mid),
+ TP_ARGS(tid, sesid, cmd, mid),
+ TP_STRUCT__entry(
+ __field(__u32, tid)
+ __field(__u64, sesid)
+ __field(__u16, cmd)
+ __field(__u64, mid)
+ ),
+ TP_fast_assign(
+ __entry->tid = tid;
+ __entry->sesid = sesid;
+ __entry->cmd = cmd;
+ __entry->mid = mid;
+ ),
+ TP_printk("\tsid=0x%llx tid=0x%x cmd=%u mid=%llu",
+ __entry->sesid, __entry->tid,
+ __entry->cmd, __entry->mid)
+)
+
+#define DEFINE_SMB3_CMD_DONE_EVENT(name) \
+DEFINE_EVENT(smb3_cmd_done_class, smb3_##name, \
+ TP_PROTO(__u32 tid, \
+ __u64 sesid, \
+ __u16 cmd, \
+ __u64 mid), \
+ TP_ARGS(tid, sesid, cmd, mid))
+
+DEFINE_SMB3_CMD_DONE_EVENT(cmd_done);
+
+DECLARE_EVENT_CLASS(smb3_exit_err_class,
+ TP_PROTO(unsigned int xid,
+ const char *func_name,
+ int rc),
+ TP_ARGS(xid, func_name, rc),
+ TP_STRUCT__entry(
+ __field(unsigned int, xid)
+ __field(const char *, func_name)
+ __field(int, rc)
+ ),
+ TP_fast_assign(
+ __entry->xid = xid;
+ __entry->func_name = func_name;
+ __entry->rc = rc;
+ ),
+ TP_printk("\t%s: xid=%u rc=%d",
+ __entry->func_name, __entry->xid, __entry->rc)
+)
+
+#define DEFINE_SMB3_EXIT_ERR_EVENT(name) \
+DEFINE_EVENT(smb3_exit_err_class, smb3_##name, \
+ TP_PROTO(unsigned int xid, \
+ const char *func_name, \
+ int rc), \
+ TP_ARGS(xid, func_name, rc))
+
+DEFINE_SMB3_EXIT_ERR_EVENT(exit_err);
+
+DECLARE_EVENT_CLASS(smb3_enter_exit_class,
+ TP_PROTO(unsigned int xid,
+ const char *func_name),
+ TP_ARGS(xid, func_name),
+ TP_STRUCT__entry(
+ __field(unsigned int, xid)
+ __field(const char *, func_name)
+ ),
+ TP_fast_assign(
+ __entry->xid = xid;
+ __entry->func_name = func_name;
+ ),
+ TP_printk("\t%s: xid=%u",
+ __entry->func_name, __entry->xid)
+)
+
+#define DEFINE_SMB3_ENTER_EXIT_EVENT(name) \
+DEFINE_EVENT(smb3_enter_exit_class, smb3_##name, \
+ TP_PROTO(unsigned int xid, \
+ const char *func_name), \
+ TP_ARGS(xid, func_name))
+
+DEFINE_SMB3_ENTER_EXIT_EVENT(enter);
+DEFINE_SMB3_ENTER_EXIT_EVENT(exit_done);
+
+/*
+ * For smb2/smb3 open call
+ */
+DECLARE_EVENT_CLASS(smb3_open_err_class,
+ TP_PROTO(unsigned int xid,
+ __u32 tid,
+ __u64 sesid,
+ int create_options,
+ int desired_access,
+ int rc),
+ TP_ARGS(xid, tid, sesid, create_options, desired_access, rc),
+ TP_STRUCT__entry(
+ __field(unsigned int, xid)
+ __field(__u32, tid)
+ __field(__u64, sesid)
+ __field(int, create_options)
+ __field(int, desired_access)
+ __field(int, rc)
+ ),
+ TP_fast_assign(
+ __entry->xid = xid;
+ __entry->tid = tid;
+ __entry->sesid = sesid;
+ __entry->create_options = create_options;
+ __entry->desired_access = desired_access;
+ __entry->rc = rc;
+ ),
+ TP_printk("xid=%u sid=0x%llx tid=0x%x cr_opts=0x%x des_access=0x%x rc=%d",
+ __entry->xid, __entry->sesid, __entry->tid,
+ __entry->create_options, __entry->desired_access, __entry->rc)
+)
+
+#define DEFINE_SMB3_OPEN_ERR_EVENT(name) \
+DEFINE_EVENT(smb3_open_err_class, smb3_##name, \
+ TP_PROTO(unsigned int xid, \
+ __u32 tid, \
+ __u64 sesid, \
+ int create_options, \
+ int desired_access, \
+ int rc), \
+ TP_ARGS(xid, tid, sesid, create_options, desired_access, rc))
+
+DEFINE_SMB3_OPEN_ERR_EVENT(open_err);
+
+
+DECLARE_EVENT_CLASS(smb3_open_done_class,
+ TP_PROTO(unsigned int xid,
+ __u64 fid,
+ __u32 tid,
+ __u64 sesid,
+ int create_options,
+ int desired_access),
+ TP_ARGS(xid, fid, tid, sesid, create_options, desired_access),
+ TP_STRUCT__entry(
+ __field(unsigned int, xid)
+ __field(__u64, fid)
+ __field(__u32, tid)
+ __field(__u64, sesid)
+ __field(int, create_options)
+ __field(int, desired_access)
+ ),
+ TP_fast_assign(
+ __entry->xid = xid;
+ __entry->fid = fid;
+ __entry->tid = tid;
+ __entry->sesid = sesid;
+ __entry->create_options = create_options;
+ __entry->desired_access = desired_access;
+ ),
+ TP_printk("xid=%u sid=0x%llx tid=0x%x fid=0x%llx cr_opts=0x%x des_access=0x%x",
+ __entry->xid, __entry->sesid, __entry->tid, __entry->fid,
+ __entry->create_options, __entry->desired_access)
+)
+
+#define DEFINE_SMB3_OPEN_DONE_EVENT(name) \
+DEFINE_EVENT(smb3_open_done_class, smb3_##name, \
+ TP_PROTO(unsigned int xid, \
+ __u64 fid, \
+ __u32 tid, \
+ __u64 sesid, \
+ int create_options, \
+ int desired_access), \
+ TP_ARGS(xid, fid, tid, sesid, create_options, desired_access))
+
+DEFINE_SMB3_OPEN_DONE_EVENT(open_done);
+
+#endif /* _CIFS_TRACE_H */
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE trace
+#include <trace/define_trace.h>
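A practical property of the DECLARE_EVENT_CLASS/DEFINE_EVENT split above is that each additional event of an existing shape costs a single line. For example, a hypothetical tree-disconnect error event reusing smb3_fd_err_class would be just:

/* hypothetical addition, not in this patch */
DEFINE_SMB3_FD_ERR_EVENT(tdis_err);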
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 927226a2122f..e7254e386b79 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -800,8 +800,8 @@ cifs_send_recv(const unsigned int xid, struct cifs_ses *ses,
#ifdef CONFIG_CIFS_SMB311
if ((ses->status == CifsNew) || (optype & CIFS_NEG_OP)) {
struct kvec iov = {
- .iov_base = buf + 4,
- .iov_len = get_rfc1002_length(buf)
+ .iov_base = buf,
+ .iov_len = midQ->resp_buf_size
};
smb311_update_preauth_hash(ses, &iov, 1);
}
diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
index 017b0ab19bc4..c4fb9ad7c808 100644
--- a/fs/cramfs/inode.c
+++ b/fs/cramfs/inode.c
@@ -492,7 +492,7 @@ static void cramfs_kill_sb(struct super_block *sb)
{
struct cramfs_sb_info *sbi = CRAMFS_SB(sb);
- if (IS_ENABLED(CCONFIG_CRAMFS_MTD) && sb->s_mtd) {
+ if (IS_ENABLED(CONFIG_CRAMFS_MTD) && sb->s_mtd) {
if (sbi && sbi->mtd_point_size)
mtd_unpoint(sb->s_mtd, 0, sbi->mtd_point_size);
kill_mtd_super(sb);
@@ -808,10 +808,7 @@ static struct dentry *cramfs_lookup(struct inode *dir, struct dentry *dentry, un
}
out:
mutex_unlock(&read_mutex);
- if (IS_ERR(inode))
- return ERR_CAST(inode);
- d_add(dentry, inode);
- return NULL;
+ return d_splice_alias(inode, dentry);
}
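d_splice_alias() folds in the logic the removed lines spelled out by hand, and additionally reuses an existing alias when the inode already has one, which matters for directories. For comparison, the pre-patch tail of the lookup (kernel context assumed; this is the logic being replaced, shown as a function):

static struct dentry *old_lookup_tail(struct inode *inode,
				      struct dentry *dentry)
{
	if (IS_ERR(inode))
		return ERR_CAST(inode);
	d_add(dentry, inode);	/* no alias reuse on this path */
	return NULL;
}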
static int cramfs_readpage(struct file *file, struct page *page)
diff --git a/fs/dax.c b/fs/dax.c
index aaec72ded1b6..aa86d9f971a4 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -677,7 +677,7 @@ static void dax_mapping_entry_mkclean(struct address_space *mapping,
* downgrading page table protection not changing it to point
* to a new page.
*
- * See Documentation/vm/mmu_notifier.txt
+ * See Documentation/vm/mmu_notifier.rst
*/
if (pmdp) {
#ifdef CONFIG_FS_DAX_PMD
diff --git a/fs/dcache.c b/fs/dcache.c
index 86d2de63461e..0e8e5de3c48a 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -580,6 +580,7 @@ static void __dentry_kill(struct dentry *dentry)
spin_unlock(&dentry->d_lock);
if (likely(can_free))
dentry_free(dentry);
+ cond_resched();
}
static struct dentry *__lock_parent(struct dentry *dentry)
@@ -827,30 +828,24 @@ static inline bool fast_dput(struct dentry *dentry)
*/
void dput(struct dentry *dentry)
{
- if (unlikely(!dentry))
- return;
-repeat:
- might_sleep();
- rcu_read_lock();
- if (likely(fast_dput(dentry))) {
+ while (dentry) {
+ might_sleep();
+ rcu_read_lock();
+ if (likely(fast_dput(dentry))) {
+ rcu_read_unlock();
+ return;
+ }
+ /* Slow case: now with the dentry lock held */
 rcu_read_unlock();
- return;
- }
-
- /* Slow case: now with the dentry lock held */
- rcu_read_unlock();
- if (likely(retain_dentry(dentry))) {
- spin_unlock(&dentry->d_lock);
- return;
- }
- dentry = dentry_kill(dentry);
- if (dentry) {
- cond_resched();
- goto repeat;
+ if (likely(retain_dentry(dentry))) {
+ spin_unlock(&dentry->d_lock);
+ return;
+ }
+ dentry = dentry_kill(dentry);
}
}
EXPORT_SYMBOL(dput);
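The dput() rewrite replaces the repeat:/goto tail with an explicit while loop over the chain of dentries that dentry_kill() hands back, with the cond_resched() moved into __dentry_kill() (see the earlier hunk). The same goto-to-loop conversion on a toy parent-linked structure, purely illustrative:

#include <stdlib.h>

struct node {
	struct node *parent;
};

static void put_chain(struct node *n)
{
	while (n) {
		struct node *parent = n->parent;	/* save before freeing */

		free(n);		/* stands in for dentry_kill() */
		n = parent;		/* keep walking up, as the new dput() does */
	}
}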
@@ -907,6 +902,35 @@ repeat:
}
EXPORT_SYMBOL(dget_parent);
+static struct dentry * __d_find_any_alias(struct inode *inode)
+{
+ struct dentry *alias;
+
+ if (hlist_empty(&inode->i_dentry))
+ return NULL;
+ alias = hlist_entry(inode->i_dentry.first, struct dentry, d_u.d_alias);
+ __dget(alias);
+ return alias;
+}
+
+/**
+ * d_find_any_alias - find any alias for a given inode
+ * @inode: inode to find an alias for
+ *
+ * If any aliases exist for the given inode, take and return a
+ * reference for one of them. If no aliases exist, return %NULL.
+ */
+struct dentry *d_find_any_alias(struct inode *inode)
+{
+ struct dentry *de;
+
+ spin_lock(&inode->i_lock);
+ de = __d_find_any_alias(inode);
+ spin_unlock(&inode->i_lock);
+ return de;
+}
+EXPORT_SYMBOL(d_find_any_alias);
+
/**
* d_find_alias - grab a hashed alias of inode
* @inode: inode in question
@@ -923,34 +947,19 @@ EXPORT_SYMBOL(dget_parent);
*/
static struct dentry *__d_find_alias(struct inode *inode)
{
- struct dentry *alias, *discon_alias;
+ struct dentry *alias;
+
+ if (S_ISDIR(inode->i_mode))
+ return __d_find_any_alias(inode);
-again:
- discon_alias = NULL;
hlist_for_each_entry(alias, &inode->i_dentry, d_u.d_alias) {
spin_lock(&alias->d_lock);
- if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) {
- if (IS_ROOT(alias) &&
- (alias->d_flags & DCACHE_DISCONNECTED)) {
- discon_alias = alias;
- } else {
- __dget_dlock(alias);
- spin_unlock(&alias->d_lock);
- return alias;
- }
- }
- spin_unlock(&alias->d_lock);
- }
- if (discon_alias) {
- alias = discon_alias;
- spin_lock(&alias->d_lock);
- if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) {
+ if (!d_unhashed(alias)) {
__dget_dlock(alias);
spin_unlock(&alias->d_lock);
return alias;
}
spin_unlock(&alias->d_lock);
- goto again;
}
return NULL;
}
@@ -1052,8 +1061,6 @@ static void shrink_dentry_list(struct list_head *list)
while (!list_empty(list)) {
struct dentry *dentry, *parent;
- cond_resched();
-
dentry = list_entry(list->prev, struct dentry, d_lru);
spin_lock(&dentry->d_lock);
rcu_read_lock();
@@ -1230,13 +1237,11 @@ enum d_walk_ret {
* @parent: start of walk
* @data: data passed to @enter() and @finish()
* @enter: callback when first entering the dentry
- * @finish: callback when successfully finished the walk
*
- * The @enter() and @finish() callbacks are called with d_lock held.
+ * The @enter() callback is called with d_lock held.
*/
static void d_walk(struct dentry *parent, void *data,
- enum d_walk_ret (*enter)(void *, struct dentry *),
- void (*finish)(void *))
+ enum d_walk_ret (*enter)(void *, struct dentry *))
{
struct dentry *this_parent;
struct list_head *next;
@@ -1325,8 +1330,6 @@ ascend:
if (need_seqretry(&rename_lock, seq))
goto rename_retry;
rcu_read_unlock();
- if (finish)
- finish(data);
out_unlock:
spin_unlock(&this_parent->d_lock);
@@ -1375,7 +1378,7 @@ int path_has_submounts(const struct path *parent)
struct check_mount data = { .mnt = parent->mnt, .mounted = 0 };
read_seqlock_excl(&mount_lock);
- d_walk(parent->dentry, &data, path_check_mount, NULL);
+ d_walk(parent->dentry, &data, path_check_mount);
read_sequnlock_excl(&mount_lock);
return data.mounted;
@@ -1483,11 +1486,16 @@ void shrink_dcache_parent(struct dentry *parent)
data.start = parent;
data.found = 0;
- d_walk(parent, &data, select_collect, NULL);
+ d_walk(parent, &data, select_collect);
+
+ if (!list_empty(&data.dispose)) {
+ shrink_dentry_list(&data.dispose);
+ continue;
+ }
+
+ cond_resched();
if (!data.found)
break;
-
- shrink_dentry_list(&data.dispose);
}
}
EXPORT_SYMBOL(shrink_dcache_parent);
@@ -1518,7 +1526,7 @@ static enum d_walk_ret umount_check(void *_data, struct dentry *dentry)
static void do_one_tree(struct dentry *dentry)
{
shrink_dcache_parent(dentry);
- d_walk(dentry, dentry, umount_check, NULL);
+ d_walk(dentry, dentry, umount_check);
d_drop(dentry);
dput(dentry);
}
@@ -1542,78 +1550,48 @@ void shrink_dcache_for_umount(struct super_block *sb)
}
}
-struct detach_data {
- struct select_data select;
- struct dentry *mountpoint;
-};
-static enum d_walk_ret detach_and_collect(void *_data, struct dentry *dentry)
+static enum d_walk_ret find_submount(void *_data, struct dentry *dentry)
{
- struct detach_data *data = _data;
-
+ struct dentry **victim = _data;
if (d_mountpoint(dentry)) {
__dget_dlock(dentry);
- data->mountpoint = dentry;
+ *victim = dentry;
return D_WALK_QUIT;
}
-
- return select_collect(&data->select, dentry);
-}
-
-static void check_and_drop(void *_data)
-{
- struct detach_data *data = _data;
-
- if (!data->mountpoint && list_empty(&data->select.dispose))
- __d_drop(data->select.start);
+ return D_WALK_CONTINUE;
}
/**
* d_invalidate - detach submounts, prune dcache, and drop
* @dentry: dentry to invalidate (aka detach, prune and drop)
- *
- * no dcache lock.
- *
- * The final d_drop is done as an atomic operation relative to
- * rename_lock ensuring there are no races with d_set_mounted. This
- * ensures there are no unhashed dentries on the path to a mountpoint.
*/
void d_invalidate(struct dentry *dentry)
{
- /*
- * If it's already been dropped, return OK.
- */
+ bool had_submounts = false;
spin_lock(&dentry->d_lock);
if (d_unhashed(dentry)) {
spin_unlock(&dentry->d_lock);
return;
}
+ __d_drop(dentry);
spin_unlock(&dentry->d_lock);
/* Negative dentries can be dropped without further checks */
- if (!dentry->d_inode) {
- d_drop(dentry);
+ if (!dentry->d_inode)
return;
- }
+ shrink_dcache_parent(dentry);
for (;;) {
- struct detach_data data;
-
- data.mountpoint = NULL;
- INIT_LIST_HEAD(&data.select.dispose);
- data.select.start = dentry;
- data.select.found = 0;
-
- d_walk(dentry, &data, detach_and_collect, check_and_drop);
-
- if (!list_empty(&data.select.dispose))
- shrink_dentry_list(&data.select.dispose);
- else if (!data.mountpoint)
+ struct dentry *victim = NULL;
+ d_walk(dentry, &victim, find_submount);
+ if (!victim) {
+ if (had_submounts)
+ shrink_dcache_parent(dentry);
return;
-
- if (data.mountpoint) {
- detach_mounts(data.mountpoint);
- dput(data.mountpoint);
}
+ had_submounts = true;
+ detach_mounts(victim);
+ dput(victim);
}
}
EXPORT_SYMBOL(d_invalidate);
@@ -1899,6 +1877,28 @@ void d_instantiate(struct dentry *entry, struct inode * inode)
}
EXPORT_SYMBOL(d_instantiate);
+/*
+ * This should be equivalent to d_instantiate() + unlock_new_inode(),
+ * with lockdep-related part of unlock_new_inode() done before
+ * anything else. Use that instead of open-coding d_instantiate()/
+ * unlock_new_inode() combinations.
+ */
+void d_instantiate_new(struct dentry *entry, struct inode *inode)
+{
+ BUG_ON(!hlist_unhashed(&entry->d_u.d_alias));
+ BUG_ON(!inode);
+ lockdep_annotate_inode_mutex_key(inode);
+ security_d_instantiate(entry, inode);
+ spin_lock(&inode->i_lock);
+ __d_instantiate(entry, inode);
+ WARN_ON(!(inode->i_state & I_NEW));
+ inode->i_state &= ~I_NEW;
+ smp_mb();
+ wake_up_bit(&inode->i_state, __I_NEW);
+ spin_unlock(&inode->i_lock);
+}
+EXPORT_SYMBOL(d_instantiate_new);
+
/**
* d_instantiate_no_diralias - instantiate a non-aliased dentry
* @entry: dentry to complete
@@ -1941,35 +1941,6 @@ struct dentry *d_make_root(struct inode *root_inode)
}
EXPORT_SYMBOL(d_make_root);
-static struct dentry * __d_find_any_alias(struct inode *inode)
-{
- struct dentry *alias;
-
- if (hlist_empty(&inode->i_dentry))
- return NULL;
- alias = hlist_entry(inode->i_dentry.first, struct dentry, d_u.d_alias);
- __dget(alias);
- return alias;
-}
-
-/**
- * d_find_any_alias - find any alias for a given inode
- * @inode: inode to find an alias for
- *
- * If any aliases exist for the given inode, take and return a
- * reference for one of them. If no aliases exist, return %NULL.
- */
-struct dentry *d_find_any_alias(struct inode *inode)
-{
- struct dentry *de;
-
- spin_lock(&inode->i_lock);
- de = __d_find_any_alias(inode);
- spin_unlock(&inode->i_lock);
- return de;
-}
-EXPORT_SYMBOL(d_find_any_alias);
-
static struct dentry *__d_instantiate_anon(struct dentry *dentry,
struct inode *inode,
bool disconnected)
@@ -3112,7 +3083,7 @@ static enum d_walk_ret d_genocide_kill(void *data, struct dentry *dentry)
void d_genocide(struct dentry *parent)
{
- d_walk(parent, parent, d_genocide_kill, NULL);
+ d_walk(parent, parent, d_genocide_kill);
}
EXPORT_SYMBOL(d_genocide);
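Several conversions below (ecryptfs, ext2, ext4, f2fs) switch to the d_instantiate_new() helper added in the dcache.c hunk above. A sketch of the call-site pattern for a create-type method, with hypothetical myfs_* names:

static int myfs_create(struct inode *dir, struct dentry *dentry,
		       umode_t mode, bool excl)
{
	/* myfs_new_inode() is assumed to return a locked I_NEW inode. */
	struct inode *inode = myfs_new_inode(dir, mode);

	if (IS_ERR(inode))
		return PTR_ERR(inode);
	/* Replaces the old unlock_new_inode() + d_instantiate() pair,
	 * with the lockdep annotation done before the dentry is wired up. */
	d_instantiate_new(dentry, inode);
	return 0;
}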
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 874607bb6e02..093fb54cd316 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -432,8 +432,8 @@ dio_bio_alloc(struct dio *dio, struct dio_submit *sdio,
struct bio *bio;
/*
- * bio_alloc() is guaranteed to return a bio when called with
- * __GFP_RECLAIM and we request a valid number of vectors.
+ * bio_alloc() is guaranteed to return a bio when allowed to sleep and
+ * we request a valid number of vectors.
*/
bio = bio_alloc(GFP_KERNEL, nr_vecs);
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 5243989a60cc..a5e4a221435c 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -1037,6 +1037,7 @@ static void sctp_connect_to_sock(struct connection *con)
int result;
int addr_len;
struct socket *sock;
+ struct timeval tv = { .tv_sec = 5, .tv_usec = 0 };
if (con->nodeid == 0) {
log_print("attempt to connect sock 0 foiled");
@@ -1080,11 +1081,22 @@ static void sctp_connect_to_sock(struct connection *con)
log_print("connecting to %d", con->nodeid);
/* Turn off Nagle's algorithm */
- kernel_setsockopt(sock, SOL_TCP, TCP_NODELAY, (char *)&one,
+ kernel_setsockopt(sock, SOL_SCTP, SCTP_NODELAY, (char *)&one,
sizeof(one));
+ /*
+ * Make sock->ops->connect() return within the specified time, since
+ * the O_NONBLOCK argument to connect() does not work here. Afterwards,
+ * restore the default value of this attribute.
+ */
+ kernel_setsockopt(sock, SOL_SOCKET, SO_SNDTIMEO, (char *)&tv,
+ sizeof(tv));
result = sock->ops->connect(sock, (struct sockaddr *)&daddr, addr_len,
- O_NONBLOCK);
+ 0);
+ memset(&tv, 0, sizeof(tv));
+ kernel_setsockopt(sock, SOL_SOCKET, SO_SNDTIMEO, (char *)&tv,
+ sizeof(tv));
+
if (result == -EINPROGRESS)
result = 0;
if (result == 0)
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 97d17eaeba07..49121e5a8de2 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -283,8 +283,7 @@ ecryptfs_create(struct inode *directory_inode, struct dentry *ecryptfs_dentry,
iget_failed(ecryptfs_inode);
goto out;
}
- unlock_new_inode(ecryptfs_inode);
- d_instantiate(ecryptfs_dentry, ecryptfs_inode);
+ d_instantiate_new(ecryptfs_dentry, ecryptfs_inode);
out:
return rc;
}
diff --git a/fs/eventfd.c b/fs/eventfd.c
index 08d3bd602f73..61c9514da5e9 100644
--- a/fs/eventfd.c
+++ b/fs/eventfd.c
@@ -101,14 +101,20 @@ static int eventfd_release(struct inode *inode, struct file *file)
return 0;
}
-static __poll_t eventfd_poll(struct file *file, poll_table *wait)
+static struct wait_queue_head *
+eventfd_get_poll_head(struct file *file, __poll_t events)
+{
+ struct eventfd_ctx *ctx = file->private_data;
+
+ return &ctx->wqh;
+}
+
+static __poll_t eventfd_poll_mask(struct file *file, __poll_t eventmask)
{
struct eventfd_ctx *ctx = file->private_data;
__poll_t events = 0;
u64 count;
- poll_wait(file, &ctx->wqh, wait);
-
/*
* All writes to ctx->count occur within ctx->wqh.lock. This read
* can be done outside ctx->wqh.lock because we know that poll_wait
@@ -305,7 +311,8 @@ static const struct file_operations eventfd_fops = {
.show_fdinfo = eventfd_show_fdinfo,
#endif
.release = eventfd_release,
- .poll = eventfd_poll,
+ .get_poll_head = eventfd_get_poll_head,
+ .poll_mask = eventfd_poll_mask,
.read = eventfd_read,
.write = eventfd_write,
.llseek = noop_llseek,
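The eventfd conversion above illustrates the split poll interface: ->get_poll_head() returns the wait queue the core should register on, and ->poll_mask() computes readiness without calling poll_wait(). A sketch of the same pattern for a hypothetical character device:

#include <linux/fs.h>
#include <linux/poll.h>
#include <linux/wait.h>

struct mydev {
	wait_queue_head_t wqh;
	bool ready;
};

static struct wait_queue_head *
mydev_get_poll_head(struct file *file, __poll_t events)
{
	struct mydev *dev = file->private_data;

	return &dev->wqh;		/* the queue the core waits on */
}

static __poll_t mydev_poll_mask(struct file *file, __poll_t eventmask)
{
	struct mydev *dev = file->private_data;

	/* Pure readiness computation; no poll_wait() here. */
	return dev->ready ? (EPOLLIN | EPOLLRDNORM) : 0;
}

static const struct file_operations mydev_fops = {
	.get_poll_head	= mydev_get_poll_head,
	.poll_mask	= mydev_poll_mask,
	/* ... */
};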
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 602ca4285b2e..67db22fe99c5 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -884,8 +884,7 @@ static __poll_t ep_item_poll(const struct epitem *epi, poll_table *pt,
pt->_key = epi->event.events;
if (!is_file_epoll(epi->ffd.file))
- return epi->ffd.file->f_op->poll(epi->ffd.file, pt) &
- epi->event.events;
+ return vfs_poll(epi->ffd.file, pt) & epi->event.events;
ep = epi->ffd.file->private_data;
poll_wait(epi->ffd.file, &ep->poll_wait, pt);
@@ -2025,7 +2024,7 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
/* The target file descriptor must support poll */
error = -EPERM;
- if (!tf.file->f_op->poll)
+ if (!file_can_poll(tf.file))
goto error_tgt_fput;
/* Check if EPOLLWAKEUP is allowed */
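vfs_poll() and file_can_poll() wrap both the classic ->poll() method and the new ->get_poll_head()/->poll_mask() pair, so epoll no longer needs to know which of the two a file implements. Roughly, paraphrasing the 4.17-era <linux/poll.h> rather than quoting it:

static inline bool file_can_poll(struct file *file)
{
	return file->f_op->poll ||
	       (file->f_op->get_poll_head && file->f_op->poll_mask);
}

/* vfs_poll() dispatches to ->poll() or to the get_poll_head()/
 * poll_mask() pair, whichever the file provides. */
__poll_t vfs_poll(struct file *file, struct poll_table_struct *pt);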
diff --git a/fs/exofs/ore.c b/fs/exofs/ore.c
index 3c6a9c156b7a..ddbf87246898 100644
--- a/fs/exofs/ore.c
+++ b/fs/exofs/ore.c
@@ -790,7 +790,7 @@ int ore_create(struct ore_io_state *ios)
for (i = 0; i < ios->oc->numdevs; i++) {
struct osd_request *or;
- or = osd_start_request(_ios_od(ios, i), GFP_KERNEL);
+ or = osd_start_request(_ios_od(ios, i));
if (unlikely(!or)) {
ORE_ERR("%s: osd_start_request failed\n", __func__);
ret = -ENOMEM;
@@ -815,7 +815,7 @@ int ore_remove(struct ore_io_state *ios)
for (i = 0; i < ios->oc->numdevs; i++) {
struct osd_request *or;
- or = osd_start_request(_ios_od(ios, i), GFP_KERNEL);
+ or = osd_start_request(_ios_od(ios, i));
if (unlikely(!or)) {
ORE_ERR("%s: osd_start_request failed\n", __func__);
ret = -ENOMEM;
@@ -847,7 +847,7 @@ static int _write_mirror(struct ore_io_state *ios, int cur_comp)
struct ore_per_dev_state *per_dev = &ios->per_dev[cur_comp];
struct osd_request *or;
- or = osd_start_request(_ios_od(ios, dev), GFP_KERNEL);
+ or = osd_start_request(_ios_od(ios, dev));
if (unlikely(!or)) {
ORE_ERR("%s: osd_start_request failed\n", __func__);
ret = -ENOMEM;
@@ -966,7 +966,7 @@ int _ore_read_mirror(struct ore_io_state *ios, unsigned cur_comp)
return 0; /* Just an empty slot */
first_dev = per_dev->dev + first_dev % ios->layout->mirrors_p1;
- or = osd_start_request(_ios_od(ios, first_dev), GFP_KERNEL);
+ or = osd_start_request(_ios_od(ios, first_dev));
if (unlikely(!or)) {
ORE_ERR("%s: osd_start_request failed\n", __func__);
return -ENOMEM;
@@ -1060,7 +1060,7 @@ static int _truncate_mirrors(struct ore_io_state *ios, unsigned cur_comp,
struct ore_per_dev_state *per_dev = &ios->per_dev[cur_comp];
struct osd_request *or;
- or = osd_start_request(_ios_od(ios, cur_comp), GFP_KERNEL);
+ or = osd_start_request(_ios_od(ios, cur_comp));
if (unlikely(!or)) {
ORE_ERR("%s: osd_start_request failed\n", __func__);
return -ENOMEM;
diff --git a/fs/exofs/super.c b/fs/exofs/super.c
index 179cd5c2f52a..719a3152da80 100644
--- a/fs/exofs/super.c
+++ b/fs/exofs/super.c
@@ -229,7 +229,7 @@ void exofs_make_credential(u8 cred_a[OSD_CAP_LEN], const struct osd_obj_id *obj)
static int exofs_read_kern(struct osd_dev *od, u8 *cred, struct osd_obj_id *obj,
u64 offset, void *p, unsigned length)
{
- struct osd_request *or = osd_start_request(od, GFP_KERNEL);
+ struct osd_request *or = osd_start_request(od);
/* struct osd_sense_info osi = {.key = 0};*/
int ret;
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 1e01fabef130..71635909df3b 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -1264,21 +1264,11 @@ do_indirects:
static void ext2_truncate_blocks(struct inode *inode, loff_t offset)
{
- /*
- * XXX: it seems like a bug here that we don't allow
- * IS_APPEND inode to have blocks-past-i_size trimmed off.
- * review and fix this.
- *
- * Also would be nice to be able to handle IO errors and such,
- * but that's probably too much to ask.
- */
if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
S_ISLNK(inode->i_mode)))
return;
if (ext2_inode_is_fast_symlink(inode))
return;
- if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
- return;
dax_sem_down_write(EXT2_I(inode));
__ext2_truncate_blocks(inode, offset);
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c
index 55f7caadb093..152453a91877 100644
--- a/fs/ext2/namei.c
+++ b/fs/ext2/namei.c
@@ -41,8 +41,7 @@ static inline int ext2_add_nondir(struct dentry *dentry, struct inode *inode)
{
int err = ext2_add_link(dentry, inode);
if (!err) {
- unlock_new_inode(inode);
- d_instantiate(dentry, inode);
+ d_instantiate_new(dentry, inode);
return 0;
}
inode_dec_link_count(inode);
@@ -255,8 +254,7 @@ static int ext2_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode)
if (err)
goto out_fail;
- unlock_new_inode(inode);
- d_instantiate(dentry, inode);
+ d_instantiate_new(dentry, inode);
out:
return err;
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index a42e71203e53..229ea4da6785 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -2390,7 +2390,7 @@ extern int ext4_init_inode_table(struct super_block *sb,
extern void ext4_end_bitmap_read(struct buffer_head *bh, int uptodate);
/* mballoc.c */
-extern const struct file_operations ext4_seq_mb_groups_fops;
+extern const struct seq_operations ext4_mb_seq_groups_ops;
extern long ext4_mb_stats;
extern long ext4_mb_max_to_scan;
extern int ext4_mb_init(struct super_block *);
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 769a62708b1c..6884e81c1465 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -2254,7 +2254,7 @@ out:
static void *ext4_mb_seq_groups_start(struct seq_file *seq, loff_t *pos)
{
- struct super_block *sb = seq->private;
+ struct super_block *sb = PDE_DATA(file_inode(seq->file));
ext4_group_t group;
if (*pos < 0 || *pos >= ext4_get_groups_count(sb))
@@ -2265,7 +2265,7 @@ static void *ext4_mb_seq_groups_start(struct seq_file *seq, loff_t *pos)
static void *ext4_mb_seq_groups_next(struct seq_file *seq, void *v, loff_t *pos)
{
- struct super_block *sb = seq->private;
+ struct super_block *sb = PDE_DATA(file_inode(seq->file));
ext4_group_t group;
++*pos;
@@ -2277,7 +2277,7 @@ static void *ext4_mb_seq_groups_next(struct seq_file *seq, void *v, loff_t *pos)
static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v)
{
- struct super_block *sb = seq->private;
+ struct super_block *sb = PDE_DATA(file_inode(seq->file));
ext4_group_t group = (ext4_group_t) ((unsigned long) v);
int i;
int err, buddy_loaded = 0;
@@ -2330,34 +2330,13 @@ static void ext4_mb_seq_groups_stop(struct seq_file *seq, void *v)
{
}
-static const struct seq_operations ext4_mb_seq_groups_ops = {
+const struct seq_operations ext4_mb_seq_groups_ops = {
.start = ext4_mb_seq_groups_start,
.next = ext4_mb_seq_groups_next,
.stop = ext4_mb_seq_groups_stop,
.show = ext4_mb_seq_groups_show,
};
-static int ext4_mb_seq_groups_open(struct inode *inode, struct file *file)
-{
- struct super_block *sb = PDE_DATA(inode);
- int rc;
-
- rc = seq_open(file, &ext4_mb_seq_groups_ops);
- if (rc == 0) {
- struct seq_file *m = file->private_data;
- m->private = sb;
- }
- return rc;
-
-}
-
-const struct file_operations ext4_seq_mb_groups_fops = {
- .open = ext4_mb_seq_groups_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
static struct kmem_cache *get_groupinfo_cache(int blocksize_bits)
{
int cache_index = blocksize_bits - EXT4_MIN_BLOCK_LOG_SIZE;
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index b1f21e3a0763..4a09063ce1d2 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -2411,8 +2411,7 @@ static int ext4_add_nondir(handle_t *handle,
int err = ext4_add_entry(handle, dentry, inode);
if (!err) {
ext4_mark_inode_dirty(handle, inode);
- unlock_new_inode(inode);
- d_instantiate(dentry, inode);
+ d_instantiate_new(dentry, inode);
return 0;
}
drop_nlink(inode);
@@ -2651,8 +2650,7 @@ out_clear_inode:
err = ext4_mark_inode_dirty(handle, dir);
if (err)
goto out_clear_inode;
- unlock_new_inode(inode);
- d_instantiate(dentry, inode);
+ d_instantiate_new(dentry, inode);
if (IS_DIRSYNC(dir))
ext4_handle_sync(handle);
diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c
index 9ebd26c957c2..f34da0bb8f17 100644
--- a/fs/ext4/sysfs.c
+++ b/fs/ext4/sysfs.c
@@ -346,39 +346,9 @@ static struct kobject *ext4_root;
static struct kobject *ext4_feat;
-#define PROC_FILE_SHOW_DEFN(name) \
-static int name##_open(struct inode *inode, struct file *file) \
-{ \
- return single_open(file, ext4_seq_##name##_show, PDE_DATA(inode)); \
-} \
-\
-static const struct file_operations ext4_seq_##name##_fops = { \
- .open = name##_open, \
- .read = seq_read, \
- .llseek = seq_lseek, \
- .release = single_release, \
-}
-
-#define PROC_FILE_LIST(name) \
- { __stringify(name), &ext4_seq_##name##_fops }
-
-PROC_FILE_SHOW_DEFN(es_shrinker_info);
-PROC_FILE_SHOW_DEFN(options);
-
-static const struct ext4_proc_files {
- const char *name;
- const struct file_operations *fops;
-} proc_files[] = {
- PROC_FILE_LIST(options),
- PROC_FILE_LIST(es_shrinker_info),
- PROC_FILE_LIST(mb_groups),
- { NULL, NULL },
-};
-
int ext4_register_sysfs(struct super_block *sb)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
- const struct ext4_proc_files *p;
int err;
init_completion(&sbi->s_kobj_unregister);
@@ -392,11 +362,14 @@ int ext4_register_sysfs(struct super_block *sb)
if (ext4_proc_root)
sbi->s_proc = proc_mkdir(sb->s_id, ext4_proc_root);
-
if (sbi->s_proc) {
- for (p = proc_files; p->name; p++)
- proc_create_data(p->name, S_IRUGO, sbi->s_proc,
- p->fops, sb);
+ proc_create_single_data("options", S_IRUGO, sbi->s_proc,
+ ext4_seq_options_show, sb);
+ proc_create_single_data("es_shrinker_info", S_IRUGO,
+ sbi->s_proc, ext4_seq_es_shrinker_info_show,
+ sb);
+ proc_create_seq_data("mb_groups", S_IRUGO, sbi->s_proc,
+ &ext4_mb_seq_groups_ops, sb);
}
return 0;
}
@@ -404,13 +377,9 @@ int ext4_register_sysfs(struct super_block *sb)
void ext4_unregister_sysfs(struct super_block *sb)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
- const struct ext4_proc_files *p;
- if (sbi->s_proc) {
- for (p = proc_files; p->name; p++)
- remove_proc_entry(p->name, sbi->s_proc);
- remove_proc_entry(sb->s_id, ext4_proc_root);
- }
+ if (sbi->s_proc)
+ remove_proc_subtree(sb->s_id, ext4_proc_root);
kobject_del(&sbi->s_kobj);
}
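The ext4 hunks above (and the f2fs, filesystems.c and fscache conversions below) drop the per-file open/fops boilerplate in favour of proc_create_single_data() and proc_create_seq_data(); the show callback recovers its private data with PDE_DATA(file_inode(...)), as the mballoc hunk does. A sketch with a hypothetical myfs_info entry:

static int myfs_info_show(struct seq_file *m, void *v)
{
	struct super_block *sb = PDE_DATA(file_inode(m->file));

	seq_printf(m, "blocksize: %lu\n", sb->s_blocksize);
	return 0;
}

/* At registration time, instead of a hand-rolled fops: */
proc_create_single_data("myfs_info", S_IRUGO, parent, myfs_info_show, sb);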
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index d5098efe577c..75e37fd720b2 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -294,8 +294,7 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
alloc_nid_done(sbi, ino);
- d_instantiate(dentry, inode);
- unlock_new_inode(inode);
+ d_instantiate_new(dentry, inode);
if (IS_DIRSYNC(dir))
f2fs_sync_fs(sbi->sb, 1);
@@ -597,8 +596,7 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry,
err = page_symlink(inode, disk_link.name, disk_link.len);
err_out:
- d_instantiate(dentry, inode);
- unlock_new_inode(inode);
+ d_instantiate_new(dentry, inode);
/*
* Let's flush symlink data in order to avoid broken symlink as much as
@@ -661,8 +659,7 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
alloc_nid_done(sbi, inode->i_ino);
- d_instantiate(dentry, inode);
- unlock_new_inode(inode);
+ d_instantiate_new(dentry, inode);
if (IS_DIRSYNC(dir))
f2fs_sync_fs(sbi->sb, 1);
@@ -713,8 +710,7 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry,
alloc_nid_done(sbi, inode->i_ino);
- d_instantiate(dentry, inode);
- unlock_new_inode(inode);
+ d_instantiate_new(dentry, inode);
if (IS_DIRSYNC(dir))
f2fs_sync_fs(sbi->sb, 1);
diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
index f33a56d6e6dd..4b47ca6296a7 100644
--- a/fs/f2fs/sysfs.c
+++ b/fs/f2fs/sysfs.c
@@ -572,23 +572,6 @@ static int iostat_info_seq_show(struct seq_file *seq, void *offset)
return 0;
}
-#define F2FS_PROC_FILE_DEF(_name) \
-static int _name##_open_fs(struct inode *inode, struct file *file) \
-{ \
- return single_open(file, _name##_seq_show, PDE_DATA(inode)); \
-} \
- \
-static const struct file_operations f2fs_seq_##_name##_fops = { \
- .open = _name##_open_fs, \
- .read = seq_read, \
- .llseek = seq_lseek, \
- .release = single_release, \
-};
-
-F2FS_PROC_FILE_DEF(segment_info);
-F2FS_PROC_FILE_DEF(segment_bits);
-F2FS_PROC_FILE_DEF(iostat_info);
-
int __init f2fs_init_sysfs(void)
{
int ret;
@@ -632,12 +615,12 @@ int f2fs_register_sysfs(struct f2fs_sb_info *sbi)
sbi->s_proc = proc_mkdir(sb->s_id, f2fs_proc_root);
if (sbi->s_proc) {
- proc_create_data("segment_info", S_IRUGO, sbi->s_proc,
- &f2fs_seq_segment_info_fops, sb);
- proc_create_data("segment_bits", S_IRUGO, sbi->s_proc,
- &f2fs_seq_segment_bits_fops, sb);
- proc_create_data("iostat_info", S_IRUGO, sbi->s_proc,
- &f2fs_seq_iostat_info_fops, sb);
+ proc_create_single_data("segment_info", S_IRUGO, sbi->s_proc,
+ segment_info_seq_show, sb);
+ proc_create_single_data("segment_bits", S_IRUGO, sbi->s_proc,
+ segment_bits_seq_show, sb);
+ proc_create_single_data("iostat_info", S_IRUGO, sbi->s_proc,
+ iostat_info_seq_show, sb);
}
return 0;
}
diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c
index 582ca731a6c9..484ce674e0cd 100644
--- a/fs/fat/namei_msdos.c
+++ b/fs/fat/namei_msdos.c
@@ -314,10 +314,6 @@ static int msdos_rmdir(struct inode *dir, struct dentry *dentry)
int err;
mutex_lock(&MSDOS_SB(sb)->s_lock);
- /*
- * Check whether the directory is not in use, then check
- * whether it is empty.
- */
err = fat_dir_empty(inode);
if (err)
goto out;
diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c
index 2649759c478a..4f4362d5a04c 100644
--- a/fs/fat/namei_vfat.c
+++ b/fs/fat/namei_vfat.c
@@ -697,15 +697,6 @@ static int vfat_find(struct inode *dir, const struct qstr *qname,
return fat_search_long(dir, qname->name, len, sinfo);
}
-/*
- * (nfsd's) anonymous disconnected dentry?
- * NOTE: !IS_ROOT() is not anonymous (I.e. d_splice_alias() did the job).
- */
-static int vfat_d_anon_disconn(struct dentry *dentry)
-{
- return IS_ROOT(dentry) && (dentry->d_flags & DCACHE_DISCONNECTED);
-}
-
static struct dentry *vfat_lookup(struct inode *dir, struct dentry *dentry,
unsigned int flags)
{
@@ -738,8 +729,7 @@ static struct dentry *vfat_lookup(struct inode *dir, struct dentry *dentry,
* Checking "alias->d_parent == dentry->d_parent" to make sure
* FS is not corrupted (especially double linked dir).
*/
- if (alias && alias->d_parent == dentry->d_parent &&
- !vfat_d_anon_disconn(alias)) {
+ if (alias && alias->d_parent == dentry->d_parent) {
/*
* This inode has non anonymous-DCACHE_DISCONNECTED
* dentry. This means, the user did ->lookup() by an
@@ -747,7 +737,6 @@ static struct dentry *vfat_lookup(struct inode *dir, struct dentry *dentry,
*
* Switch to new one for reason of locality if possible.
*/
- BUG_ON(d_unhashed(alias));
if (!S_ISDIR(inode->i_mode))
d_move(alias, dentry);
iput(inode);
diff --git a/fs/fcntl.c b/fs/fcntl.c
index d737ff082472..c42169459298 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -871,9 +871,9 @@ int fasync_remove_entry(struct file *filp, struct fasync_struct **fapp)
if (fa->fa_file != filp)
continue;
- spin_lock_irq(&fa->fa_lock);
+ write_lock_irq(&fa->fa_lock);
fa->fa_file = NULL;
- spin_unlock_irq(&fa->fa_lock);
+ write_unlock_irq(&fa->fa_lock);
*fp = fa->fa_next;
call_rcu(&fa->fa_rcu, fasync_free_rcu);
@@ -918,13 +918,13 @@ struct fasync_struct *fasync_insert_entry(int fd, struct file *filp, struct fasy
if (fa->fa_file != filp)
continue;
- spin_lock_irq(&fa->fa_lock);
+ write_lock_irq(&fa->fa_lock);
fa->fa_fd = fd;
- spin_unlock_irq(&fa->fa_lock);
+ write_unlock_irq(&fa->fa_lock);
goto out;
}
- spin_lock_init(&new->fa_lock);
+ rwlock_init(&new->fa_lock);
new->magic = FASYNC_MAGIC;
new->fa_file = filp;
new->fa_fd = fd;
@@ -987,14 +987,13 @@ static void kill_fasync_rcu(struct fasync_struct *fa, int sig, int band)
{
while (fa) {
struct fown_struct *fown;
- unsigned long flags;
if (fa->magic != FASYNC_MAGIC) {
printk(KERN_ERR "kill_fasync: bad magic number in "
"fasync_struct!\n");
return;
}
- spin_lock_irqsave(&fa->fa_lock, flags);
+ read_lock(&fa->fa_lock);
if (fa->fa_file) {
fown = &fa->fa_file->f_owner;
/* Don't send SIGURG to processes which have not set a
@@ -1003,7 +1002,7 @@ static void kill_fasync_rcu(struct fasync_struct *fa, int sig, int band)
if (!(sig == SIGURG && fown->signum == 0))
send_sigio(fown, fa->fa_fd, band);
}
- spin_unlock_irqrestore(&fa->fa_lock, flags);
+ read_unlock(&fa->fa_lock);
fa = rcu_dereference(fa->fa_next);
}
}
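The fcntl hunks convert fa_lock from a spinlock to an rwlock: kill_fasync() only reads the entry, so concurrent signal delivery can proceed under read_lock(), while insertion and removal still serialize through write_lock_irq(). The generic shape of such a conversion, as a sketch with hypothetical names:

static DEFINE_RWLOCK(example_lock);
static int example_fd;

static void example_update(int fd)	/* writer: exclusive access */
{
	write_lock_irq(&example_lock);
	example_fd = fd;
	write_unlock_irq(&example_lock);
}

static int example_read(void)		/* readers may run concurrently */
{
	int fd;

	read_lock(&example_lock);
	fd = example_fd;
	read_unlock(&example_lock);
	return fd;
}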
diff --git a/fs/filesystems.c b/fs/filesystems.c
index f2728a4a03a1..b03f57b1105b 100644
--- a/fs/filesystems.c
+++ b/fs/filesystems.c
@@ -238,21 +238,9 @@ static int filesystems_proc_show(struct seq_file *m, void *v)
return 0;
}
-static int filesystems_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, filesystems_proc_show, NULL);
-}
-
-static const struct file_operations filesystems_proc_fops = {
- .open = filesystems_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
static int __init proc_filesystems_init(void)
{
- proc_create("filesystems", 0, NULL, &filesystems_proc_fops);
+ proc_create_single("filesystems", 0, NULL, filesystems_proc_show);
return 0;
}
module_init(proc_filesystems_init);
diff --git a/fs/freevxfs/vxfs_lookup.c b/fs/freevxfs/vxfs_lookup.c
index ce4785fd81c6..a51425634f65 100644
--- a/fs/freevxfs/vxfs_lookup.c
+++ b/fs/freevxfs/vxfs_lookup.c
@@ -193,13 +193,9 @@ vxfs_lookup(struct inode *dip, struct dentry *dp, unsigned int flags)
return ERR_PTR(-ENAMETOOLONG);
ino = vxfs_inode_by_name(dip, dp);
- if (ino) {
+ if (ino)
ip = vxfs_iget(dip->i_sb, ino);
- if (IS_ERR(ip))
- return ERR_CAST(ip);
- }
- d_add(dp, ip);
- return NULL;
+ return d_splice_alias(ip, dp);
}
/**
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 47d7c151fcba..471d863958bc 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -1961,7 +1961,7 @@ void wb_workfn(struct work_struct *work)
}
if (!list_empty(&wb->work_list))
- mod_delayed_work(bdi_wq, &wb->dwork, 0);
+ wb_wakeup(wb);
else if (wb_has_dirty_io(wb) && dirty_writeback_interval)
wb_wakeup_delayed(wb);
diff --git a/fs/fscache/histogram.c b/fs/fscache/histogram.c
index 15a3d042247e..9a13e9e15b69 100644
--- a/fs/fscache/histogram.c
+++ b/fs/fscache/histogram.c
@@ -83,24 +83,9 @@ static void fscache_histogram_stop(struct seq_file *m, void *v)
{
}
-static const struct seq_operations fscache_histogram_ops = {
+const struct seq_operations fscache_histogram_ops = {
.start = fscache_histogram_start,
.stop = fscache_histogram_stop,
.next = fscache_histogram_next,
.show = fscache_histogram_show,
};
-
-/*
- * open "/proc/fs/fscache/histogram" to provide latency data
- */
-static int fscache_histogram_open(struct inode *inode, struct file *file)
-{
- return seq_open(file, &fscache_histogram_ops);
-}
-
-const struct file_operations fscache_histogram_fops = {
- .open = fscache_histogram_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h
index 500650f938fe..f83328a7f048 100644
--- a/fs/fscache/internal.h
+++ b/fs/fscache/internal.h
@@ -31,6 +31,7 @@
#include <linux/fscache-cache.h>
#include <trace/events/fscache.h>
#include <linux/sched.h>
+#include <linux/seq_file.h>
#define FSCACHE_MIN_THREADS 4
#define FSCACHE_MAX_THREADS 32
@@ -84,7 +85,7 @@ static inline void fscache_hist(atomic_t histogram[], unsigned long start_jif)
atomic_inc(&histogram[jif]);
}
-extern const struct file_operations fscache_histogram_fops;
+extern const struct seq_operations fscache_histogram_ops;
#else
#define fscache_hist(hist, start_jif) do {} while (0)
@@ -294,7 +295,7 @@ static inline void fscache_stat_d(atomic_t *stat)
#define __fscache_stat(stat) (stat)
-extern const struct file_operations fscache_stats_fops;
+int fscache_stats_show(struct seq_file *m, void *v);
#else
#define __fscache_stat(stat) (NULL)
diff --git a/fs/fscache/proc.c b/fs/fscache/proc.c
index 1d9e4951a597..49a8c90414bc 100644
--- a/fs/fscache/proc.c
+++ b/fs/fscache/proc.c
@@ -26,14 +26,14 @@ int __init fscache_proc_init(void)
goto error_dir;
#ifdef CONFIG_FSCACHE_STATS
- if (!proc_create("fs/fscache/stats", S_IFREG | 0444, NULL,
- &fscache_stats_fops))
+ if (!proc_create_single("fs/fscache/stats", S_IFREG | 0444, NULL,
+ fscache_stats_show))
goto error_stats;
#endif
#ifdef CONFIG_FSCACHE_HISTOGRAM
- if (!proc_create("fs/fscache/histogram", S_IFREG | 0444, NULL,
- &fscache_histogram_fops))
+ if (!proc_create_seq("fs/fscache/histogram", S_IFREG | 0444, NULL,
+ &fscache_histogram_ops))
goto error_histogram;
#endif
diff --git a/fs/fscache/stats.c b/fs/fscache/stats.c
index fcc8c2f2690e..00564a1dfd76 100644
--- a/fs/fscache/stats.c
+++ b/fs/fscache/stats.c
@@ -138,7 +138,7 @@ atomic_t fscache_n_cache_culled_objects;
/*
* display the general statistics
*/
-static int fscache_stats_show(struct seq_file *m, void *v)
+int fscache_stats_show(struct seq_file *m, void *v)
{
seq_puts(m, "FS-Cache statistics\n");
@@ -284,18 +284,3 @@ static int fscache_stats_show(struct seq_file *m, void *v)
atomic_read(&fscache_n_cache_culled_objects));
return 0;
}
-
-/*
- * open "/proc/fs/fscache/stats" allowing provision of a statistical summary
- */
-static int fscache_stats_open(struct inode *inode, struct file *file)
-{
- return single_open(file, fscache_stats_show, NULL);
-}
-
-const struct file_operations fscache_stats_fops = {
- .open = fscache_stats_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index f58716567972..35f5ee23566d 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -54,8 +54,7 @@ static void gfs2_page_add_databufs(struct gfs2_inode *ip, struct page *page,
continue;
if (start >= to)
break;
- if (gfs2_is_jdata(ip))
- set_buffer_uptodate(bh);
+ set_buffer_uptodate(bh);
gfs2_trans_add_data(ip->i_gl, bh);
}
}
@@ -747,18 +746,21 @@ out:
put_page(page);
gfs2_trans_end(sdp);
- if (pos + len > ip->i_inode.i_size)
- gfs2_trim_blocks(&ip->i_inode);
- goto out_trans_fail;
+ if (alloc_required) {
+ gfs2_inplace_release(ip);
+ if (pos + len > ip->i_inode.i_size)
+ gfs2_trim_blocks(&ip->i_inode);
+ }
+ goto out_qunlock;
out_endtrans:
gfs2_trans_end(sdp);
out_trans_fail:
- if (alloc_required) {
+ if (alloc_required)
gfs2_inplace_release(ip);
out_qunlock:
+ if (alloc_required)
gfs2_quota_unlock(ip);
- }
out_unlock:
if (&ip->i_inode == sdp->sd_rindex) {
gfs2_glock_dq(&m_ip->i_gh);
@@ -814,7 +816,6 @@ out:
* @inode: The inode
* @dibh: The buffer_head containing the on-disk inode
* @pos: The file position
- * @len: The length of the write
* @copied: How much was actually copied by the VFS
* @page: The page
*
@@ -824,17 +825,15 @@ out:
* Returns: errno
*/
static int gfs2_stuffed_write_end(struct inode *inode, struct buffer_head *dibh,
- loff_t pos, unsigned len, unsigned copied,
+ loff_t pos, unsigned copied,
struct page *page)
{
struct gfs2_inode *ip = GFS2_I(inode);
- struct gfs2_sbd *sdp = GFS2_SB(inode);
- struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
u64 to = pos + copied;
void *kaddr;
unsigned char *buf = dibh->b_data + sizeof(struct gfs2_dinode);
- BUG_ON(pos + len > gfs2_max_stuffed_size(ip));
+ BUG_ON(pos + copied > gfs2_max_stuffed_size(ip));
kaddr = kmap_atomic(page);
memcpy(buf + pos, kaddr + pos, copied);
@@ -850,20 +849,6 @@ static int gfs2_stuffed_write_end(struct inode *inode, struct buffer_head *dibh,
i_size_write(inode, to);
mark_inode_dirty(inode);
}
-
- if (inode == sdp->sd_rindex) {
- adjust_fs_space(inode);
- sdp->sd_rindex_uptodate = 0;
- }
-
- brelse(dibh);
- gfs2_trans_end(sdp);
- if (inode == sdp->sd_rindex) {
- gfs2_glock_dq(&m_ip->i_gh);
- gfs2_holder_uninit(&m_ip->i_gh);
- }
- gfs2_glock_dq(&ip->i_gh);
- gfs2_holder_uninit(&ip->i_gh);
return copied;
}
@@ -877,9 +862,8 @@ static int gfs2_stuffed_write_end(struct inode *inode, struct buffer_head *dibh,
* @page: The page that has been written
* @fsdata: The fsdata (unused in GFS2)
*
- * The main write_end function for GFS2. We have a separate one for
- * stuffed files as they are slightly different, otherwise we just
- * put our locking around the VFS provided functions.
+ * The main write_end function for GFS2. We just put our locking around the VFS
+ * provided functions.
*
* Returns: errno
*/
@@ -900,32 +884,39 @@ static int gfs2_write_end(struct file *file, struct address_space *mapping,
BUG_ON(gfs2_glock_is_locked_by_me(ip->i_gl) == NULL);
ret = gfs2_meta_inode_buffer(ip, &dibh);
- if (unlikely(ret)) {
- unlock_page(page);
- put_page(page);
- goto failed;
- }
+ if (unlikely(ret))
+ goto out;
- if (gfs2_is_stuffed(ip))
- return gfs2_stuffed_write_end(inode, dibh, pos, len, copied, page);
+ if (gfs2_is_stuffed(ip)) {
+ ret = gfs2_stuffed_write_end(inode, dibh, pos, copied, page);
+ page = NULL;
+ goto out2;
+ }
- if (!gfs2_is_writeback(ip))
+ if (gfs2_is_jdata(ip))
gfs2_page_add_databufs(ip, page, pos & ~PAGE_MASK, len);
+ else
+ gfs2_ordered_add_inode(ip);
ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata);
+ page = NULL;
if (tr->tr_num_buf_new)
__mark_inode_dirty(inode, I_DIRTY_DATASYNC);
else
gfs2_trans_add_meta(ip->i_gl, dibh);
-
+out2:
if (inode == sdp->sd_rindex) {
adjust_fs_space(inode);
sdp->sd_rindex_uptodate = 0;
}
brelse(dibh);
-failed:
+out:
+ if (page) {
+ unlock_page(page);
+ put_page(page);
+ }
gfs2_trans_end(sdp);
gfs2_inplace_release(ip);
if (ip->i_qadata && ip->i_qadata->qa_qd_num)
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 278ed0869c3c..a7b586e02693 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -89,10 +89,12 @@ static int gfs2_unstuffer_page(struct gfs2_inode *ip, struct buffer_head *dibh,
map_bh(bh, inode->i_sb, block);
set_buffer_uptodate(bh);
- if (!gfs2_is_jdata(ip))
- mark_buffer_dirty(bh);
- if (!gfs2_is_writeback(ip))
+ if (gfs2_is_jdata(ip))
gfs2_trans_add_data(ip->i_gl, bh);
+ else {
+ mark_buffer_dirty(bh);
+ gfs2_ordered_add_inode(ip);
+ }
if (release) {
unlock_page(page);
@@ -176,8 +178,8 @@ out:
/**
* find_metapath - Find path through the metadata tree
* @sdp: The superblock
- * @mp: The metapath to return the result in
* @block: The disk block to look up
+ * @mp: The metapath to return the result in
* @height: The pre-calculated height of the metadata tree
*
* This routine returns a struct metapath structure that defines a path
@@ -188,8 +190,7 @@ out:
* filesystem with a blocksize of 4096.
*
* find_metapath() would return a struct metapath structure set to:
- * mp_offset = 101342453, mp_height = 3, mp_list[0] = 0, mp_list[1] = 48,
- * and mp_list[2] = 165.
+ * mp_fheight = 3, mp_list[0] = 0, mp_list[1] = 48, and mp_list[2] = 165.
*
* That means that in order to get to the block containing the byte at
* offset 101342453, we would load the indirect block pointed to by pointer
@@ -279,6 +280,21 @@ static inline __be64 *metapointer(unsigned int height, const struct metapath *mp
return p + mp->mp_list[height];
}
+static inline const __be64 *metaend(unsigned int height, const struct metapath *mp)
+{
+ const struct buffer_head *bh = mp->mp_bh[height];
+ return (const __be64 *)(bh->b_data + bh->b_size);
+}
+
+static void clone_metapath(struct metapath *clone, struct metapath *mp)
+{
+ unsigned int hgt;
+
+ *clone = *mp;
+ for (hgt = 0; hgt < mp->mp_aheight; hgt++)
+ get_bh(clone->mp_bh[hgt]);
+}
+
static void gfs2_metapath_ra(struct gfs2_glock *gl, __be64 *start, __be64 *end)
{
const __be64 *t;
@@ -420,20 +436,140 @@ static inline unsigned int gfs2_extent_length(void *start, unsigned int len, __b
return (ptr - first);
}
-static inline void bmap_lock(struct gfs2_inode *ip, int create)
+typedef const __be64 *(*gfs2_metadata_walker)(
+ struct metapath *mp,
+ const __be64 *start, const __be64 *end,
+ u64 factor, void *data);
+
+#define WALK_STOP ((__be64 *)0)
+#define WALK_NEXT ((__be64 *)1)
+
+static int gfs2_walk_metadata(struct inode *inode, sector_t lblock,
+ u64 len, struct metapath *mp, gfs2_metadata_walker walker,
+ void *data)
{
- if (create)
- down_write(&ip->i_rw_mutex);
- else
- down_read(&ip->i_rw_mutex);
+ struct metapath clone;
+ struct gfs2_inode *ip = GFS2_I(inode);
+ struct gfs2_sbd *sdp = GFS2_SB(inode);
+ const __be64 *start, *end, *ptr;
+ u64 factor = 1;
+ unsigned int hgt;
+ int ret = 0;
+
+ for (hgt = ip->i_height - 1; hgt >= mp->mp_aheight; hgt--)
+ factor *= sdp->sd_inptrs;
+
+ for (;;) {
+ u64 step;
+
+ /* Walk indirect block. */
+ start = metapointer(hgt, mp);
+ end = metaend(hgt, mp);
+
+ step = (end - start) * factor;
+ if (step > len)
+ end = start + DIV_ROUND_UP_ULL(len, factor);
+
+ ptr = walker(mp, start, end, factor, data);
+ if (ptr == WALK_STOP)
+ break;
+ if (step >= len)
+ break;
+ len -= step;
+ if (ptr != WALK_NEXT) {
+ BUG_ON(!*ptr);
+ mp->mp_list[hgt] += ptr - start;
+ goto fill_up_metapath;
+ }
+
+lower_metapath:
+ /* Decrease height of metapath. */
+ if (mp != &clone) {
+ clone_metapath(&clone, mp);
+ mp = &clone;
+ }
+ brelse(mp->mp_bh[hgt]);
+ mp->mp_bh[hgt] = NULL;
+ if (!hgt)
+ break;
+ hgt--;
+ factor *= sdp->sd_inptrs;
+
+ /* Advance in metadata tree. */
+ (mp->mp_list[hgt])++;
+ start = metapointer(hgt, mp);
+ end = metaend(hgt, mp);
+ if (start >= end) {
+ mp->mp_list[hgt] = 0;
+ if (!hgt)
+ break;
+ goto lower_metapath;
+ }
+
+fill_up_metapath:
+ /* Increase height of metapath. */
+ if (mp != &clone) {
+ clone_metapath(&clone, mp);
+ mp = &clone;
+ }
+ ret = fillup_metapath(ip, mp, ip->i_height - 1);
+ if (ret < 0)
+ break;
+ hgt += ret;
+ for (; ret; ret--)
+ do_div(factor, sdp->sd_inptrs);
+ mp->mp_aheight = hgt + 1;
+ }
+ if (mp == &clone)
+ release_metapath(mp);
+ return ret;
}
-static inline void bmap_unlock(struct gfs2_inode *ip, int create)
+struct gfs2_hole_walker_args {
+ u64 blocks;
+};
+
+static const __be64 *gfs2_hole_walker(struct metapath *mp,
+ const __be64 *start, const __be64 *end,
+ u64 factor, void *data)
{
- if (create)
- up_write(&ip->i_rw_mutex);
- else
- up_read(&ip->i_rw_mutex);
+ struct gfs2_hole_walker_args *args = data;
+ const __be64 *ptr;
+
+ for (ptr = start; ptr < end; ptr++) {
+ if (*ptr) {
+ args->blocks += (ptr - start) * factor;
+ if (mp->mp_aheight == mp->mp_fheight)
+ return WALK_STOP;
+ return ptr; /* increase height */
+ }
+ }
+ args->blocks += (end - start) * factor;
+ return WALK_NEXT;
+}
+
+/**
+ * gfs2_hole_size - figure out the size of a hole
+ * @inode: The inode
+ * @lblock: The logical starting block number
+ * @len: How far to look (in blocks)
+ * @mp: The metapath at lblock
+ * @iomap: The iomap to store the hole size in
+ *
+ * This function modifies @mp.
+ *
+ * Returns: errno on error
+ */
+static int gfs2_hole_size(struct inode *inode, sector_t lblock, u64 len,
+ struct metapath *mp, struct iomap *iomap)
+{
+ struct gfs2_hole_walker_args args = { };
+ int ret = 0;
+
+ ret = gfs2_walk_metadata(inode, lblock, len, mp, gfs2_hole_walker, &args);
+ if (!ret)
+ iomap->length = args.blocks << inode->i_blkbits;
+ return ret;
}
static inline __be64 *gfs2_indirect_init(struct metapath *mp,
@@ -462,15 +598,11 @@ enum alloc_state {
};
/**
- * gfs2_bmap_alloc - Build a metadata tree of the requested height
+ * gfs2_iomap_alloc - Build a metadata tree of the requested height
* @inode: The GFS2 inode
- * @lblock: The logical starting block of the extent
- * @bh_map: This is used to return the mapping details
- * @zero_new: True if newly allocated blocks should be zeroed
+ * @iomap: The iomap structure
+ * @flags: iomap flags
* @mp: The metapath, with proper height information calculated
- * @maxlen: The max number of data blocks to alloc
- * @dblock: Pointer to return the resulting new block
- * @dblks: Pointer to return the number of blocks allocated
*
* In this routine we may have to alloc:
* i) Indirect blocks to grow the metadata tree height
@@ -483,6 +615,13 @@ enum alloc_state {
* blocks are available, there will only be one request per bmap call)
* and uses the state machine to initialise the blocks in order.
*
+ * Right now, this function will allocate at most one indirect block
+ * worth of data -- with a default block size of 4K, that's slightly
+ * less than 2M. If this limitation is ever removed to allow huge
+ * allocations, we would probably still want to limit the iomap size we
+ * return to avoid stalling other tasks during huge writes; the next
+ * iomap iteration would then find the blocks already allocated.
+ *
* Returns: errno on error
*/
@@ -497,6 +636,7 @@ static int gfs2_iomap_alloc(struct inode *inode, struct iomap *iomap,
unsigned dblks = 0;
unsigned ptrs_per_blk;
const unsigned end_of_metadata = mp->mp_fheight - 1;
+ int ret;
enum alloc_state state;
__be64 *ptr;
__be64 zero_bn = 0;
@@ -507,6 +647,8 @@ static int gfs2_iomap_alloc(struct inode *inode, struct iomap *iomap,
gfs2_trans_add_meta(ip->i_gl, dibh);
+ down_write(&ip->i_rw_mutex);
+
if (mp->mp_fheight == mp->mp_aheight) {
struct buffer_head *bh;
int eob;
@@ -542,11 +684,10 @@ static int gfs2_iomap_alloc(struct inode *inode, struct iomap *iomap,
blks = dblks + iblks;
i = mp->mp_aheight;
do {
- int error;
n = blks - alloced;
- error = gfs2_alloc_blocks(ip, &bn, &n, 0, NULL);
- if (error)
- return error;
+ ret = gfs2_alloc_blocks(ip, &bn, &n, 0, NULL);
+ if (ret)
+ goto out;
alloced += n;
if (state != ALLOC_DATA || gfs2_is_jdata(ip))
gfs2_trans_add_unrevoke(sdp, bn, n);
@@ -602,7 +743,7 @@ static int gfs2_iomap_alloc(struct inode *inode, struct iomap *iomap,
dblks = n;
ptr = metapointer(end_of_metadata, mp);
iomap->addr = bn << inode->i_blkbits;
- iomap->flags |= IOMAP_F_NEW;
+ iomap->flags |= IOMAP_F_MERGED | IOMAP_F_NEW;
while (n-- > 0)
*ptr++ = cpu_to_be64(bn++);
break;
@@ -612,64 +753,10 @@ static int gfs2_iomap_alloc(struct inode *inode, struct iomap *iomap,
iomap->length = (u64)dblks << inode->i_blkbits;
ip->i_height = mp->mp_fheight;
gfs2_add_inode_blocks(&ip->i_inode, alloced);
- gfs2_dinode_out(ip, mp->mp_bh[0]->b_data);
- return 0;
-}
-
-/**
- * hole_size - figure out the size of a hole
- * @inode: The inode
- * @lblock: The logical starting block number
- * @mp: The metapath
- *
- * Returns: The hole size in bytes
- *
- */
-static u64 hole_size(struct inode *inode, sector_t lblock, struct metapath *mp)
-{
- struct gfs2_inode *ip = GFS2_I(inode);
- struct gfs2_sbd *sdp = GFS2_SB(inode);
- struct metapath mp_eof;
- u64 factor = 1;
- int hgt;
- u64 holesz = 0;
- const __be64 *first, *end, *ptr;
- const struct buffer_head *bh;
- u64 lblock_stop = (i_size_read(inode) - 1) >> inode->i_blkbits;
- int zeroptrs;
- bool done = false;
-
- /* Get another metapath, to the very last byte */
- find_metapath(sdp, lblock_stop, &mp_eof, ip->i_height);
- for (hgt = ip->i_height - 1; hgt >= 0 && !done; hgt--) {
- bh = mp->mp_bh[hgt];
- if (bh) {
- zeroptrs = 0;
- first = metapointer(hgt, mp);
- end = (const __be64 *)(bh->b_data + bh->b_size);
-
- for (ptr = first; ptr < end; ptr++) {
- if (*ptr) {
- done = true;
- break;
- } else {
- zeroptrs++;
- }
- }
- } else {
- zeroptrs = sdp->sd_inptrs;
- }
- if (factor * zeroptrs >= lblock_stop - lblock + 1) {
- holesz = lblock_stop - lblock + 1;
- break;
- }
- holesz += factor * zeroptrs;
-
- factor *= sdp->sd_inptrs;
- if (hgt && (mp->mp_list[hgt - 1] < mp_eof.mp_list[hgt - 1]))
- (mp->mp_list[hgt - 1])++;
- }
- return holesz << inode->i_blkbits;
+ gfs2_dinode_out(ip, dibh->b_data);
+out:
+ up_write(&ip->i_rw_mutex);
+ return ret;
}
static void gfs2_stuffed_iomap(struct inode *inode, struct iomap *iomap)
@@ -685,121 +772,130 @@ static void gfs2_stuffed_iomap(struct inode *inode, struct iomap *iomap)
}
/**
- * gfs2_iomap_begin - Map blocks from an inode to disk blocks
+ * gfs2_iomap_get - Map blocks from an inode to disk blocks
* @inode: The inode
* @pos: Starting position in bytes
* @length: Length to map, in bytes
* @flags: iomap flags
* @iomap: The iomap structure
+ * @mp: The metapath
*
* Returns: errno
*/
-int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
- unsigned flags, struct iomap *iomap)
+static int gfs2_iomap_get(struct inode *inode, loff_t pos, loff_t length,
+ unsigned flags, struct iomap *iomap,
+ struct metapath *mp)
{
struct gfs2_inode *ip = GFS2_I(inode);
struct gfs2_sbd *sdp = GFS2_SB(inode);
- struct metapath mp = { .mp_aheight = 1, };
- unsigned int factor = sdp->sd_sb.sb_bsize;
- const u64 *arr = sdp->sd_heightsize;
__be64 *ptr;
sector_t lblock;
- sector_t lend;
- int ret = 0;
+ sector_t lblock_stop;
+ int ret;
int eob;
- unsigned int len;
+ u64 len;
struct buffer_head *bh;
u8 height;
- trace_gfs2_iomap_start(ip, pos, length, flags);
- if (!length) {
- ret = -EINVAL;
- goto out;
- }
+ if (!length)
+ return -EINVAL;
if (gfs2_is_stuffed(ip)) {
if (flags & IOMAP_REPORT) {
+ if (pos >= i_size_read(inode))
+ return -ENOENT;
gfs2_stuffed_iomap(inode, iomap);
- if (pos >= iomap->length)
- ret = -ENOENT;
- goto out;
+ return 0;
}
BUG_ON(!(flags & IOMAP_WRITE));
}
-
lblock = pos >> inode->i_blkbits;
- lend = (pos + length + sdp->sd_sb.sb_bsize - 1) >> inode->i_blkbits;
-
iomap->offset = lblock << inode->i_blkbits;
- iomap->addr = IOMAP_NULL_ADDR;
- iomap->type = IOMAP_HOLE;
- iomap->length = (u64)(lend - lblock) << inode->i_blkbits;
- iomap->flags = IOMAP_F_MERGED;
- bmap_lock(ip, flags & IOMAP_WRITE);
+ lblock_stop = (pos + length - 1) >> inode->i_blkbits;
+ len = lblock_stop - lblock + 1;
- /*
- * Directory data blocks have a struct gfs2_meta_header header, so the
- * remaining size is smaller than the filesystem block size. Logical
- * block numbers for directories are in units of this remaining size!
- */
- if (gfs2_is_dir(ip)) {
- factor = sdp->sd_jbsize;
- arr = sdp->sd_jheightsize;
- }
+ down_read(&ip->i_rw_mutex);
- ret = gfs2_meta_inode_buffer(ip, &mp.mp_bh[0]);
+ ret = gfs2_meta_inode_buffer(ip, &mp->mp_bh[0]);
if (ret)
- goto out_release;
+ goto unlock;
height = ip->i_height;
- while ((lblock + 1) * factor > arr[height])
+ while ((lblock + 1) * sdp->sd_sb.sb_bsize > sdp->sd_heightsize[height])
height++;
- find_metapath(sdp, lblock, &mp, height);
+ find_metapath(sdp, lblock, mp, height);
if (height > ip->i_height || gfs2_is_stuffed(ip))
goto do_alloc;
- ret = lookup_metapath(ip, &mp);
+ ret = lookup_metapath(ip, mp);
if (ret)
- goto out_release;
+ goto unlock;
- if (mp.mp_aheight != ip->i_height)
+ if (mp->mp_aheight != ip->i_height)
goto do_alloc;
- ptr = metapointer(ip->i_height - 1, &mp);
+ ptr = metapointer(ip->i_height - 1, mp);
if (*ptr == 0)
goto do_alloc;
- iomap->type = IOMAP_MAPPED;
- iomap->addr = be64_to_cpu(*ptr) << inode->i_blkbits;
+ bh = mp->mp_bh[ip->i_height - 1];
+ len = gfs2_extent_length(bh->b_data, bh->b_size, ptr, len, &eob);
- bh = mp.mp_bh[ip->i_height - 1];
- len = gfs2_extent_length(bh->b_data, bh->b_size, ptr, lend - lblock, &eob);
+ iomap->addr = be64_to_cpu(*ptr) << inode->i_blkbits;
+ iomap->length = len << inode->i_blkbits;
+ iomap->type = IOMAP_MAPPED;
+ iomap->flags = IOMAP_F_MERGED;
if (eob)
iomap->flags |= IOMAP_F_BOUNDARY;
- iomap->length = (u64)len << inode->i_blkbits;
-out_release:
- release_metapath(&mp);
- bmap_unlock(ip, flags & IOMAP_WRITE);
out:
- trace_gfs2_iomap_end(ip, iomap, ret);
+ iomap->bdev = inode->i_sb->s_bdev;
+unlock:
+ up_read(&ip->i_rw_mutex);
return ret;
do_alloc:
- if (flags & IOMAP_WRITE) {
- ret = gfs2_iomap_alloc(inode, iomap, flags, &mp);
- } else if (flags & IOMAP_REPORT) {
+ iomap->addr = IOMAP_NULL_ADDR;
+ iomap->length = len << inode->i_blkbits;
+ iomap->type = IOMAP_HOLE;
+ iomap->flags = 0;
+ if (flags & IOMAP_REPORT) {
loff_t size = i_size_read(inode);
if (pos >= size)
ret = -ENOENT;
- else if (height <= ip->i_height)
- iomap->length = hole_size(inode, lblock, &mp);
+ else if (height == ip->i_height)
+ ret = gfs2_hole_size(inode, lblock, len, mp, iomap);
else
iomap->length = size - pos;
}
- goto out_release;
+ goto out;
+}
+
+static int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
+ unsigned flags, struct iomap *iomap)
+{
+ struct gfs2_inode *ip = GFS2_I(inode);
+ struct metapath mp = { .mp_aheight = 1, };
+ int ret;
+
+ trace_gfs2_iomap_start(ip, pos, length, flags);
+ if (flags & IOMAP_WRITE) {
+ ret = gfs2_iomap_get(inode, pos, length, flags, iomap, &mp);
+ if (!ret && iomap->type == IOMAP_HOLE)
+ ret = gfs2_iomap_alloc(inode, iomap, flags, &mp);
+ release_metapath(&mp);
+ } else {
+ ret = gfs2_iomap_get(inode, pos, length, flags, iomap, &mp);
+ release_metapath(&mp);
+ }
+ trace_gfs2_iomap_end(ip, iomap, ret);
+ return ret;
}
+const struct iomap_ops gfs2_iomap_ops = {
+ .iomap_begin = gfs2_iomap_begin,
+};
+
/**
* gfs2_block_map - Map one or more blocks of an inode to a disk block
* @inode: The inode
@@ -825,25 +921,34 @@ int gfs2_block_map(struct inode *inode, sector_t lblock,
struct buffer_head *bh_map, int create)
{
struct gfs2_inode *ip = GFS2_I(inode);
- struct iomap iomap;
- int ret, flags = 0;
+ loff_t pos = (loff_t)lblock << inode->i_blkbits;
+ loff_t length = bh_map->b_size;
+ struct metapath mp = { .mp_aheight = 1, };
+ struct iomap iomap = { };
+ int ret;
clear_buffer_mapped(bh_map);
clear_buffer_new(bh_map);
clear_buffer_boundary(bh_map);
trace_gfs2_bmap(ip, bh_map, lblock, create, 1);
- if (create)
- flags |= IOMAP_WRITE;
- ret = gfs2_iomap_begin(inode, (loff_t)lblock << inode->i_blkbits,
- bh_map->b_size, flags, &iomap);
- if (ret) {
- if (!create && ret == -ENOENT) {
- /* Return unmapped buffer beyond the end of file. */
+ if (create) {
+ ret = gfs2_iomap_get(inode, pos, length, IOMAP_WRITE, &iomap, &mp);
+ if (!ret && iomap.type == IOMAP_HOLE)
+ ret = gfs2_iomap_alloc(inode, &iomap, IOMAP_WRITE, &mp);
+ release_metapath(&mp);
+ } else {
+ ret = gfs2_iomap_get(inode, pos, length, 0, &iomap, &mp);
+ release_metapath(&mp);
+
+ /* Return unmapped buffer beyond the end of file. */
+ if (ret == -ENOENT) {
ret = 0;
+ goto out;
}
- goto out;
}
+ if (ret)
+ goto out;
if (iomap.length > bh_map->b_size) {
iomap.length = bh_map->b_size;
@@ -945,8 +1050,10 @@ static int gfs2_block_zero_range(struct inode *inode, loff_t from,
err = 0;
}
- if (!gfs2_is_writeback(ip))
+ if (gfs2_is_jdata(ip))
gfs2_trans_add_data(ip->i_gl, bh);
+ else
+ gfs2_ordered_add_inode(ip);
zero_user(page, offset, length);
mark_buffer_dirty(bh);
@@ -1056,6 +1163,19 @@ out:
return error;
}
+int gfs2_iomap_get_alloc(struct inode *inode, loff_t pos, loff_t length,
+ struct iomap *iomap)
+{
+ struct metapath mp = { .mp_aheight = 1, };
+ int ret;
+
+ ret = gfs2_iomap_get(inode, pos, length, IOMAP_WRITE, iomap, &mp);
+ if (!ret && iomap->type == IOMAP_HOLE)
+ ret = gfs2_iomap_alloc(inode, iomap, IOMAP_WRITE, &mp);
+ release_metapath(&mp);
+ return ret;
+}
+
/**
* sweep_bh_for_rgrps - find an rgrp in a meta buffer and free blocks therein
* @ip: inode
diff --git a/fs/gfs2/bmap.h b/fs/gfs2/bmap.h
index c3402fe00653..6b18fb323f0a 100644
--- a/fs/gfs2/bmap.h
+++ b/fs/gfs2/bmap.h
@@ -46,11 +46,13 @@ static inline void gfs2_write_calc_reserv(const struct gfs2_inode *ip,
}
}
+extern const struct iomap_ops gfs2_iomap_ops;
+
extern int gfs2_unstuff_dinode(struct gfs2_inode *ip, struct page *page);
extern int gfs2_block_map(struct inode *inode, sector_t lblock,
struct buffer_head *bh, int create);
-extern int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
- unsigned flags, struct iomap *iomap);
+extern int gfs2_iomap_get_alloc(struct inode *inode, loff_t pos, loff_t length,
+ struct iomap *iomap);
extern int gfs2_extent_map(struct inode *inode, u64 lblock, int *new,
u64 *dblock, unsigned *extlen);
extern int gfs2_setattr_size(struct inode *inode, u64 size);
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 4b71f021a9e2..7137db7b0119 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -733,7 +733,7 @@ static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len,
struct gfs2_inode *ip = GFS2_I(inode);
loff_t end = offset + len;
struct buffer_head *dibh;
- struct iomap iomap;
+ struct iomap iomap = { };
int error;
error = gfs2_meta_inode_buffer(ip, &dibh);
@@ -749,8 +749,8 @@ static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len,
}
while (offset < end) {
- error = gfs2_iomap_begin(inode, offset, end - offset,
- IOMAP_WRITE, &iomap);
+ error = gfs2_iomap_get_alloc(inode, offset, end - offset,
+ &iomap);
if (error)
goto out;
offset = iomap.offset + iomap.length;
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 1b6b1e3f5caf..d2ad817e089f 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -116,6 +116,7 @@ static inline struct gfs2_bitmap *rbm_bi(const struct gfs2_rbm *rbm)
static inline u64 gfs2_rbm_to_block(const struct gfs2_rbm *rbm)
{
+ BUG_ON(rbm->offset >= rbm->rgd->rd_data);
return rbm->rgd->rd_data0 + (rbm_bi(rbm)->bi_start * GFS2_NBBY) +
rbm->offset;
}
@@ -696,8 +697,6 @@ struct gfs2_sbd {
u32 sd_max_dirres; /* Max blocks needed to add a directory entry */
u32 sd_max_height; /* Max height of a file's metadata tree */
u64 sd_heightsize[GFS2_MAX_META_HEIGHT + 1];
- u32 sd_max_jheight; /* Max height of journaled file's meta tree */
- u64 sd_jheightsize[GFS2_MAX_META_HEIGHT + 1];
u32 sd_max_dents_per_leaf; /* Max number of dirents in a leaf block */
struct gfs2_args sd_args; /* Mount arguments */
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 8700eb815638..feda55f67050 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -2006,10 +2006,6 @@ static int gfs2_getattr(const struct path *path, struct kstat *stat,
return 0;
}
-const struct iomap_ops gfs2_iomap_ops = {
- .iomap_begin = gfs2_iomap_begin,
-};
-
static int gfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
u64 start, u64 len)
{
diff --git a/fs/gfs2/log.h b/fs/gfs2/log.h
index 1862e310a067..20241436126d 100644
--- a/fs/gfs2/log.h
+++ b/fs/gfs2/log.h
@@ -14,6 +14,7 @@
#include <linux/spinlock.h>
#include <linux/writeback.h>
#include "incore.h"
+#include "inode.h"
/**
* gfs2_log_lock - acquire the right to mess with the log manager
@@ -50,8 +51,12 @@ static inline void gfs2_log_pointers_init(struct gfs2_sbd *sdp,
static inline void gfs2_ordered_add_inode(struct gfs2_inode *ip)
{
- struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
+ struct gfs2_sbd *sdp;
+ if (!gfs2_is_ordered(ip))
+ return;
+
+ sdp = GFS2_SB(&ip->i_inode);
if (!test_bit(GIF_ORDERED, &ip->i_flags)) {
spin_lock(&sdp->sd_ordered_lock);
if (!test_and_set_bit(GIF_ORDERED, &ip->i_flags))
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 3ba3f167641c..c2469833b4fb 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -335,25 +335,6 @@ static int gfs2_read_sb(struct gfs2_sbd *sdp, int silent)
sdp->sd_heightsize[x] = ~0;
gfs2_assert(sdp, sdp->sd_max_height <= GFS2_MAX_META_HEIGHT);
- sdp->sd_jheightsize[0] = sdp->sd_sb.sb_bsize -
- sizeof(struct gfs2_dinode);
- sdp->sd_jheightsize[1] = sdp->sd_jbsize * sdp->sd_diptrs;
- for (x = 2;; x++) {
- u64 space, d;
- u32 m;
-
- space = sdp->sd_jheightsize[x - 1] * sdp->sd_inptrs;
- d = space;
- m = do_div(d, sdp->sd_inptrs);
-
- if (d != sdp->sd_jheightsize[x - 1] || m)
- break;
- sdp->sd_jheightsize[x] = space;
- }
- sdp->sd_max_jheight = x;
- sdp->sd_jheightsize[x] = ~0;
- gfs2_assert(sdp, sdp->sd_max_jheight <= GFS2_MAX_META_HEIGHT);
-
sdp->sd_max_dents_per_leaf = (sdp->sd_sb.sb_bsize -
sizeof(struct gfs2_leaf)) /
GFS2_MIN_DIRENT_SIZE;
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index 7a98abd340ee..e8585dfd209f 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -735,7 +735,10 @@ static int gfs2_write_buf_to_page(struct gfs2_inode *ip, unsigned long index,
if (!buffer_uptodate(bh))
goto unlock_out;
}
- gfs2_trans_add_data(ip->i_gl, bh);
+ if (gfs2_is_jdata(ip))
+ gfs2_trans_add_data(ip->i_gl, bh);
+ else
+ gfs2_ordered_add_inode(ip);
/* If we need to write to the next block as well */
if (to_write > (bsize - boff)) {
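With gfs2_ordered_add_inode() now checking the journaling mode itself (see the log.h hunk above), call sites only have to distinguish journaled data from everything else. A minimal sketch of the resulting call-site pattern, not part of this patch, with an illustrative function name:

/* Sketch: marking a modified data buffer after this series. */
static void example_mark_dirty_data(struct gfs2_inode *ip, struct buffer_head *bh)
{
	if (gfs2_is_jdata(ip))
		gfs2_trans_add_data(ip->i_gl, bh);	/* journal the block */
	else
		gfs2_ordered_add_inode(ip);	/* no-op unless ordered mode */
}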
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 8b683917a27e..6bc5cfe710d1 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -372,8 +372,8 @@ static u32 gfs2_free_extlen(const struct gfs2_rbm *rrbm, u32 len)
start = bi->bi_bh->b_data;
if (bi->bi_clone)
start = bi->bi_clone;
- end = start + bi->bi_bh->b_size;
start += bi->bi_offset;
+ end = start + bi->bi_len;
BUG_ON(rbm.offset & 3);
start += (rbm.offset / GFS2_NBBY);
bytes = min_t(u32, len / GFS2_NBBY, (end - start));
diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c
index c75cacaa349b..064c9a0ef046 100644
--- a/fs/gfs2/trans.c
+++ b/fs/gfs2/trans.c
@@ -143,32 +143,21 @@ static struct gfs2_bufdata *gfs2_alloc_bufdata(struct gfs2_glock *gl,
* @gl: The inode glock associated with the buffer
* @bh: The buffer to add
*
- * This is used in two distinct cases:
- * i) In ordered write mode
- * We put the data buffer on a list so that we can ensure that it's
- * synced to disk at the right time
- * ii) In journaled data mode
- * We need to journal the data block in the same way as metadata in
- * the functions above. The difference is that here we have a tag
- * which is two __be64's being the block number (as per meta data)
- * and a flag which says whether the data block needs escaping or
- * not. This means we need a new log entry for each 251 or so data
- * blocks, which isn't an enormous overhead but twice as much as
- * for normal metadata blocks.
+ * This is used in journaled data mode.
+ * We need to journal the data block in the same way as metadata in
+ * the functions above. The difference is that here we have a tag
+ * which is two __be64's being the block number (as per meta data)
+ * and a flag which says whether the data block needs escaping or
+ * not. This means we need a new log entry for each 251 or so data
+ * blocks, which isn't an enormous overhead but twice as much as
+ * for normal metadata blocks.
*/
void gfs2_trans_add_data(struct gfs2_glock *gl, struct buffer_head *bh)
{
struct gfs2_trans *tr = current->journal_info;
struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
- struct address_space *mapping = bh->b_page->mapping;
- struct gfs2_inode *ip = GFS2_I(mapping->host);
struct gfs2_bufdata *bd;
- if (!gfs2_is_jdata(ip)) {
- gfs2_ordered_add_inode(ip);
- return;
- }
-
lock_buffer(bh);
if (buffer_pinned(bh)) {
set_bit(TR_TOUCHED, &tr->tr_flags);
diff --git a/fs/hfs/dir.c b/fs/hfs/dir.c
index 75b254280ff6..3bf2ae0e467c 100644
--- a/fs/hfs/dir.c
+++ b/fs/hfs/dir.c
@@ -31,21 +31,15 @@ static struct dentry *hfs_lookup(struct inode *dir, struct dentry *dentry,
hfs_cat_build_key(dir->i_sb, fd.search_key, dir->i_ino, &dentry->d_name);
res = hfs_brec_read(&fd, &rec, sizeof(rec));
if (res) {
- hfs_find_exit(&fd);
- if (res == -ENOENT) {
- /* No such entry */
- inode = NULL;
- goto done;
- }
- return ERR_PTR(res);
+ if (res != -ENOENT)
+ inode = ERR_PTR(res);
+ } else {
+ inode = hfs_iget(dir->i_sb, &fd.search_key->cat, &rec);
+ if (!inode)
+ inode = ERR_PTR(-EACCES);
}
- inode = hfs_iget(dir->i_sb, &fd.search_key->cat, &rec);
hfs_find_exit(&fd);
- if (!inode)
- return ERR_PTR(-EACCES);
-done:
- d_add(dentry, inode);
- return NULL;
+ return d_splice_alias(inode, dentry);
}
/*
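The lookup conversions in this series rely on d_splice_alias() accepting a NULL inode (negative dentry), an ERR_PTR (propagated as the return value), or a real inode, so the separate error exits collapse into one. A sketch of the shape these ->lookup() methods converge on, with hypothetical helpers:

static struct dentry *example_lookup(struct inode *dir, struct dentry *dentry,
				     unsigned int flags)
{
	struct inode *inode = NULL;
	ino_t ino = example_inode_by_name(dir, &dentry->d_name); /* hypothetical */

	if (ino)
		inode = example_iget(dir->i_sb, ino);	/* may return ERR_PTR */
	/* NULL, ERR_PTR and valid inodes are all handled in one place. */
	return d_splice_alias(inode, dentry);
}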
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index 2538b49cc349..b3309b83371a 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -543,9 +543,9 @@ static struct dentry *hfs_file_lookup(struct inode *dir, struct dentry *dentry,
igrab(dir);
hlist_add_fake(&inode->i_hash);
mark_inode_dirty(inode);
+ dont_mount(dentry);
out:
- d_add(dentry, inode);
- return NULL;
+ return d_splice_alias(inode, dentry);
}
void hfs_evict_inode(struct inode *inode)
diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c
index 15e06fb552da..b5254378f011 100644
--- a/fs/hfsplus/dir.c
+++ b/fs/hfsplus/dir.c
@@ -122,8 +122,7 @@ again:
if (S_ISREG(inode->i_mode))
HFSPLUS_I(inode)->linkid = linkid;
out:
- d_add(dentry, inode);
- return NULL;
+ return d_splice_alias(inode, dentry);
fail:
hfs_find_exit(&fd);
return ERR_PTR(err);
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index 513c357c734b..a6c0f54c48c3 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -588,6 +588,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
return 0;
out_put_hidden_dir:
+ cancel_delayed_work_sync(&sbi->sync_work);
iput(sbi->hidden_dir);
out_put_root:
dput(sb->s_root);
diff --git a/fs/inode.c b/fs/inode.c
index 13ceb98c3bd3..3b55391072f3 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -178,6 +178,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
mapping->a_ops = &empty_aops;
mapping->host = inode;
mapping->flags = 0;
+ mapping->wb_err = 0;
atomic_set(&mapping->i_mmap_writable, 0);
mapping_set_gfp_mask(mapping, GFP_HIGHUSER_MOVABLE);
mapping->private_data = NULL;
diff --git a/fs/internal.h b/fs/internal.h
index e08972db0303..980d005b21b4 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -125,6 +125,7 @@ int do_fchmodat(int dfd, const char __user *filename, umode_t mode);
int do_fchownat(int dfd, const char __user *filename, uid_t user, gid_t group,
int flag);
+extern int open_check_o_direct(struct file *f);
extern int vfs_open(const struct path *, struct file *, const struct cred *);
extern struct file *filp_clone_open(struct file *);
diff --git a/fs/ioctl.c b/fs/ioctl.c
index 4823431d1c9d..b445b13fc59b 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -549,7 +549,7 @@ static int ioctl_fsfreeze(struct file *filp)
{
struct super_block *sb = file_inode(filp)->i_sb;
- if (!capable(CAP_SYS_ADMIN))
+ if (!ns_capable(sb->s_user_ns, CAP_SYS_ADMIN))
return -EPERM;
/* If filesystem doesn't support freeze feature, return. */
@@ -566,7 +566,7 @@ static int ioctl_fsthaw(struct file *filp)
{
struct super_block *sb = file_inode(filp)->i_sb;
- if (!capable(CAP_SYS_ADMIN))
+ if (!ns_capable(sb->s_user_ns, CAP_SYS_ADMIN))
return -EPERM;
/* Thaw */
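Switching from capable() to ns_capable(sb->s_user_ns, ...) grants the operation to a task privileged in the user namespace that owns the superblock, not only to global root; the same pattern appears in the fs/namespace.c hunks below. Schematically:

/* Sketch: namespace-aware admin check for a superblock operation. */
static int example_sb_admin_check(struct super_block *sb)
{
	/*
	 * CAP_SYS_ADMIN in the userns owning sb, e.g. an unprivileged
	 * container that mounted this filesystem itself.
	 */
	if (!ns_capable(sb->s_user_ns, CAP_SYS_ADMIN))
		return -EPERM;
	return 0;
}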
diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c
index 0a754f38462e..e5a6deb38e1e 100644
--- a/fs/jffs2/dir.c
+++ b/fs/jffs2/dir.c
@@ -209,8 +209,7 @@ static int jffs2_create(struct inode *dir_i, struct dentry *dentry,
__func__, inode->i_ino, inode->i_mode, inode->i_nlink,
f->inocache->pino_nlink, inode->i_mapping->nrpages);
- unlock_new_inode(inode);
- d_instantiate(dentry, inode);
+ d_instantiate_new(dentry, inode);
return 0;
fail:
@@ -430,8 +429,7 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char
mutex_unlock(&dir_f->sem);
jffs2_complete_reservation(c);
- unlock_new_inode(inode);
- d_instantiate(dentry, inode);
+ d_instantiate_new(dentry, inode);
return 0;
fail:
@@ -575,8 +573,7 @@ static int jffs2_mkdir (struct inode *dir_i, struct dentry *dentry, umode_t mode
mutex_unlock(&dir_f->sem);
jffs2_complete_reservation(c);
- unlock_new_inode(inode);
- d_instantiate(dentry, inode);
+ d_instantiate_new(dentry, inode);
return 0;
fail:
@@ -747,8 +744,7 @@ static int jffs2_mknod (struct inode *dir_i, struct dentry *dentry, umode_t mode
mutex_unlock(&dir_f->sem);
jffs2_complete_reservation(c);
- unlock_new_inode(inode);
- d_instantiate(dentry, inode);
+ d_instantiate_new(dentry, inode);
return 0;
fail:
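d_instantiate_new() combines d_instantiate() with unlock_new_inode() while the inode is still locked, so the dentry never becomes visible with I_NEW set; the same two-line replacement recurs in the jfs, nilfs2 and orangefs hunks below. A minimal sketch of a create path using it (the allocation helper is hypothetical):

static int example_create(struct inode *dir, struct dentry *dentry,
			  umode_t mode, bool excl)
{
	struct inode *inode = example_new_inode(dir, mode); /* I_NEW is set */

	if (IS_ERR(inode))
		return PTR_ERR(inode);
	d_instantiate_new(dentry, inode);	/* instantiate + unlock_new_inode */
	return 0;
}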
diff --git a/fs/jfs/jfs_debug.c b/fs/jfs/jfs_debug.c
index a70907606025..35a5b2a81ae0 100644
--- a/fs/jfs/jfs_debug.c
+++ b/fs/jfs/jfs_debug.c
@@ -29,7 +29,6 @@
#ifdef PROC_FS_JFS /* see jfs_debug.h */
-static struct proc_dir_entry *base;
#ifdef CONFIG_JFS_DEBUG
static int jfs_loglevel_proc_show(struct seq_file *m, void *v)
{
@@ -66,43 +65,29 @@ static const struct file_operations jfs_loglevel_proc_fops = {
};
#endif
-static struct {
- const char *name;
- const struct file_operations *proc_fops;
-} Entries[] = {
-#ifdef CONFIG_JFS_STATISTICS
- { "lmstats", &jfs_lmstats_proc_fops, },
- { "txstats", &jfs_txstats_proc_fops, },
- { "xtstat", &jfs_xtstat_proc_fops, },
- { "mpstat", &jfs_mpstat_proc_fops, },
-#endif
-#ifdef CONFIG_JFS_DEBUG
- { "TxAnchor", &jfs_txanchor_proc_fops, },
- { "loglevel", &jfs_loglevel_proc_fops }
-#endif
-};
-#define NPROCENT ARRAY_SIZE(Entries)
-
void jfs_proc_init(void)
{
- int i;
+ struct proc_dir_entry *base;
- if (!(base = proc_mkdir("fs/jfs", NULL)))
+ base = proc_mkdir("fs/jfs", NULL);
+ if (!base)
return;
- for (i = 0; i < NPROCENT; i++)
- proc_create(Entries[i].name, 0, base, Entries[i].proc_fops);
+#ifdef CONFIG_JFS_STATISTICS
+ proc_create_single("lmstats", 0, base, jfs_lmstats_proc_show);
+ proc_create_single("txstats", 0, base, jfs_txstats_proc_show);
+ proc_create_single("xtstat", 0, base, jfs_xtstat_proc_show);
+ proc_create_single("mpstat", 0, base, jfs_mpstat_proc_show);
+#endif
+#ifdef CONFIG_JFS_DEBUG
+ proc_create_single("TxAnchor", 0, base, jfs_txanchor_proc_show);
+ proc_create("loglevel", 0, base, &jfs_loglevel_proc_fops);
+#endif
}
void jfs_proc_clean(void)
{
- int i;
-
- if (base) {
- for (i = 0; i < NPROCENT; i++)
- remove_proc_entry(Entries[i].name, base);
- remove_proc_entry("fs/jfs", NULL);
- }
+ remove_proc_subtree("fs/jfs", NULL);
}
#endif /* PROC_FS_JFS */
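proc_create_single() registers a /proc file from a bare seq_file show callback, and remove_proc_subtree() tears down an entire directory, which is what lets the Entries[] table and the open/fops boilerplate above disappear. A standalone sketch with illustrative names:

static int example_proc_show(struct seq_file *m, void *v)
{
	seq_puts(m, "example statistics\n");
	return 0;
}

static int __init example_proc_init(void)
{
	proc_create_single("example", 0, NULL, example_proc_show);
	return 0;
}

static void example_proc_clean(void)
{
	remove_proc_subtree("example", NULL);
}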
diff --git a/fs/jfs/jfs_debug.h b/fs/jfs/jfs_debug.h
index eafd1300a00b..0d9e35da8462 100644
--- a/fs/jfs/jfs_debug.h
+++ b/fs/jfs/jfs_debug.h
@@ -62,7 +62,7 @@ extern void jfs_proc_clean(void);
extern int jfsloglevel;
-extern const struct file_operations jfs_txanchor_proc_fops;
+int jfs_txanchor_proc_show(struct seq_file *m, void *v);
/* information message: e.g., configuration, major event */
#define jfs_info(fmt, arg...) do { \
@@ -105,10 +105,10 @@ extern const struct file_operations jfs_txanchor_proc_fops;
* ----------
*/
#ifdef CONFIG_JFS_STATISTICS
-extern const struct file_operations jfs_lmstats_proc_fops;
-extern const struct file_operations jfs_txstats_proc_fops;
-extern const struct file_operations jfs_mpstat_proc_fops;
-extern const struct file_operations jfs_xtstat_proc_fops;
+int jfs_lmstats_proc_show(struct seq_file *m, void *v);
+int jfs_txstats_proc_show(struct seq_file *m, void *v);
+int jfs_mpstat_proc_show(struct seq_file *m, void *v);
+int jfs_xtstat_proc_show(struct seq_file *m, void *v);
#define INCREMENT(x) ((x)++)
#define DECREMENT(x) ((x)--)
diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c
index 0e5d412c0b01..6b68df395892 100644
--- a/fs/jfs/jfs_logmgr.c
+++ b/fs/jfs/jfs_logmgr.c
@@ -2493,7 +2493,7 @@ exit:
}
#ifdef CONFIG_JFS_STATISTICS
-static int jfs_lmstats_proc_show(struct seq_file *m, void *v)
+int jfs_lmstats_proc_show(struct seq_file *m, void *v)
{
seq_printf(m,
"JFS Logmgr stats\n"
@@ -2510,16 +2510,4 @@ static int jfs_lmstats_proc_show(struct seq_file *m, void *v)
lmStat.partial_page);
return 0;
}
-
-static int jfs_lmstats_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, jfs_lmstats_proc_show, NULL);
-}
-
-const struct file_operations jfs_lmstats_proc_fops = {
- .open = jfs_lmstats_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
#endif /* CONFIG_JFS_STATISTICS */
diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c
index 1a3b0cc22ad3..fa2c6824c7f2 100644
--- a/fs/jfs/jfs_metapage.c
+++ b/fs/jfs/jfs_metapage.c
@@ -815,7 +815,7 @@ void __invalidate_metapages(struct inode *ip, s64 addr, int len)
}
#ifdef CONFIG_JFS_STATISTICS
-static int jfs_mpstat_proc_show(struct seq_file *m, void *v)
+int jfs_mpstat_proc_show(struct seq_file *m, void *v)
{
seq_printf(m,
"JFS Metapage statistics\n"
@@ -828,16 +828,4 @@ static int jfs_mpstat_proc_show(struct seq_file *m, void *v)
mpStat.lockwait);
return 0;
}
-
-static int jfs_mpstat_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, jfs_mpstat_proc_show, NULL);
-}
-
-const struct file_operations jfs_mpstat_proc_fops = {
- .open = jfs_mpstat_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
#endif
diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c
index 4d973524c887..a5663cb621d8 100644
--- a/fs/jfs/jfs_txnmgr.c
+++ b/fs/jfs/jfs_txnmgr.c
@@ -2998,7 +2998,7 @@ int jfs_sync(void *arg)
}
#if defined(CONFIG_PROC_FS) && defined(CONFIG_JFS_DEBUG)
-static int jfs_txanchor_proc_show(struct seq_file *m, void *v)
+int jfs_txanchor_proc_show(struct seq_file *m, void *v)
{
char *freewait;
char *freelockwait;
@@ -3032,22 +3032,10 @@ static int jfs_txanchor_proc_show(struct seq_file *m, void *v)
list_empty(&TxAnchor.unlock_queue) ? "" : "not ");
return 0;
}
-
-static int jfs_txanchor_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, jfs_txanchor_proc_show, NULL);
-}
-
-const struct file_operations jfs_txanchor_proc_fops = {
- .open = jfs_txanchor_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
#endif
#if defined(CONFIG_PROC_FS) && defined(CONFIG_JFS_STATISTICS)
-static int jfs_txstats_proc_show(struct seq_file *m, void *v)
+int jfs_txstats_proc_show(struct seq_file *m, void *v)
{
seq_printf(m,
"JFS TxStats\n"
@@ -3072,16 +3060,4 @@ static int jfs_txstats_proc_show(struct seq_file *m, void *v)
TxStat.txLockAlloc_freelock);
return 0;
}
-
-static int jfs_txstats_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, jfs_txstats_proc_show, NULL);
-}
-
-const struct file_operations jfs_txstats_proc_fops = {
- .open = jfs_txstats_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
#endif
diff --git a/fs/jfs/jfs_xtree.c b/fs/jfs/jfs_xtree.c
index 5cde6d2fcfca..2c200b5256a6 100644
--- a/fs/jfs/jfs_xtree.c
+++ b/fs/jfs/jfs_xtree.c
@@ -3874,7 +3874,7 @@ s64 xtTruncate_pmap(tid_t tid, struct inode *ip, s64 committed_size)
}
#ifdef CONFIG_JFS_STATISTICS
-static int jfs_xtstat_proc_show(struct seq_file *m, void *v)
+int jfs_xtstat_proc_show(struct seq_file *m, void *v)
{
seq_printf(m,
"JFS Xtree statistics\n"
@@ -3887,16 +3887,4 @@ static int jfs_xtstat_proc_show(struct seq_file *m, void *v)
xtStat.split);
return 0;
}
-
-static int jfs_xtstat_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, jfs_xtstat_proc_show, NULL);
-}
-
-const struct file_operations jfs_xtstat_proc_fops = {
- .open = jfs_xtstat_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
#endif
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index b41596d71858..56c3fcbfe80e 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -178,8 +178,7 @@ static int jfs_create(struct inode *dip, struct dentry *dentry, umode_t mode,
unlock_new_inode(ip);
iput(ip);
} else {
- unlock_new_inode(ip);
- d_instantiate(dentry, ip);
+ d_instantiate_new(dentry, ip);
}
out2:
@@ -313,8 +312,7 @@ static int jfs_mkdir(struct inode *dip, struct dentry *dentry, umode_t mode)
unlock_new_inode(ip);
iput(ip);
} else {
- unlock_new_inode(ip);
- d_instantiate(dentry, ip);
+ d_instantiate_new(dentry, ip);
}
out2:
@@ -1059,8 +1057,7 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry,
unlock_new_inode(ip);
iput(ip);
} else {
- unlock_new_inode(ip);
- d_instantiate(dentry, ip);
+ d_instantiate_new(dentry, ip);
}
out2:
@@ -1447,8 +1444,7 @@ static int jfs_mknod(struct inode *dir, struct dentry *dentry,
unlock_new_inode(ip);
iput(ip);
} else {
- unlock_new_inode(ip);
- d_instantiate(dentry, ip);
+ d_instantiate_new(dentry, ip);
}
out1:
diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c
index 26dd9a50f383..ff2716f9322e 100644
--- a/fs/kernfs/mount.c
+++ b/fs/kernfs/mount.c
@@ -316,6 +316,7 @@ struct dentry *kernfs_mount_ns(struct file_system_type *fs_type, int flags,
info->root = root;
info->ns = ns;
+ INIT_LIST_HEAD(&info->node);
sb = sget_userns(fs_type, kernfs_test_super, kernfs_set_super, flags,
&init_user_ns, info);
diff --git a/fs/locks.c b/fs/locks.c
index 62bbe8b31f26..05e211be8684 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -2788,22 +2788,10 @@ static const struct seq_operations locks_seq_operations = {
.show = locks_show,
};
-static int locks_open(struct inode *inode, struct file *filp)
-{
- return seq_open_private(filp, &locks_seq_operations,
- sizeof(struct locks_iterator));
-}
-
-static const struct file_operations proc_locks_operations = {
- .open = locks_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release_private,
-};
-
static int __init proc_locks_init(void)
{
- proc_create("locks", 0, NULL, &proc_locks_operations);
+ proc_create_seq_private("locks", 0, NULL, &locks_seq_operations,
+ sizeof(struct locks_iterator), NULL);
return 0;
}
fs_initcall(proc_locks_init);
diff --git a/fs/minix/namei.c b/fs/minix/namei.c
index ccf0f00030bf..1a6084d2b02e 100644
--- a/fs/minix/namei.c
+++ b/fs/minix/namei.c
@@ -28,13 +28,9 @@ static struct dentry *minix_lookup(struct inode * dir, struct dentry *dentry, un
return ERR_PTR(-ENAMETOOLONG);
ino = minix_inode_by_name(dentry);
- if (ino) {
+ if (ino)
inode = minix_iget(dir->i_sb, ino);
- if (IS_ERR(inode))
- return ERR_CAST(inode);
- }
- d_add(dentry, inode);
- return NULL;
+ return d_splice_alias(inode, dentry);
}
static int minix_mknod(struct inode * dir, struct dentry *dentry, umode_t mode, dev_t rdev)
diff --git a/fs/namei.c b/fs/namei.c
index 186bd2464fd5..6df1f61855d6 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -984,13 +984,15 @@ static bool safe_hardlink_source(struct inode *inode)
*/
static int may_linkat(struct path *link)
{
- struct inode *inode;
+ struct inode *inode = link->dentry->d_inode;
+
+ /* Inode writeback is not safe when the uid or gid are invalid. */
+ if (!uid_valid(inode->i_uid) || !gid_valid(inode->i_gid))
+ return -EOVERFLOW;
if (!sysctl_protected_hardlinks)
return 0;
- inode = link->dentry->d_inode;
-
/* Source inode owner (or CAP_FOWNER) can hardlink all they like,
* otherwise, it must be a safe source.
*/
@@ -1438,10 +1440,8 @@ static int path_parent_directory(struct path *path)
static int follow_dotdot(struct nameidata *nd)
{
while(1) {
- if (nd->path.dentry == nd->root.dentry &&
- nd->path.mnt == nd->root.mnt) {
+ if (path_equal(&nd->path, &nd->root))
break;
- }
if (nd->path.dentry != nd->path.mnt->mnt_root) {
int ret = path_parent_directory(&nd->path);
if (ret)
@@ -2749,6 +2749,11 @@ static int may_delete(struct inode *dir, struct dentry *victim, bool isdir)
BUG_ON(!inode);
BUG_ON(victim->d_parent->d_inode != dir);
+
+ /* Inode writeback is not safe when the uid or gid are invalid. */
+ if (!uid_valid(inode->i_uid) || !gid_valid(inode->i_gid))
+ return -EOVERFLOW;
+
audit_inode_child(dir, victim, AUDIT_TYPE_CHILD_DELETE);
error = inode_permission(dir, MAY_WRITE | MAY_EXEC);
@@ -3367,7 +3372,9 @@ finish_open_created:
goto out;
*opened |= FILE_OPENED;
opened:
- error = ima_file_check(file, op->acc_mode, *opened);
+ error = open_check_o_direct(file);
+ if (!error)
+ error = ima_file_check(file, op->acc_mode, *opened);
if (!error && will_truncate)
error = handle_truncate(file);
out:
@@ -3447,6 +3454,9 @@ static int do_tmpfile(struct nameidata *nd, unsigned flags,
error = finish_open(file, child, NULL, opened);
if (error)
goto out2;
+ error = open_check_o_direct(file);
+ if (error)
+ fput(file);
out2:
mnt_drop_write(path.mnt);
out:
@@ -3672,7 +3682,8 @@ int vfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev)
if (error)
return error;
- if ((S_ISCHR(mode) || S_ISBLK(mode)) && !capable(CAP_MKNOD))
+ if ((S_ISCHR(mode) || S_ISBLK(mode)) &&
+ !ns_capable(dentry->d_sb->s_user_ns, CAP_MKNOD))
return -EPERM;
if (!dir->i_op->mknod)
@@ -3847,11 +3858,11 @@ int vfs_rmdir(struct inode *dir, struct dentry *dentry)
if (error)
goto out;
- shrink_dcache_parent(dentry);
error = dir->i_op->rmdir(dir, dentry);
if (error)
goto out;
+ shrink_dcache_parent(dentry);
dentry->d_inode->i_flags |= S_DEAD;
dont_mount(dentry);
detach_mounts(dentry);
@@ -4434,8 +4445,6 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
old_dir->i_nlink >= max_links)
goto out;
}
- if (is_dir && !(flags & RENAME_EXCHANGE) && target)
- shrink_dcache_parent(new_dentry);
if (!is_dir) {
error = try_break_deleg(source, delegated_inode);
if (error)
@@ -4452,8 +4461,10 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
goto out;
if (!(flags & RENAME_EXCHANGE) && target) {
- if (is_dir)
+ if (is_dir) {
+ shrink_dcache_parent(new_dentry);
target->i_flags |= S_DEAD;
+ }
dont_mount(new_dentry);
detach_mounts(new_dentry);
}
diff --git a/fs/namespace.c b/fs/namespace.c
index 5f75969adff1..8ddd14806799 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1590,7 +1590,7 @@ static int do_umount(struct mount *mnt, int flags)
* Special case for "unmounting" root ...
* we just try to remount it readonly.
*/
- if (!capable(CAP_SYS_ADMIN))
+ if (!ns_capable(sb->s_user_ns, CAP_SYS_ADMIN))
return -EPERM;
down_write(&sb->s_umount);
if (!sb_rdonly(sb))
@@ -2333,7 +2333,7 @@ static int do_remount(struct path *path, int ms_flags, int sb_flags,
down_write(&sb->s_umount);
if (ms_flags & MS_BIND)
err = change_mount_flags(path->mnt, ms_flags);
- else if (!capable(CAP_SYS_ADMIN))
+ else if (!ns_capable(sb->s_user_ns, CAP_SYS_ADMIN))
err = -EPERM;
else
err = do_remount_sb(sb, sb_flags, data, 0);
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index b9129e2befea..bbc91d7ca1bd 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -1067,7 +1067,6 @@ void nfs_clients_init(struct net *net)
}
#ifdef CONFIG_PROC_FS
-static int nfs_server_list_open(struct inode *inode, struct file *file);
static void *nfs_server_list_start(struct seq_file *p, loff_t *pos);
static void *nfs_server_list_next(struct seq_file *p, void *v, loff_t *pos);
static void nfs_server_list_stop(struct seq_file *p, void *v);
@@ -1080,14 +1079,6 @@ static const struct seq_operations nfs_server_list_ops = {
.show = nfs_server_list_show,
};
-static const struct file_operations nfs_server_list_fops = {
- .open = nfs_server_list_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release_net,
-};
-
-static int nfs_volume_list_open(struct inode *inode, struct file *file);
static void *nfs_volume_list_start(struct seq_file *p, loff_t *pos);
static void *nfs_volume_list_next(struct seq_file *p, void *v, loff_t *pos);
static void nfs_volume_list_stop(struct seq_file *p, void *v);
@@ -1100,23 +1091,6 @@ static const struct seq_operations nfs_volume_list_ops = {
.show = nfs_volume_list_show,
};
-static const struct file_operations nfs_volume_list_fops = {
- .open = nfs_volume_list_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release_net,
-};
-
-/*
- * open "/proc/fs/nfsfs/servers" which provides a summary of servers with which
- * we're dealing
- */
-static int nfs_server_list_open(struct inode *inode, struct file *file)
-{
- return seq_open_net(inode, file, &nfs_server_list_ops,
- sizeof(struct seq_net_private));
-}
-
/*
* set up the iterator to start reading from the server list and return the first item
*/
@@ -1185,15 +1159,6 @@ static int nfs_server_list_show(struct seq_file *m, void *v)
}
/*
- * open "/proc/fs/nfsfs/volumes" which provides a summary of extant volumes
- */
-static int nfs_volume_list_open(struct inode *inode, struct file *file)
-{
- return seq_open_net(inode, file, &nfs_volume_list_ops,
- sizeof(struct seq_net_private));
-}
-
-/*
* set up the iterator to start reading from the volume list and return the first item
*/
static void *nfs_volume_list_start(struct seq_file *m, loff_t *_pos)
@@ -1278,14 +1243,14 @@ int nfs_fs_proc_net_init(struct net *net)
goto error_0;
/* a file of servers with which we're dealing */
- p = proc_create("servers", S_IFREG|S_IRUGO,
- nn->proc_nfsfs, &nfs_server_list_fops);
+ p = proc_create_net("servers", S_IFREG|S_IRUGO, nn->proc_nfsfs,
+ &nfs_server_list_ops, sizeof(struct seq_net_private));
if (!p)
goto error_1;
/* a file of volumes that we have mounted */
- p = proc_create("volumes", S_IFREG|S_IRUGO,
- nn->proc_nfsfs, &nfs_volume_list_fops);
+ p = proc_create_net("volumes", S_IFREG|S_IRUGO, nn->proc_nfsfs,
+ &nfs_volume_list_ops, sizeof(struct seq_net_private));
if (!p)
goto error_1;
return 0;
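proc_create_net() is the per-network-namespace variant: it wires up seq_open_net()/seq_release_net() internally and allocates the requested private state per open, making the two hand-written open functions above redundant. A sketch reusing the seq_operations from this hunk, registered under the generic net->proc_net directory for illustration:

static int __net_init example_net_init(struct net *net)
{
	if (!proc_create_net("servers", 0444, net->proc_net,
			     &nfs_server_list_ops,
			     sizeof(struct seq_net_private)))
		return -ENOMEM;
	return 0;
}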
diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c
index 70b8bf781fce..a43dfedd69ec 100644
--- a/fs/nfsd/blocklayout.c
+++ b/fs/nfsd/blocklayout.c
@@ -227,7 +227,7 @@ static int nfsd4_scsi_identify_device(struct block_device *bdev,
if (!buf)
return -ENOMEM;
- rq = blk_get_request(q, REQ_OP_SCSI_IN, GFP_KERNEL);
+ rq = blk_get_request(q, REQ_OP_SCSI_IN, 0);
if (IS_ERR(rq)) {
error = -ENOMEM;
goto out_free_buf;
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 2410b093a2e6..b0555d7d8200 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -1201,6 +1201,28 @@ nfsd_create_locked(struct svc_rqst *rqstp, struct svc_fh *fhp,
break;
case S_IFDIR:
host_err = vfs_mkdir(dirp, dchild, iap->ia_mode);
+ if (!host_err && unlikely(d_unhashed(dchild))) {
+ struct dentry *d;
+ d = lookup_one_len(dchild->d_name.name,
+ dchild->d_parent,
+ dchild->d_name.len);
+ if (IS_ERR(d)) {
+ host_err = PTR_ERR(d);
+ break;
+ }
+ if (unlikely(d_is_negative(d))) {
+ dput(d);
+ err = nfserr_serverfault;
+ goto out;
+ }
+ dput(resfhp->fh_dentry);
+ resfhp->fh_dentry = dget(d);
+ err = fh_update(resfhp);
+ dput(dchild);
+ dchild = d;
+ if (err)
+ goto out;
+ }
break;
case S_IFCHR:
case S_IFBLK:
diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c
index 1a2894aa0194..dd52d3f82e8d 100644
--- a/fs/nilfs2/namei.c
+++ b/fs/nilfs2/namei.c
@@ -46,8 +46,7 @@ static inline int nilfs_add_nondir(struct dentry *dentry, struct inode *inode)
int err = nilfs_add_link(dentry, inode);
if (!err) {
- d_instantiate(dentry, inode);
- unlock_new_inode(inode);
+ d_instantiate_new(dentry, inode);
return 0;
}
inode_dec_link_count(inode);
@@ -243,8 +242,7 @@ static int nilfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
goto out_fail;
nilfs_mark_inode_dirty(inode);
- d_instantiate(dentry, inode);
- unlock_new_inode(inode);
+ d_instantiate_new(dentry, inode);
out:
if (!err)
err = nilfs_transaction_commit(dir->i_sb);
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index 91a8889abf9b..ea8c551bcd7e 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -570,16 +570,7 @@ static struct bio *o2hb_setup_one_bio(struct o2hb_region *reg,
current_page, vec_len, vec_start);
len = bio_add_page(bio, page, vec_len, vec_start);
- if (len != vec_len) {
- mlog(ML_ERROR, "Adding page[%d] to bio failed, "
- "page %p, len %d, vec_len %u, vec_start %u, "
- "bi_sector %llu\n", current_page, page, len,
- vec_len, vec_start,
- (unsigned long long)bio->bi_iter.bi_sector);
- bio_put(bio);
- bio = ERR_PTR(-EIO);
- return bio;
- }
+ if (len != vec_len)
+ break;
cs += vec_len / (PAGE_SIZE/spp);
vec_start = 0;
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 01c6b3894406..7869622af22a 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -4250,10 +4250,11 @@ out:
static int ocfs2_reflink(struct dentry *old_dentry, struct inode *dir,
struct dentry *new_dentry, bool preserve)
{
- int error;
+ int error, had_lock;
struct inode *inode = d_inode(old_dentry);
struct buffer_head *old_bh = NULL;
struct inode *new_orphan_inode = NULL;
+ struct ocfs2_lock_holder oh;
if (!ocfs2_refcount_tree(OCFS2_SB(inode->i_sb)))
return -EOPNOTSUPP;
@@ -4295,6 +4296,14 @@ static int ocfs2_reflink(struct dentry *old_dentry, struct inode *dir,
goto out;
}
+ had_lock = ocfs2_inode_lock_tracker(new_orphan_inode, NULL, 1,
+ &oh);
+ if (had_lock < 0) {
+ error = had_lock;
+ mlog_errno(error);
+ goto out;
+ }
+
/* If the security isn't preserved, we need to re-initialize them. */
if (!preserve) {
error = ocfs2_init_security_and_acl(dir, new_orphan_inode,
@@ -4302,14 +4311,15 @@ static int ocfs2_reflink(struct dentry *old_dentry, struct inode *dir,
if (error)
mlog_errno(error);
}
-out:
if (!error) {
error = ocfs2_mv_orphaned_inode_to_new(dir, new_orphan_inode,
new_dentry);
if (error)
mlog_errno(error);
}
+ ocfs2_inode_unlock_tracker(new_orphan_inode, 1, &oh, had_lock);
+out:
if (new_orphan_inode) {
/*
* We need to open_unlock the inode no matter whether we
diff --git a/fs/omfs/dir.c b/fs/omfs/dir.c
index b7146526afff..4bee3a72b9f3 100644
--- a/fs/omfs/dir.c
+++ b/fs/omfs/dir.c
@@ -305,11 +305,10 @@ static struct dentry *omfs_lookup(struct inode *dir, struct dentry *dentry,
ino_t ino = be64_to_cpu(oi->i_head.h_self);
brelse(bh);
inode = omfs_iget(dir->i_sb, ino);
- if (IS_ERR(inode))
- return ERR_CAST(inode);
+ } else if (bh != ERR_PTR(-ENOENT)) {
+ inode = ERR_CAST(bh);
}
- d_add(dentry, inode);
- return NULL;
+ return d_splice_alias(inode, dentry);
}
/* sanity check block's self pointer */
diff --git a/fs/open.c b/fs/open.c
index c5ee7cd60424..d0e955b558ad 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -724,6 +724,16 @@ SYSCALL_DEFINE3(fchown, unsigned int, fd, uid_t, user, gid_t, group)
return ksys_fchown(fd, user, group);
}
+int open_check_o_direct(struct file *f)
+{
+ /* NB: we're sure to have correct a_ops only after f_op->open */
+ if (f->f_flags & O_DIRECT) {
+ if (!f->f_mapping->a_ops || !f->f_mapping->a_ops->direct_IO)
+ return -EINVAL;
+ }
+ return 0;
+}
+
static int do_dentry_open(struct file *f,
struct inode *inode,
int (*open)(struct inode *, struct file *),
@@ -745,7 +755,7 @@ static int do_dentry_open(struct file *f,
if (unlikely(f->f_flags & O_PATH)) {
f->f_mode = FMODE_PATH;
f->f_op = &empty_fops;
- goto done;
+ return 0;
}
if (f->f_mode & FMODE_WRITE && !special_file(inode->i_mode)) {
@@ -798,12 +808,7 @@ static int do_dentry_open(struct file *f,
f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
file_ra_state_init(&f->f_ra, f->f_mapping->host->i_mapping);
-done:
- /* NB: we're sure to have correct a_ops only after f_op->open */
- error = -EINVAL;
- if ((f->f_flags & O_DIRECT) &&
- (!f->f_mapping->a_ops || !f->f_mapping->a_ops->direct_IO))
- goto out_fput;
+
return 0;
cleanup_all:
@@ -818,9 +823,6 @@ cleanup_file:
f->f_path.dentry = NULL;
f->f_inode = NULL;
return error;
-out_fput:
- fput(f);
- return error;
}
/**
@@ -918,14 +920,20 @@ struct file *dentry_open(const struct path *path, int flags,
BUG_ON(!path->mnt);
f = get_empty_filp();
- if (IS_ERR(f))
- return f;
-
- f->f_flags = flags;
- error = vfs_open(path, f, cred);
- if (error) {
- put_filp(f);
- return ERR_PTR(error);
+ if (!IS_ERR(f)) {
+ f->f_flags = flags;
+ error = vfs_open(path, f, cred);
+ if (!error) {
+ /* from now on we need fput() to dispose of f */
+ error = open_check_o_direct(f);
+ if (error) {
+ fput(f);
+ f = ERR_PTR(error);
+ }
+ } else {
+ put_filp(f);
+ f = ERR_PTR(error);
+ }
}
return f;
}
diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c
index 2200662a9bf1..607092f367ad 100644
--- a/fs/openpromfs/inode.c
+++ b/fs/openpromfs/inode.c
@@ -256,8 +256,7 @@ found:
break;
}
- d_add(dentry, inode);
- return NULL;
+ return d_splice_alias(inode, dentry);
}
static int openpromfs_readdir(struct file *file, struct dir_context *ctx)
diff --git a/fs/orangefs/namei.c b/fs/orangefs/namei.c
index 6e3134e6d98a..365cd73d9109 100644
--- a/fs/orangefs/namei.c
+++ b/fs/orangefs/namei.c
@@ -75,8 +75,7 @@ static int orangefs_create(struct inode *dir,
get_khandle_from_ino(inode),
dentry);
- d_instantiate(dentry, inode);
- unlock_new_inode(inode);
+ d_instantiate_new(dentry, inode);
orangefs_set_timeout(dentry);
ORANGEFS_I(inode)->getattr_time = jiffies - 1;
ORANGEFS_I(inode)->getattr_mask = STATX_BASIC_STATS;
@@ -111,7 +110,6 @@ static struct dentry *orangefs_lookup(struct inode *dir, struct dentry *dentry,
struct orangefs_inode_s *parent = ORANGEFS_I(dir);
struct orangefs_kernel_op_s *new_op;
struct inode *inode;
- struct dentry *res;
int ret = -EINVAL;
/*
@@ -159,65 +157,18 @@ static struct dentry *orangefs_lookup(struct inode *dir, struct dentry *dentry,
new_op->downcall.resp.lookup.refn.fs_id,
ret);
- if (ret < 0) {
- if (ret == -ENOENT) {
- /*
- * if no inode was found, add a negative dentry to
- * dcache anyway; if we don't, we don't hold expected
- * lookup semantics and we most noticeably break
- * during directory renames.
- *
- * however, if the operation failed or exited, do not
- * add the dentry (e.g. in the case that a touch is
- * issued on a file that already exists that was
- * interrupted during this lookup -- no need to add
- * another negative dentry for an existing file)
- */
-
- gossip_debug(GOSSIP_NAME_DEBUG,
- "orangefs_lookup: Adding *negative* dentry "
- "%p for %pd\n",
- dentry,
- dentry);
-
- d_add(dentry, NULL);
- res = NULL;
- goto out;
- }
-
+ if (ret >= 0) {
+ orangefs_set_timeout(dentry);
+ inode = orangefs_iget(dir->i_sb, &new_op->downcall.resp.lookup.refn);
+ } else if (ret == -ENOENT) {
+ inode = NULL;
+ } else {
/* must be a non-recoverable error */
- res = ERR_PTR(ret);
- goto out;
- }
-
- orangefs_set_timeout(dentry);
-
- inode = orangefs_iget(dir->i_sb, &new_op->downcall.resp.lookup.refn);
- if (IS_ERR(inode)) {
- gossip_debug(GOSSIP_NAME_DEBUG,
- "error %ld from iget\n", PTR_ERR(inode));
- res = ERR_CAST(inode);
- goto out;
+ inode = ERR_PTR(ret);
}
- gossip_debug(GOSSIP_NAME_DEBUG,
- "%s:%s:%d "
- "Found good inode [%lu] with count [%d]\n",
- __FILE__,
- __func__,
- __LINE__,
- inode->i_ino,
- (int)atomic_read(&inode->i_count));
-
- /* update dentry/inode pair into dcache */
- res = d_splice_alias(inode, dentry);
-
- gossip_debug(GOSSIP_NAME_DEBUG,
- "Lookup success (inode ct = %d)\n",
- (int)atomic_read(&inode->i_count));
-out:
op_release(new_op);
- return res;
+ return d_splice_alias(inode, dentry);
}
/* return 0 on success; non-zero otherwise */
@@ -332,8 +283,7 @@ static int orangefs_symlink(struct inode *dir,
"Assigned symlink inode new number of %pU\n",
get_khandle_from_ino(inode));
- d_instantiate(dentry, inode);
- unlock_new_inode(inode);
+ d_instantiate_new(dentry, inode);
orangefs_set_timeout(dentry);
ORANGEFS_I(inode)->getattr_time = jiffies - 1;
ORANGEFS_I(inode)->getattr_mask = STATX_BASIC_STATS;
@@ -402,8 +352,7 @@ static int orangefs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode
"Assigned dir inode new number of %pU\n",
get_khandle_from_ino(inode));
- d_instantiate(dentry, inode);
- unlock_new_inode(inode);
+ d_instantiate_new(dentry, inode);
orangefs_set_timeout(dentry);
ORANGEFS_I(inode)->getattr_time = jiffies - 1;
ORANGEFS_I(inode)->getattr_mask = STATX_BASIC_STATS;
diff --git a/fs/pipe.c b/fs/pipe.c
index 39d6f431da83..bb0840e234f3 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -509,19 +509,22 @@ static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
}
}
-/* No kernel lock held - fine */
-static __poll_t
-pipe_poll(struct file *filp, poll_table *wait)
+static struct wait_queue_head *
+pipe_get_poll_head(struct file *filp, __poll_t events)
{
- __poll_t mask;
struct pipe_inode_info *pipe = filp->private_data;
- int nrbufs;
- poll_wait(filp, &pipe->wait, wait);
+ return &pipe->wait;
+}
+
+/* No kernel lock held - fine */
+static __poll_t pipe_poll_mask(struct file *filp, __poll_t events)
+{
+ struct pipe_inode_info *pipe = filp->private_data;
+ int nrbufs = pipe->nrbufs;
+ __poll_t mask = 0;
/* Reading only -- no need for acquiring the semaphore. */
- nrbufs = pipe->nrbufs;
- mask = 0;
if (filp->f_mode & FMODE_READ) {
mask = (nrbufs > 0) ? EPOLLIN | EPOLLRDNORM : 0;
if (!pipe->writers && filp->f_version != pipe->w_counter)
@@ -1020,7 +1023,8 @@ const struct file_operations pipefifo_fops = {
.llseek = no_llseek,
.read_iter = pipe_read,
.write_iter = pipe_write,
- .poll = pipe_poll,
+ .get_poll_head = pipe_get_poll_head,
+ .poll_mask = pipe_poll_mask,
.unlocked_ioctl = pipe_ioctl,
.release = pipe_release,
.fasync = pipe_fasync,
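The ->get_poll_head()/->poll_mask() pair splits poll into a constant wait-queue lookup and a lockless readiness computation; the interface is introduced around this series, so the following is only a sketch of how a hypothetical driver would adopt it:

struct example_dev {
	struct wait_queue_head waitq;
	bool ready;
};

static struct wait_queue_head *example_get_poll_head(struct file *filp,
						     __poll_t events)
{
	struct example_dev *dev = filp->private_data;

	return &dev->waitq;	/* must not vary between calls */
}

static __poll_t example_poll_mask(struct file *filp, __poll_t events)
{
	struct example_dev *dev = filp->private_data;

	return READ_ONCE(dev->ready) ? EPOLLIN | EPOLLRDNORM : 0;
}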
diff --git a/fs/proc/array.c b/fs/proc/array.c
index ae2c807fd719..e6d7f41b6684 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -85,6 +85,7 @@
#include <linux/delayacct.h>
#include <linux/seq_file.h>
#include <linux/pid_namespace.h>
+#include <linux/prctl.h>
#include <linux/ptrace.h>
#include <linux/tracehook.h>
#include <linux/string_helpers.h>
@@ -335,6 +336,30 @@ static inline void task_seccomp(struct seq_file *m, struct task_struct *p)
#ifdef CONFIG_SECCOMP
seq_put_decimal_ull(m, "\nSeccomp:\t", p->seccomp.mode);
#endif
+ seq_printf(m, "\nSpeculation_Store_Bypass:\t");
+ switch (arch_prctl_spec_ctrl_get(p, PR_SPEC_STORE_BYPASS)) {
+ case -EINVAL:
+ seq_printf(m, "unknown");
+ break;
+ case PR_SPEC_NOT_AFFECTED:
+ seq_printf(m, "not vulnerable");
+ break;
+ case PR_SPEC_PRCTL | PR_SPEC_FORCE_DISABLE:
+ seq_printf(m, "thread force mitigated");
+ break;
+ case PR_SPEC_PRCTL | PR_SPEC_DISABLE:
+ seq_printf(m, "thread mitigated");
+ break;
+ case PR_SPEC_PRCTL | PR_SPEC_ENABLE:
+ seq_printf(m, "thread vulnerable");
+ break;
+ case PR_SPEC_DISABLE:
+ seq_printf(m, "globally mitigated");
+ break;
+ default:
+ seq_printf(m, "vulnerable");
+ break;
+ }
seq_putc(m, '\n');
}
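The same per-task state is available to the task itself through prctl(2); a user-space sketch, assuming a kernel with PR_GET_SPECULATION_CTRL (i.e. this series applied):

#include <linux/prctl.h>
#include <sys/prctl.h>
#include <stdio.h>

int main(void)
{
	int ret = prctl(PR_GET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, 0, 0, 0);

	if (ret < 0)
		perror("prctl");
	else
		printf("ssb: 0x%x\n", ret);	/* PR_SPEC_* flag bits */
	return 0;
}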
@@ -677,25 +702,22 @@ out:
static int children_seq_show(struct seq_file *seq, void *v)
{
- struct inode *inode = seq->private;
- pid_t pid;
-
- pid = pid_nr_ns(v, inode->i_sb->s_fs_info);
- seq_printf(seq, "%d ", pid);
+ struct inode *inode = file_inode(seq->file);
+ seq_printf(seq, "%d ", pid_nr_ns(v, proc_pid_ns(inode)));
return 0;
}
static void *children_seq_start(struct seq_file *seq, loff_t *pos)
{
- return get_children_pid(seq->private, NULL, *pos);
+ return get_children_pid(file_inode(seq->file), NULL, *pos);
}
static void *children_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
struct pid *pid;
- pid = get_children_pid(seq->private, v, *pos + 1);
+ pid = get_children_pid(file_inode(seq->file), v, *pos + 1);
put_pid(v);
++*pos;
@@ -716,17 +738,7 @@ static const struct seq_operations children_seq_ops = {
static int children_seq_open(struct inode *inode, struct file *file)
{
- struct seq_file *m;
- int ret;
-
- ret = seq_open(file, &children_seq_ops);
- if (ret)
- return ret;
-
- m = file->private_data;
- m->private = inode;
-
- return ret;
+ return seq_open(file, &children_seq_ops);
}
const struct file_operations proc_tid_children_operations = {
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 1b2ede6abcdf..33ed1746927a 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -261,7 +261,7 @@ static ssize_t proc_pid_cmdline_read(struct file *file, char __user *buf,
* Inherently racy -- command line shares address space
* with code and data.
*/
- rv = access_remote_vm(mm, arg_end - 1, &c, 1, 0);
+ rv = access_remote_vm(mm, arg_end - 1, &c, 1, FOLL_ANON);
if (rv <= 0)
goto out_free_page;
@@ -279,7 +279,7 @@ static ssize_t proc_pid_cmdline_read(struct file *file, char __user *buf,
int nr_read;
_count = min3(count, len, PAGE_SIZE);
- nr_read = access_remote_vm(mm, p, page, _count, 0);
+ nr_read = access_remote_vm(mm, p, page, _count, FOLL_ANON);
if (nr_read < 0)
rv = nr_read;
if (nr_read <= 0)
@@ -325,7 +325,7 @@ static ssize_t proc_pid_cmdline_read(struct file *file, char __user *buf,
bool final;
_count = min3(count, len, PAGE_SIZE);
- nr_read = access_remote_vm(mm, p, page, _count, 0);
+ nr_read = access_remote_vm(mm, p, page, _count, FOLL_ANON);
if (nr_read < 0)
rv = nr_read;
if (nr_read <= 0)
@@ -698,7 +698,7 @@ static bool has_pid_permissions(struct pid_namespace *pid,
static int proc_pid_permission(struct inode *inode, int mask)
{
- struct pid_namespace *pid = inode->i_sb->s_fs_info;
+ struct pid_namespace *pid = proc_pid_ns(inode);
struct task_struct *task;
bool has_perms;
@@ -733,13 +733,11 @@ static const struct inode_operations proc_def_inode_operations = {
static int proc_single_show(struct seq_file *m, void *v)
{
struct inode *inode = m->private;
- struct pid_namespace *ns;
- struct pid *pid;
+ struct pid_namespace *ns = proc_pid_ns(inode);
+ struct pid *pid = proc_pid(inode);
struct task_struct *task;
int ret;
- ns = inode->i_sb->s_fs_info;
- pid = proc_pid(inode);
task = get_pid_task(pid, PIDTYPE_PID);
if (!task)
return -ESRCH;
@@ -946,7 +944,7 @@ static ssize_t environ_read(struct file *file, char __user *buf,
max_len = min_t(size_t, PAGE_SIZE, count);
this_len = min(max_len, this_len);
- retval = access_remote_vm(mm, (env_start + src), page, this_len, 0);
+ retval = access_remote_vm(mm, (env_start + src), page, this_len, FOLL_ANON);
if (retval <= 0) {
ret = retval;
@@ -1410,7 +1408,7 @@ static const struct file_operations proc_fail_nth_operations = {
static int sched_show(struct seq_file *m, void *v)
{
struct inode *inode = m->private;
- struct pid_namespace *ns = inode->i_sb->s_fs_info;
+ struct pid_namespace *ns = proc_pid_ns(inode);
struct task_struct *p;
p = get_proc_task(inode);
@@ -1782,8 +1780,8 @@ int pid_getattr(const struct path *path, struct kstat *stat,
u32 request_mask, unsigned int query_flags)
{
struct inode *inode = d_inode(path->dentry);
+ struct pid_namespace *pid = proc_pid_ns(inode);
struct task_struct *task;
- struct pid_namespace *pid = path->dentry->d_sb->s_fs_info;
generic_fillattr(inode, stat);
@@ -1809,15 +1807,22 @@ int pid_getattr(const struct path *path, struct kstat *stat,
/* dentry stuff */
/*
- * Exceptional case: normally we are not allowed to unhash a busy
- * directory. In this case, however, we can do it - no aliasing problems
- * due to the way we treat inodes.
- *
+ * Set <pid>/... inode ownership (can change due to setuid(), etc.)
+ */
+void pid_update_inode(struct task_struct *task, struct inode *inode)
+{
+ task_dump_owner(task, inode->i_mode, &inode->i_uid, &inode->i_gid);
+
+ inode->i_mode &= ~(S_ISUID | S_ISGID);
+ security_task_to_inode(task, inode);
+}
+
+/*
* Rewrite the inode's ownerships here because the owning task may have
* performed a setuid(), etc.
*
*/
-int pid_revalidate(struct dentry *dentry, unsigned int flags)
+static int pid_revalidate(struct dentry *dentry, unsigned int flags)
{
struct inode *inode;
struct task_struct *task;
@@ -1829,10 +1834,7 @@ int pid_revalidate(struct dentry *dentry, unsigned int flags)
task = get_proc_task(inode);
if (task) {
- task_dump_owner(task, inode->i_mode, &inode->i_uid, &inode->i_gid);
-
- inode->i_mode &= ~(S_ISUID | S_ISGID);
- security_task_to_inode(task, inode);
+ pid_update_inode(task, inode);
put_task_struct(task);
return 1;
}
@@ -1880,8 +1882,8 @@ bool proc_fill_cache(struct file *file, struct dir_context *ctx,
struct dentry *child, *dir = file->f_path.dentry;
struct qstr qname = QSTR_INIT(name, len);
struct inode *inode;
- unsigned type;
- ino_t ino;
+ unsigned type = DT_UNKNOWN;
+ ino_t ino = 1;
child = d_hash_and_lookup(dir, &qname);
if (!child) {
@@ -1890,22 +1892,23 @@ bool proc_fill_cache(struct file *file, struct dir_context *ctx,
if (IS_ERR(child))
goto end_instantiate;
if (d_in_lookup(child)) {
- int err = instantiate(d_inode(dir), child, task, ptr);
+ struct dentry *res;
+ res = instantiate(child, task, ptr);
d_lookup_done(child);
- if (err < 0) {
- dput(child);
+ if (IS_ERR(res))
goto end_instantiate;
+ if (unlikely(res)) {
+ dput(child);
+ child = res;
}
}
}
inode = d_inode(child);
ino = inode->i_ino;
type = inode->i_mode >> 12;
+end_instantiate:
dput(child);
return dir_emit(ctx, name, len, ino, type);
-
-end_instantiate:
- return dir_emit(ctx, name, len, 1, DT_UNKNOWN);
}
/*
@@ -2067,19 +2070,19 @@ static const struct inode_operations proc_map_files_link_inode_operations = {
.setattr = proc_setattr,
};
-static int
-proc_map_files_instantiate(struct inode *dir, struct dentry *dentry,
+static struct dentry *
+proc_map_files_instantiate(struct dentry *dentry,
struct task_struct *task, const void *ptr)
{
fmode_t mode = (fmode_t)(unsigned long)ptr;
struct proc_inode *ei;
struct inode *inode;
- inode = proc_pid_make_inode(dir->i_sb, task, S_IFLNK |
+ inode = proc_pid_make_inode(dentry->d_sb, task, S_IFLNK |
((mode & FMODE_READ ) ? S_IRUSR : 0) |
((mode & FMODE_WRITE) ? S_IWUSR : 0));
if (!inode)
- return -ENOENT;
+ return ERR_PTR(-ENOENT);
ei = PROC_I(inode);
ei->op.proc_get_link = map_files_get_link;
@@ -2088,9 +2091,7 @@ proc_map_files_instantiate(struct inode *dir, struct dentry *dentry,
inode->i_size = 64;
d_set_d_op(dentry, &tid_map_files_dentry_operations);
- d_add(dentry, inode);
-
- return 0;
+ return d_splice_alias(inode, dentry);
}
static struct dentry *proc_map_files_lookup(struct inode *dir,
@@ -2099,19 +2100,19 @@ static struct dentry *proc_map_files_lookup(struct inode *dir,
unsigned long vm_start, vm_end;
struct vm_area_struct *vma;
struct task_struct *task;
- int result;
+ struct dentry *result;
struct mm_struct *mm;
- result = -ENOENT;
+ result = ERR_PTR(-ENOENT);
task = get_proc_task(dir);
if (!task)
goto out;
- result = -EACCES;
+ result = ERR_PTR(-EACCES);
if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS))
goto out_put_task;
- result = -ENOENT;
+ result = ERR_PTR(-ENOENT);
if (dname_to_vma_addr(dentry, &vm_start, &vm_end))
goto out_put_task;
@@ -2125,7 +2126,7 @@ static struct dentry *proc_map_files_lookup(struct inode *dir,
goto out_no_vma;
if (vma->vm_file)
- result = proc_map_files_instantiate(dir, dentry, task,
+ result = proc_map_files_instantiate(dentry, task,
(void *)(unsigned long)vma->vm_file->f_mode);
out_no_vma:
@@ -2134,7 +2135,7 @@ out_no_vma:
out_put_task:
put_task_struct(task);
out:
- return ERR_PTR(result);
+ return result;
}
static const struct inode_operations proc_map_files_inode_operations = {
@@ -2337,7 +2338,7 @@ static int proc_timers_open(struct inode *inode, struct file *file)
return -ENOMEM;
tp->pid = proc_pid(inode);
- tp->ns = inode->i_sb->s_fs_info;
+ tp->ns = proc_pid_ns(inode);
return 0;
}
@@ -2435,16 +2436,16 @@ static const struct file_operations proc_pid_set_timerslack_ns_operations = {
.release = single_release,
};
-static int proc_pident_instantiate(struct inode *dir,
- struct dentry *dentry, struct task_struct *task, const void *ptr)
+static struct dentry *proc_pident_instantiate(struct dentry *dentry,
+ struct task_struct *task, const void *ptr)
{
const struct pid_entry *p = ptr;
struct inode *inode;
struct proc_inode *ei;
- inode = proc_pid_make_inode(dir->i_sb, task, p->mode);
+ inode = proc_pid_make_inode(dentry->d_sb, task, p->mode);
if (!inode)
- goto out;
+ return ERR_PTR(-ENOENT);
ei = PROC_I(inode);
if (S_ISDIR(inode->i_mode))
@@ -2454,13 +2455,9 @@ static int proc_pident_instantiate(struct inode *dir,
if (p->fop)
inode->i_fop = p->fop;
ei->op = p->op;
+ pid_update_inode(task, inode);
d_set_d_op(dentry, &pid_dentry_operations);
- d_add(dentry, inode);
- /* Close the race of the process dying before we return the dentry */
- if (pid_revalidate(dentry, 0))
- return 0;
-out:
- return -ENOENT;
+ return d_splice_alias(inode, dentry);
}
static struct dentry *proc_pident_lookup(struct inode *dir,
@@ -2468,11 +2465,9 @@ static struct dentry *proc_pident_lookup(struct inode *dir,
const struct pid_entry *ents,
unsigned int nents)
{
- int error;
struct task_struct *task = get_proc_task(dir);
const struct pid_entry *p, *last;
-
- error = -ENOENT;
+ struct dentry *res = ERR_PTR(-ENOENT);
if (!task)
goto out_no_task;
@@ -2491,11 +2486,11 @@ static struct dentry *proc_pident_lookup(struct inode *dir,
if (p >= last)
goto out;
- error = proc_pident_instantiate(dir, dentry, task, p);
+ res = proc_pident_instantiate(dentry, task, p);
out:
put_task_struct(task);
out_no_task:
- return ERR_PTR(error);
+ return res;
}
static int proc_pident_readdir(struct file *file, struct dir_context *ctx,
@@ -3138,38 +3133,32 @@ void proc_flush_task(struct task_struct *task)
}
}
-static int proc_pid_instantiate(struct inode *dir,
- struct dentry * dentry,
+static struct dentry *proc_pid_instantiate(struct dentry * dentry,
struct task_struct *task, const void *ptr)
{
struct inode *inode;
- inode = proc_pid_make_inode(dir->i_sb, task, S_IFDIR | S_IRUGO | S_IXUGO);
+ inode = proc_pid_make_inode(dentry->d_sb, task, S_IFDIR | S_IRUGO | S_IXUGO);
if (!inode)
- goto out;
+ return ERR_PTR(-ENOENT);
inode->i_op = &proc_tgid_base_inode_operations;
inode->i_fop = &proc_tgid_base_operations;
inode->i_flags|=S_IMMUTABLE;
set_nlink(inode, nlink_tgid);
+ pid_update_inode(task, inode);
d_set_d_op(dentry, &pid_dentry_operations);
-
- d_add(dentry, inode);
- /* Close the race of the process dying before we return the dentry */
- if (pid_revalidate(dentry, 0))
- return 0;
-out:
- return -ENOENT;
+ return d_splice_alias(inode, dentry);
}
struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, unsigned int flags)
{
- int result = -ENOENT;
struct task_struct *task;
unsigned tgid;
struct pid_namespace *ns;
+ struct dentry *result = ERR_PTR(-ENOENT);
tgid = name_to_int(&dentry->d_name);
if (tgid == ~0U)
@@ -3184,10 +3173,10 @@ struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, unsign
if (!task)
goto out;
- result = proc_pid_instantiate(dir, dentry, task, NULL);
+ result = proc_pid_instantiate(dentry, task, NULL);
put_task_struct(task);
out:
- return ERR_PTR(result);
+ return result;
}
/*
@@ -3239,7 +3228,7 @@ retry:
int proc_pid_readdir(struct file *file, struct dir_context *ctx)
{
struct tgid_iter iter;
- struct pid_namespace *ns = file_inode(file)->i_sb->s_fs_info;
+ struct pid_namespace *ns = proc_pid_ns(file_inode(file));
loff_t pos = ctx->pos;
if (pos >= PID_MAX_LIMIT + TGID_OFFSET)
@@ -3435,37 +3424,32 @@ static const struct inode_operations proc_tid_base_inode_operations = {
.setattr = proc_setattr,
};
-static int proc_task_instantiate(struct inode *dir,
- struct dentry *dentry, struct task_struct *task, const void *ptr)
+static struct dentry *proc_task_instantiate(struct dentry *dentry,
+ struct task_struct *task, const void *ptr)
{
struct inode *inode;
- inode = proc_pid_make_inode(dir->i_sb, task, S_IFDIR | S_IRUGO | S_IXUGO);
-
+ inode = proc_pid_make_inode(dentry->d_sb, task, S_IFDIR | S_IRUGO | S_IXUGO);
if (!inode)
- goto out;
+ return ERR_PTR(-ENOENT);
+
inode->i_op = &proc_tid_base_inode_operations;
inode->i_fop = &proc_tid_base_operations;
- inode->i_flags|=S_IMMUTABLE;
+ inode->i_flags |= S_IMMUTABLE;
set_nlink(inode, nlink_tid);
+ pid_update_inode(task, inode);
d_set_d_op(dentry, &pid_dentry_operations);
-
- d_add(dentry, inode);
- /* Close the race of the process dying before we return the dentry */
- if (pid_revalidate(dentry, 0))
- return 0;
-out:
- return -ENOENT;
+ return d_splice_alias(inode, dentry);
}
static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, unsigned int flags)
{
- int result = -ENOENT;
struct task_struct *task;
struct task_struct *leader = get_proc_task(dir);
unsigned tid;
struct pid_namespace *ns;
+ struct dentry *result = ERR_PTR(-ENOENT);
if (!leader)
goto out_no_task;
@@ -3485,13 +3469,13 @@ static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry
if (!same_thread_group(leader, task))
goto out_drop_task;
- result = proc_task_instantiate(dir, dentry, task, NULL);
+ result = proc_task_instantiate(dentry, task, NULL);
out_drop_task:
put_task_struct(task);
out:
put_task_struct(leader);
out_no_task:
- return ERR_PTR(result);
+ return result;
}
/*
@@ -3588,7 +3572,7 @@ static int proc_task_readdir(struct file *file, struct dir_context *ctx)
/* f_version caches the tgid value that the last readdir call couldn't
* return. lseek aka telldir automagically resets f_version to 0.
*/
- ns = inode->i_sb->s_fs_info;
+ ns = proc_pid_ns(inode);
tid = (int)file->f_version;
file->f_version = 0;
for (task = first_tid(proc_pid(inode), tid, ctx->pos - 2, ns);
diff --git a/fs/proc/cmdline.c b/fs/proc/cmdline.c
index 8233e7af9389..fa762c5fbcb2 100644
--- a/fs/proc/cmdline.c
+++ b/fs/proc/cmdline.c
@@ -11,21 +11,9 @@ static int cmdline_proc_show(struct seq_file *m, void *v)
return 0;
}
-static int cmdline_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, cmdline_proc_show, NULL);
-}
-
-static const struct file_operations cmdline_proc_fops = {
- .open = cmdline_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
static int __init proc_cmdline_init(void)
{
- proc_create("cmdline", 0, NULL, &cmdline_proc_fops);
+ proc_create_single("cmdline", 0, NULL, cmdline_proc_show);
return 0;
}
fs_initcall(proc_cmdline_init);
diff --git a/fs/proc/consoles.c b/fs/proc/consoles.c
index a8ac48aebd59..954caf0b7fee 100644
--- a/fs/proc/consoles.c
+++ b/fs/proc/consoles.c
@@ -91,21 +91,9 @@ static const struct seq_operations consoles_op = {
.show = show_console_dev
};
-static int consoles_open(struct inode *inode, struct file *file)
-{
- return seq_open(file, &consoles_op);
-}
-
-static const struct file_operations proc_consoles_operations = {
- .open = consoles_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
static int __init proc_consoles_init(void)
{
- proc_create("consoles", 0, NULL, &proc_consoles_operations);
+ proc_create_seq("consoles", 0, NULL, &consoles_op);
return 0;
}
fs_initcall(proc_consoles_init);
diff --git a/fs/proc/devices.c b/fs/proc/devices.c
index 2c7f22b14489..37d38697eaf8 100644
--- a/fs/proc/devices.c
+++ b/fs/proc/devices.c
@@ -51,21 +51,9 @@ static const struct seq_operations devinfo_ops = {
.show = devinfo_show
};
-static int devinfo_open(struct inode *inode, struct file *filp)
-{
- return seq_open(filp, &devinfo_ops);
-}
-
-static const struct file_operations proc_devinfo_operations = {
- .open = devinfo_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
static int __init proc_devices_init(void)
{
- proc_create("devices", 0, NULL, &proc_devinfo_operations);
+ proc_create_seq("devices", 0, NULL, &devinfo_ops);
return 0;
}
fs_initcall(proc_devices_init);
diff --git a/fs/proc/fd.c b/fs/proc/fd.c
index 6b80cd1e419a..05b9893e9a22 100644
--- a/fs/proc/fd.c
+++ b/fs/proc/fd.c
@@ -81,9 +81,41 @@ static const struct file_operations proc_fdinfo_file_operations = {
.release = single_release,
};
+static bool tid_fd_mode(struct task_struct *task, unsigned fd, fmode_t *mode)
+{
+ struct files_struct *files = get_files_struct(task);
+ struct file *file;
+
+ if (!files)
+ return false;
+
+ rcu_read_lock();
+ file = fcheck_files(files, fd);
+ if (file)
+ *mode = file->f_mode;
+ rcu_read_unlock();
+ put_files_struct(files);
+ return !!file;
+}
+
+static void tid_fd_update_inode(struct task_struct *task, struct inode *inode,
+ fmode_t f_mode)
+{
+ task_dump_owner(task, 0, &inode->i_uid, &inode->i_gid);
+
+ if (S_ISLNK(inode->i_mode)) {
+ unsigned i_mode = S_IFLNK;
+ if (f_mode & FMODE_READ)
+ i_mode |= S_IRUSR | S_IXUSR;
+ if (f_mode & FMODE_WRITE)
+ i_mode |= S_IWUSR | S_IXUSR;
+ inode->i_mode = i_mode;
+ }
+ security_task_to_inode(task, inode);
+}
+
static int tid_fd_revalidate(struct dentry *dentry, unsigned int flags)
{
- struct files_struct *files;
struct task_struct *task;
struct inode *inode;
unsigned int fd;
@@ -96,35 +128,11 @@ static int tid_fd_revalidate(struct dentry *dentry, unsigned int flags)
fd = proc_fd(inode);
if (task) {
- files = get_files_struct(task);
- if (files) {
- struct file *file;
-
- rcu_read_lock();
- file = fcheck_files(files, fd);
- if (file) {
- unsigned f_mode = file->f_mode;
-
- rcu_read_unlock();
- put_files_struct(files);
-
- task_dump_owner(task, 0, &inode->i_uid, &inode->i_gid);
-
- if (S_ISLNK(inode->i_mode)) {
- unsigned i_mode = S_IFLNK;
- if (f_mode & FMODE_READ)
- i_mode |= S_IRUSR | S_IXUSR;
- if (f_mode & FMODE_WRITE)
- i_mode |= S_IWUSR | S_IXUSR;
- inode->i_mode = i_mode;
- }
-
- security_task_to_inode(task, inode);
- put_task_struct(task);
- return 1;
- }
- rcu_read_unlock();
- put_files_struct(files);
+ fmode_t f_mode;
+ if (tid_fd_mode(task, fd, &f_mode)) {
+ tid_fd_update_inode(task, inode, f_mode);
+ put_task_struct(task);
+ return 1;
}
put_task_struct(task);
}
@@ -166,34 +174,33 @@ static int proc_fd_link(struct dentry *dentry, struct path *path)
return ret;
}
-static int
-proc_fd_instantiate(struct inode *dir, struct dentry *dentry,
- struct task_struct *task, const void *ptr)
+struct fd_data {
+ fmode_t mode;
+ unsigned fd;
+};
+
+static struct dentry *proc_fd_instantiate(struct dentry *dentry,
+ struct task_struct *task, const void *ptr)
{
- unsigned fd = (unsigned long)ptr;
+ const struct fd_data *data = ptr;
struct proc_inode *ei;
struct inode *inode;
- inode = proc_pid_make_inode(dir->i_sb, task, S_IFLNK);
+ inode = proc_pid_make_inode(dentry->d_sb, task, S_IFLNK);
if (!inode)
- goto out;
+ return ERR_PTR(-ENOENT);
ei = PROC_I(inode);
- ei->fd = fd;
+ ei->fd = data->fd;
inode->i_op = &proc_pid_link_inode_operations;
inode->i_size = 64;
ei->op.proc_get_link = proc_fd_link;
+ tid_fd_update_inode(task, inode, data->mode);
d_set_d_op(dentry, &tid_fd_dentry_operations);
- d_add(dentry, inode);
-
- /* Close the race of the process dying before we return the dentry */
- if (tid_fd_revalidate(dentry, 0))
- return 0;
- out:
- return -ENOENT;
+ return d_splice_alias(inode, dentry);
}
static struct dentry *proc_lookupfd_common(struct inode *dir,
@@ -201,19 +208,21 @@ static struct dentry *proc_lookupfd_common(struct inode *dir,
instantiate_t instantiate)
{
struct task_struct *task = get_proc_task(dir);
- int result = -ENOENT;
- unsigned fd = name_to_int(&dentry->d_name);
+ struct fd_data data = {.fd = name_to_int(&dentry->d_name)};
+ struct dentry *result = ERR_PTR(-ENOENT);
if (!task)
goto out_no_task;
- if (fd == ~0U)
+ if (data.fd == ~0U)
+ goto out;
+ if (!tid_fd_mode(task, data.fd, &data.mode))
goto out;
- result = instantiate(dir, dentry, task, (void *)(unsigned long)fd);
+ result = instantiate(dentry, task, &data);
out:
put_task_struct(task);
out_no_task:
- return ERR_PTR(result);
+ return result;
}
static int proc_readfd_common(struct file *file, struct dir_context *ctx,
@@ -236,17 +245,22 @@ static int proc_readfd_common(struct file *file, struct dir_context *ctx,
for (fd = ctx->pos - 2;
fd < files_fdtable(files)->max_fds;
fd++, ctx->pos++) {
+ struct file *f;
+ struct fd_data data;
char name[10 + 1];
int len;
- if (!fcheck_files(files, fd))
+ f = fcheck_files(files, fd);
+ if (!f)
continue;
+ data.mode = f->f_mode;
rcu_read_unlock();
+ data.fd = fd;
len = snprintf(name, sizeof(name), "%u", fd);
if (!proc_fill_cache(file, ctx,
name, len, instantiate, p,
- (void *)(unsigned long)fd))
+ &data))
goto out_fd_loop;
cond_resched();
rcu_read_lock();
@@ -304,31 +318,25 @@ const struct inode_operations proc_fd_inode_operations = {
.setattr = proc_setattr,
};
-static int
-proc_fdinfo_instantiate(struct inode *dir, struct dentry *dentry,
- struct task_struct *task, const void *ptr)
+static struct dentry *proc_fdinfo_instantiate(struct dentry *dentry,
+ struct task_struct *task, const void *ptr)
{
- unsigned fd = (unsigned long)ptr;
+ const struct fd_data *data = ptr;
struct proc_inode *ei;
struct inode *inode;
- inode = proc_pid_make_inode(dir->i_sb, task, S_IFREG | S_IRUSR);
+ inode = proc_pid_make_inode(dentry->d_sb, task, S_IFREG | S_IRUSR);
if (!inode)
- goto out;
+ return ERR_PTR(-ENOENT);
ei = PROC_I(inode);
- ei->fd = fd;
+ ei->fd = data->fd;
inode->i_fop = &proc_fdinfo_file_operations;
+ tid_fd_update_inode(task, inode, 0);
d_set_d_op(dentry, &tid_fd_dentry_operations);
- d_add(dentry, inode);
-
- /* Close the race of the process dying before we return the dentry */
- if (tid_fd_revalidate(dentry, 0))
- return 0;
- out:
- return -ENOENT;
+ return d_splice_alias(inode, dentry);
}
static struct dentry *
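
The instantiate helpers now return a dentry and finish with d_splice_alias(), whose contract does the error handling for free: a NULL inode yields a negative dentry and a NULL return, an ERR_PTR inode comes back as an ERR_PTR dentry, and a preexisting alias is returned for the caller to use in place of the dentry it passed in. A lookup sketch built on that contract (foo_inode_by_name() and foo_iget() are hypothetical):

static struct dentry *foo_lookup(struct inode *dir, struct dentry *dentry,
				 unsigned int flags)
{
	struct inode *inode = NULL;
	ino_t ino = foo_inode_by_name(dir, &dentry->d_name);

	if (ino)
		inode = foo_iget(dir->i_sb, ino);	/* may return ERR_PTR */

	/*
	 * NULL inode -> negative dentry, NULL return;
	 * IS_ERR(inode) -> ERR_CAST(inode);
	 * existing alias -> that dentry is returned instead of NULL.
	 */
	return d_splice_alias(inode, dentry);
}
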
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 2078e70e1595..7b4d9714f248 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -25,6 +25,7 @@
#include <linux/spinlock.h>
#include <linux/completion.h>
#include <linux/uaccess.h>
+#include <linux/seq_file.h>
#include "internal.h"
@@ -256,8 +257,7 @@ struct dentry *proc_lookup_de(struct inode *dir, struct dentry *dentry,
if (!inode)
return ERR_PTR(-ENOMEM);
d_set_d_op(dentry, &proc_misc_dentry_ops);
- d_add(dentry, inode);
- return NULL;
+ return d_splice_alias(inode, dentry);
}
read_unlock(&proc_subdir_lock);
return ERR_PTR(-ENOENT);
@@ -346,13 +346,12 @@ static const struct inode_operations proc_dir_inode_operations = {
.setattr = proc_notify_change,
};
-static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp)
+/* returns the registered entry, or frees dp and returns NULL on failure */
+struct proc_dir_entry *proc_register(struct proc_dir_entry *dir,
+ struct proc_dir_entry *dp)
{
- int ret;
-
- ret = proc_alloc_inum(&dp->low_ino);
- if (ret)
- return ret;
+ if (proc_alloc_inum(&dp->low_ino))
+ goto out_free_entry;
write_lock(&proc_subdir_lock);
dp->parent = dir;
@@ -360,12 +359,16 @@ static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp
WARN(1, "proc_dir_entry '%s/%s' already registered\n",
dir->name, dp->name);
write_unlock(&proc_subdir_lock);
- proc_free_inum(dp->low_ino);
- return -EEXIST;
+ goto out_free_inum;
}
write_unlock(&proc_subdir_lock);
- return 0;
+ return dp;
+out_free_inum:
+ proc_free_inum(dp->low_ino);
+out_free_entry:
+ pde_free(dp);
+ return NULL;
}
static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent,
@@ -443,10 +446,7 @@ struct proc_dir_entry *proc_symlink(const char *name,
if (ent->data) {
strcpy((char*)ent->data,dest);
ent->proc_iops = &proc_link_inode_operations;
- if (proc_register(parent, ent) < 0) {
- pde_free(ent);
- ent = NULL;
- }
+ ent = proc_register(parent, ent);
} else {
pde_free(ent);
ent = NULL;
@@ -470,11 +470,9 @@ struct proc_dir_entry *proc_mkdir_data(const char *name, umode_t mode,
ent->proc_fops = &proc_dir_operations;
ent->proc_iops = &proc_dir_inode_operations;
parent->nlink++;
- if (proc_register(parent, ent) < 0) {
- pde_free(ent);
+ ent = proc_register(parent, ent);
+ if (!ent)
parent->nlink--;
- ent = NULL;
- }
}
return ent;
}
@@ -505,47 +503,47 @@ struct proc_dir_entry *proc_create_mount_point(const char *name)
ent->proc_fops = NULL;
ent->proc_iops = NULL;
parent->nlink++;
- if (proc_register(parent, ent) < 0) {
- pde_free(ent);
+ ent = proc_register(parent, ent);
+ if (!ent)
parent->nlink--;
- ent = NULL;
- }
}
return ent;
}
EXPORT_SYMBOL(proc_create_mount_point);
-struct proc_dir_entry *proc_create_data(const char *name, umode_t mode,
- struct proc_dir_entry *parent,
- const struct file_operations *proc_fops,
- void *data)
+struct proc_dir_entry *proc_create_reg(const char *name, umode_t mode,
+ struct proc_dir_entry **parent, void *data)
{
- struct proc_dir_entry *pde;
+ struct proc_dir_entry *p;
+
if ((mode & S_IFMT) == 0)
mode |= S_IFREG;
-
- if (!S_ISREG(mode)) {
- WARN_ON(1); /* use proc_mkdir() */
+ if ((mode & S_IALLUGO) == 0)
+ mode |= S_IRUGO;
+ if (WARN_ON_ONCE(!S_ISREG(mode)))
return NULL;
+
+ p = __proc_create(parent, name, mode, 1);
+ if (p) {
+ p->proc_iops = &proc_file_inode_operations;
+ p->data = data;
}
+ return p;
+}
+
+struct proc_dir_entry *proc_create_data(const char *name, umode_t mode,
+ struct proc_dir_entry *parent,
+ const struct file_operations *proc_fops, void *data)
+{
+ struct proc_dir_entry *p;
BUG_ON(proc_fops == NULL);
- if ((mode & S_IALLUGO) == 0)
- mode |= S_IRUGO;
- pde = __proc_create(&parent, name, mode, 1);
- if (!pde)
- goto out;
- pde->proc_fops = proc_fops;
- pde->data = data;
- pde->proc_iops = &proc_file_inode_operations;
- if (proc_register(parent, pde) < 0)
- goto out_free;
- return pde;
-out_free:
- pde_free(pde);
-out:
- return NULL;
+ p = proc_create_reg(name, mode, &parent, data);
+ if (!p)
+ return NULL;
+ p->proc_fops = proc_fops;
+ return proc_register(parent, p);
}
EXPORT_SYMBOL(proc_create_data);
@@ -557,6 +555,67 @@ struct proc_dir_entry *proc_create(const char *name, umode_t mode,
}
EXPORT_SYMBOL(proc_create);
+static int proc_seq_open(struct inode *inode, struct file *file)
+{
+ struct proc_dir_entry *de = PDE(inode);
+
+ if (de->state_size)
+ return seq_open_private(file, de->seq_ops, de->state_size);
+ return seq_open(file, de->seq_ops);
+}
+
+static const struct file_operations proc_seq_fops = {
+ .open = proc_seq_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release,
+};
+
+struct proc_dir_entry *proc_create_seq_private(const char *name, umode_t mode,
+ struct proc_dir_entry *parent, const struct seq_operations *ops,
+ unsigned int state_size, void *data)
+{
+ struct proc_dir_entry *p;
+
+ p = proc_create_reg(name, mode, &parent, data);
+ if (!p)
+ return NULL;
+ p->proc_fops = &proc_seq_fops;
+ p->seq_ops = ops;
+ p->state_size = state_size;
+ return proc_register(parent, p);
+}
+EXPORT_SYMBOL(proc_create_seq_private);
+
+static int proc_single_open(struct inode *inode, struct file *file)
+{
+ struct proc_dir_entry *de = PDE(inode);
+
+ return single_open(file, de->single_show, de->data);
+}
+
+static const struct file_operations proc_single_fops = {
+ .open = proc_single_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+struct proc_dir_entry *proc_create_single_data(const char *name, umode_t mode,
+ struct proc_dir_entry *parent,
+ int (*show)(struct seq_file *, void *), void *data)
+{
+ struct proc_dir_entry *p;
+
+ p = proc_create_reg(name, mode, &parent, data);
+ if (!p)
+ return NULL;
+ p->proc_fops = &proc_single_fops;
+ p->single_show = show;
+ return proc_register(parent, p);
+}
+EXPORT_SYMBOL(proc_create_single_data);
+
void proc_set_size(struct proc_dir_entry *de, loff_t size)
{
de->size = size;
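
proc_create_single_data() covers the single_open() case the same way: the show callback and the data pointer live in the proc_dir_entry, and single_open() later places data in m->private. A sketch of a call site (bar_stats is a hypothetical private structure):

struct bar_stats {
	unsigned long hits;
};

static struct bar_stats bar_stats;

static int bar_show(struct seq_file *m, void *v)
{
	struct bar_stats *stats = m->private;	/* the data argument */

	seq_printf(m, "hits %lu\n", stats->hits);
	return 0;
}

static int __init bar_proc_init(void)
{
	if (!proc_create_single_data("bar", 0, NULL, bar_show, &bar_stats))
		return -ENOMEM;
	return 0;
}
fs_initcall(bar_proc_init);
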
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 0f1692e63cb6..43c70c9e6b62 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -44,7 +44,12 @@ struct proc_dir_entry {
struct completion *pde_unload_completion;
const struct inode_operations *proc_iops;
const struct file_operations *proc_fops;
+ union {
+ const struct seq_operations *seq_ops;
+ int (*single_show)(struct seq_file *, void *);
+ };
void *data;
+ unsigned int state_size;
unsigned int low_ino;
nlink_t nlink;
kuid_t uid;
@@ -57,9 +62,9 @@ struct proc_dir_entry {
umode_t mode;
u8 namelen;
#ifdef CONFIG_64BIT
-#define SIZEOF_PDE_INLINE_NAME (192-139)
+#define SIZEOF_PDE_INLINE_NAME (192-155)
#else
-#define SIZEOF_PDE_INLINE_NAME (128-87)
+#define SIZEOF_PDE_INLINE_NAME (128-95)
#endif
char inline_name[SIZEOF_PDE_INLINE_NAME];
} __randomize_layout;
@@ -147,14 +152,14 @@ extern const struct dentry_operations pid_dentry_operations;
extern int pid_getattr(const struct path *, struct kstat *, u32, unsigned int);
extern int proc_setattr(struct dentry *, struct iattr *);
extern struct inode *proc_pid_make_inode(struct super_block *, struct task_struct *, umode_t);
-extern int pid_revalidate(struct dentry *, unsigned int);
+extern void pid_update_inode(struct task_struct *, struct inode *);
extern int pid_delete_dentry(const struct dentry *);
extern int proc_pid_readdir(struct file *, struct dir_context *);
extern struct dentry *proc_pid_lookup(struct inode *, struct dentry *, unsigned int);
extern loff_t mem_lseek(struct file *, loff_t, int);
/* Lookups */
-typedef int instantiate_t(struct inode *, struct dentry *,
+typedef struct dentry *instantiate_t(struct dentry *,
struct task_struct *, const void *);
extern bool proc_fill_cache(struct file *, struct dir_context *, const char *, int,
instantiate_t, struct task_struct *, const void *);
@@ -162,6 +167,10 @@ extern bool proc_fill_cache(struct file *, struct dir_context *, const char *, i
/*
* generic.c
*/
+struct proc_dir_entry *proc_create_reg(const char *name, umode_t mode,
+ struct proc_dir_entry **parent, void *data);
+struct proc_dir_entry *proc_register(struct proc_dir_entry *dir,
+ struct proc_dir_entry *dp);
extern struct dentry *proc_lookup(struct inode *, struct dentry *, unsigned int);
struct dentry *proc_lookup_de(struct inode *, struct dentry *, struct proc_dir_entry *);
extern int proc_readdir(struct file *, struct dir_context *);
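
The SIZEOF_PDE_INLINE_NAME arithmetic above keeps sizeof(struct proc_dir_entry) at 192 bytes on 64-bit (128 on 32-bit): the seq_ops/single_show union, state_size and padding grow the fixed fields from 139 to 155 bytes (87 to 95 on 32-bit), so the inline-name pad shrinks correspondingly. A hedged compile-time check of that invariant (the kernel may assert it elsewhere, or not at all):

static inline void pde_size_check(void)
{
#ifdef CONFIG_64BIT
	BUILD_BUG_ON(sizeof(struct proc_dir_entry) > 192);
#else
	BUILD_BUG_ON(sizeof(struct proc_dir_entry) > 128);
#endif
}
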
diff --git a/fs/proc/interrupts.c b/fs/proc/interrupts.c
index 6a6bee9c603c..cb0edc7cbf09 100644
--- a/fs/proc/interrupts.c
+++ b/fs/proc/interrupts.c
@@ -34,21 +34,9 @@ static const struct seq_operations int_seq_ops = {
.show = show_interrupts
};
-static int interrupts_open(struct inode *inode, struct file *filp)
-{
- return seq_open(filp, &int_seq_ops);
-}
-
-static const struct file_operations proc_interrupts_operations = {
- .open = interrupts_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
static int __init proc_interrupts_init(void)
{
- proc_create("interrupts", 0, NULL, &proc_interrupts_operations);
+ proc_create_seq("interrupts", 0, NULL, &int_seq_ops);
return 0;
}
fs_initcall(proc_interrupts_init);
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index d1e82761de81..e64ecb9f2720 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -209,25 +209,34 @@ kclist_add_private(unsigned long pfn, unsigned long nr_pages, void *arg)
{
struct list_head *head = (struct list_head *)arg;
struct kcore_list *ent;
+ struct page *p;
+
+ if (!pfn_valid(pfn))
+ return 1;
+
+ p = pfn_to_page(pfn);
+ if (!memmap_valid_within(pfn, p, page_zone(p)))
+ return 1;
ent = kmalloc(sizeof(*ent), GFP_KERNEL);
if (!ent)
return -ENOMEM;
- ent->addr = (unsigned long)__va((pfn << PAGE_SHIFT));
+ ent->addr = (unsigned long)page_to_virt(p);
ent->size = nr_pages << PAGE_SHIFT;
- /* Sanity check: Can happen in 32bit arch...maybe */
- if (ent->addr < (unsigned long) __va(0))
+ if (!virt_addr_valid(ent->addr))
goto free_out;
/* cut not-mapped area. ....from ppc-32 code. */
if (ULONG_MAX - ent->addr < ent->size)
ent->size = ULONG_MAX - ent->addr;
- /* cut when vmalloc() area is higher than direct-map area */
- if (VMALLOC_START > (unsigned long)__va(0)) {
- if (ent->addr > VMALLOC_START)
- goto free_out;
+ /*
+	 * We've already checked virt_addr_valid(), so we know this address
+	 * is a valid pointer; therefore we can check against it to determine
+	 * whether we need to trim.
+ */
+ if (VMALLOC_START > ent->addr) {
if (VMALLOC_START - ent->addr < ent->size)
ent->size = VMALLOC_START - ent->addr;
}
diff --git a/fs/proc/loadavg.c b/fs/proc/loadavg.c
index b572cc865b92..d06694757201 100644
--- a/fs/proc/loadavg.c
+++ b/fs/proc/loadavg.c
@@ -28,21 +28,9 @@ static int loadavg_proc_show(struct seq_file *m, void *v)
return 0;
}
-static int loadavg_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, loadavg_proc_show, NULL);
-}
-
-static const struct file_operations loadavg_proc_fops = {
- .open = loadavg_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
static int __init proc_loadavg_init(void)
{
- proc_create("loadavg", 0, NULL, &loadavg_proc_fops);
+ proc_create_single("loadavg", 0, NULL, loadavg_proc_show);
return 0;
}
fs_initcall(proc_loadavg_init);
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index 65a72ab57471..2fb04846ed11 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -149,21 +149,9 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
return 0;
}
-static int meminfo_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, meminfo_proc_show, NULL);
-}
-
-static const struct file_operations meminfo_proc_fops = {
- .open = meminfo_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
static int __init proc_meminfo_init(void)
{
- proc_create("meminfo", 0, NULL, &meminfo_proc_fops);
+ proc_create_single("meminfo", 0, NULL, meminfo_proc_show);
return 0;
}
fs_initcall(proc_meminfo_init);
diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c
index 59b17e509f46..dd2b35f78b09 100644
--- a/fs/proc/namespaces.c
+++ b/fs/proc/namespaces.c
@@ -87,28 +87,24 @@ static const struct inode_operations proc_ns_link_inode_operations = {
.setattr = proc_setattr,
};
-static int proc_ns_instantiate(struct inode *dir,
- struct dentry *dentry, struct task_struct *task, const void *ptr)
+static struct dentry *proc_ns_instantiate(struct dentry *dentry,
+ struct task_struct *task, const void *ptr)
{
const struct proc_ns_operations *ns_ops = ptr;
struct inode *inode;
struct proc_inode *ei;
- inode = proc_pid_make_inode(dir->i_sb, task, S_IFLNK | S_IRWXUGO);
+ inode = proc_pid_make_inode(dentry->d_sb, task, S_IFLNK | S_IRWXUGO);
if (!inode)
- goto out;
+ return ERR_PTR(-ENOENT);
ei = PROC_I(inode);
inode->i_op = &proc_ns_link_inode_operations;
ei->ns_ops = ns_ops;
+ pid_update_inode(task, inode);
d_set_d_op(dentry, &pid_dentry_operations);
- d_add(dentry, inode);
- /* Close the race of the process dying before we return the dentry */
- if (pid_revalidate(dentry, 0))
- return 0;
-out:
- return -ENOENT;
+ return d_splice_alias(inode, dentry);
}
static int proc_ns_dir_readdir(struct file *file, struct dir_context *ctx)
@@ -147,12 +143,10 @@ const struct file_operations proc_ns_dir_operations = {
static struct dentry *proc_ns_dir_lookup(struct inode *dir,
struct dentry *dentry, unsigned int flags)
{
- int error;
struct task_struct *task = get_proc_task(dir);
const struct proc_ns_operations **entry, **last;
unsigned int len = dentry->d_name.len;
-
- error = -ENOENT;
+ struct dentry *res = ERR_PTR(-ENOENT);
if (!task)
goto out_no_task;
@@ -167,11 +161,11 @@ static struct dentry *proc_ns_dir_lookup(struct inode *dir,
if (entry == last)
goto out;
- error = proc_ns_instantiate(dir, dentry, task, *entry);
+ res = proc_ns_instantiate(dentry, task, *entry);
out:
put_task_struct(task);
out_no_task:
- return ERR_PTR(error);
+ return res;
}
const struct inode_operations proc_ns_dir_inode_operations = {
diff --git a/fs/proc/nommu.c b/fs/proc/nommu.c
index 75634379f82e..3b63be64e436 100644
--- a/fs/proc/nommu.c
+++ b/fs/proc/nommu.c
@@ -113,21 +113,9 @@ static const struct seq_operations proc_nommu_region_list_seqop = {
.show = nommu_region_list_show
};
-static int proc_nommu_region_list_open(struct inode *inode, struct file *file)
-{
- return seq_open(file, &proc_nommu_region_list_seqop);
-}
-
-static const struct file_operations proc_nommu_region_list_operations = {
- .open = proc_nommu_region_list_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
static int __init proc_nommu_init(void)
{
- proc_create("maps", S_IRUGO, NULL, &proc_nommu_region_list_operations);
+ proc_create_seq("maps", S_IRUGO, NULL, &proc_nommu_region_list_seqop);
return 0;
}
diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c
index 1763f370489d..7d94fa005b0d 100644
--- a/fs/proc/proc_net.c
+++ b/fs/proc/proc_net.c
@@ -38,20 +38,20 @@ static struct net *get_proc_net(const struct inode *inode)
return maybe_get_net(PDE_NET(PDE(inode)));
}
-int seq_open_net(struct inode *ino, struct file *f,
- const struct seq_operations *ops, int size)
+static int seq_open_net(struct inode *inode, struct file *file)
{
- struct net *net;
+ unsigned int state_size = PDE(inode)->state_size;
struct seq_net_private *p;
+ struct net *net;
- BUG_ON(size < sizeof(*p));
+ WARN_ON_ONCE(state_size < sizeof(*p));
- net = get_proc_net(ino);
- if (net == NULL)
+ net = get_proc_net(inode);
+ if (!net)
return -ENXIO;
- p = __seq_open_private(f, ops, size);
- if (p == NULL) {
+ p = __seq_open_private(file, PDE(inode)->seq_ops, state_size);
+ if (!p) {
put_net(net);
return -ENOMEM;
}
@@ -60,51 +60,83 @@ int seq_open_net(struct inode *ino, struct file *f,
#endif
return 0;
}
-EXPORT_SYMBOL_GPL(seq_open_net);
-int single_open_net(struct inode *inode, struct file *file,
- int (*show)(struct seq_file *, void *))
+static int seq_release_net(struct inode *ino, struct file *f)
{
- int err;
- struct net *net;
-
- err = -ENXIO;
- net = get_proc_net(inode);
- if (net == NULL)
- goto err_net;
-
- err = single_open(file, show, net);
- if (err < 0)
- goto err_open;
+ struct seq_file *seq = f->private_data;
+ put_net(seq_file_net(seq));
+ seq_release_private(ino, f);
return 0;
+}
-err_open:
- put_net(net);
-err_net:
- return err;
+static const struct file_operations proc_net_seq_fops = {
+ .open = seq_open_net,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release_net,
+};
+
+struct proc_dir_entry *proc_create_net_data(const char *name, umode_t mode,
+ struct proc_dir_entry *parent, const struct seq_operations *ops,
+ unsigned int state_size, void *data)
+{
+ struct proc_dir_entry *p;
+
+ p = proc_create_reg(name, mode, &parent, data);
+ if (!p)
+ return NULL;
+ p->proc_fops = &proc_net_seq_fops;
+ p->seq_ops = ops;
+ p->state_size = state_size;
+ return proc_register(parent, p);
}
-EXPORT_SYMBOL_GPL(single_open_net);
+EXPORT_SYMBOL_GPL(proc_create_net_data);
-int seq_release_net(struct inode *ino, struct file *f)
+static int single_open_net(struct inode *inode, struct file *file)
{
- struct seq_file *seq;
+ struct proc_dir_entry *de = PDE(inode);
+ struct net *net;
+ int err;
- seq = f->private_data;
+ net = get_proc_net(inode);
+ if (!net)
+ return -ENXIO;
- put_net(seq_file_net(seq));
- seq_release_private(ino, f);
- return 0;
+ err = single_open(file, de->single_show, net);
+ if (err)
+ put_net(net);
+ return err;
}
-EXPORT_SYMBOL_GPL(seq_release_net);
-int single_release_net(struct inode *ino, struct file *f)
+static int single_release_net(struct inode *ino, struct file *f)
{
struct seq_file *seq = f->private_data;
put_net(seq->private);
return single_release(ino, f);
}
-EXPORT_SYMBOL_GPL(single_release_net);
+
+static const struct file_operations proc_net_single_fops = {
+ .open = single_open_net,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release_net,
+};
+
+struct proc_dir_entry *proc_create_net_single(const char *name, umode_t mode,
+ struct proc_dir_entry *parent,
+ int (*show)(struct seq_file *, void *), void *data)
+{
+ struct proc_dir_entry *p;
+
+ p = proc_create_reg(name, mode, &parent, data);
+ if (!p)
+ return NULL;
+ p->proc_fops = &proc_net_single_fops;
+ p->single_show = show;
+ return proc_register(parent, p);
+}
+EXPORT_SYMBOL_GPL(proc_create_net_single);
static struct net *get_proc_task_net(struct inode *dir)
{
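
proc_create_net_data() is the net-namespace flavour of the same interface: state_size must be at least sizeof(struct seq_net_private) so that seq_open_net() can record the namespace, which the iterator reads back via seq_file_net(m). A sketch of pernet hooks reusing the hypothetical foo_seq_ops from earlier, registered through register_pernet_subsys():

static int __net_init foo_net_init(struct net *net)
{
	if (!proc_create_net_data("foo", 0444, net->proc_net, &foo_seq_ops,
				  sizeof(struct seq_net_private), NULL))
		return -ENOMEM;
	return 0;
}

static void __net_exit foo_net_exit(struct net *net)
{
	remove_proc_entry("foo", net->proc_net);
}

Inside the seq callbacks, seq_file_net(m) then returns the namespace captured at open time.
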
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 8989936f2995..4d765e5e91ed 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -554,9 +554,8 @@ static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry,
if (!inode)
goto out;
- err = NULL;
d_set_d_op(dentry, &proc_sys_dentry_operations);
- d_add(dentry, inode);
+ err = d_splice_alias(inode, dentry);
out:
if (h)
@@ -684,6 +683,7 @@ static bool proc_sys_fill_cache(struct file *file,
if (IS_ERR(child))
return false;
if (d_in_lookup(child)) {
+ struct dentry *res;
inode = proc_sys_make_inode(dir->d_sb, head, table);
if (!inode) {
d_lookup_done(child);
@@ -691,7 +691,16 @@ static bool proc_sys_fill_cache(struct file *file,
return false;
}
d_set_d_op(child, &proc_sys_dentry_operations);
- d_add(child, inode);
+ res = d_splice_alias(inode, child);
+ d_lookup_done(child);
+ if (unlikely(res)) {
+ if (IS_ERR(res)) {
+ dput(child);
+ return false;
+ }
+ dput(child);
+ child = res;
+ }
}
}
inode = d_inode(child);
diff --git a/fs/proc/proc_tty.c b/fs/proc/proc_tty.c
index d0cf1c50bb6c..c69ff191e5d8 100644
--- a/fs/proc/proc_tty.c
+++ b/fs/proc/proc_tty.c
@@ -126,18 +126,6 @@ static const struct seq_operations tty_drivers_op = {
.show = show_tty_driver
};
-static int tty_drivers_open(struct inode *inode, struct file *file)
-{
- return seq_open(file, &tty_drivers_op);
-}
-
-static const struct file_operations proc_tty_drivers_operations = {
- .open = tty_drivers_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
/*
* This function is called by tty_register_driver() to handle
* registering the driver's /proc handler into /proc/tty/driver/<foo>
@@ -147,11 +135,11 @@ void proc_tty_register_driver(struct tty_driver *driver)
struct proc_dir_entry *ent;
if (!driver->driver_name || driver->proc_entry ||
- !driver->ops->proc_fops)
+ !driver->ops->proc_show)
return;
- ent = proc_create_data(driver->driver_name, 0, proc_tty_driver,
- driver->ops->proc_fops, driver);
+ ent = proc_create_single_data(driver->driver_name, 0, proc_tty_driver,
+ driver->ops->proc_show, driver);
driver->proc_entry = ent;
}
@@ -186,6 +174,6 @@ void __init proc_tty_init(void)
* entry.
*/
proc_tty_driver = proc_mkdir_mode("tty/driver", S_IRUSR|S_IXUSR, NULL);
- proc_create("tty/ldiscs", 0, NULL, &tty_ldiscs_proc_fops);
- proc_create("tty/drivers", 0, NULL, &proc_tty_drivers_operations);
+ proc_create_seq("tty/ldiscs", 0, NULL, &tty_ldiscs_seq_ops);
+ proc_create_seq("tty/drivers", 0, NULL, &tty_drivers_op);
}
diff --git a/fs/proc/self.c b/fs/proc/self.c
index 4d7d061696b3..127265e5c55f 100644
--- a/fs/proc/self.c
+++ b/fs/proc/self.c
@@ -12,7 +12,7 @@ static const char *proc_self_get_link(struct dentry *dentry,
struct inode *inode,
struct delayed_call *done)
{
- struct pid_namespace *ns = inode->i_sb->s_fs_info;
+ struct pid_namespace *ns = proc_pid_ns(inode);
pid_t tgid = task_tgid_nr_ns(current, ns);
char *name;
@@ -36,7 +36,7 @@ static unsigned self_inum __ro_after_init;
int proc_setup_self(struct super_block *s)
{
struct inode *root_inode = d_inode(s->s_root);
- struct pid_namespace *ns = s->s_fs_info;
+ struct pid_namespace *ns = proc_pid_ns(root_inode);
struct dentry *self;
inode_lock(root_inode);
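
proc_pid_ns() replaces the open-coded casts of s_fs_info here and in thread_self.c below; for a procfs superblock that field is the pid namespace the mount belongs to, so the helper is presumably just the typed accessor:

static inline struct pid_namespace *proc_pid_ns(const struct inode *inode)
{
	return inode->i_sb->s_fs_info;
}
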
diff --git a/fs/proc/softirqs.c b/fs/proc/softirqs.c
index 24072cc06e65..12901dcf57e2 100644
--- a/fs/proc/softirqs.c
+++ b/fs/proc/softirqs.c
@@ -25,21 +25,9 @@ static int show_softirqs(struct seq_file *p, void *v)
return 0;
}
-static int softirqs_open(struct inode *inode, struct file *file)
-{
- return single_open(file, show_softirqs, NULL);
-}
-
-static const struct file_operations proc_softirqs_operations = {
- .open = softirqs_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
static int __init proc_softirqs_init(void)
{
- proc_create("softirqs", 0, NULL, &proc_softirqs_operations);
+ proc_create_single("softirqs", 0, NULL, show_softirqs);
return 0;
}
fs_initcall(proc_softirqs_init);
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index c486ad4b43f0..a20c6e495bb2 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -937,7 +937,7 @@ static inline void clear_soft_dirty(struct vm_area_struct *vma,
/*
* The soft-dirty tracker uses #PF-s to catch writes
* to pages, so write-protect the pte as well. See the
- * Documentation/vm/soft-dirty.txt for full description
+ * Documentation/admin-guide/mm/soft-dirty.rst for full description
* of how soft-dirty works.
*/
pte_t ptent = *pte;
@@ -1421,7 +1421,7 @@ static int pagemap_hugetlb_range(pte_t *ptep, unsigned long hmask,
* Bits 0-54 page frame number (PFN) if present
* Bits 0-4 swap type if swapped
* Bits 5-54 swap offset if swapped
- * Bit 55 pte is soft-dirty (see Documentation/vm/soft-dirty.txt)
+ * Bit 55 pte is soft-dirty (see Documentation/admin-guide/mm/soft-dirty.rst)
* Bit 56 page exclusively mapped
* Bits 57-60 zero
* Bit 61 page is file-page or shared-anon
diff --git a/fs/proc/thread_self.c b/fs/proc/thread_self.c
index 9d2efaca499f..b905010ca9eb 100644
--- a/fs/proc/thread_self.c
+++ b/fs/proc/thread_self.c
@@ -12,7 +12,7 @@ static const char *proc_thread_self_get_link(struct dentry *dentry,
struct inode *inode,
struct delayed_call *done)
{
- struct pid_namespace *ns = inode->i_sb->s_fs_info;
+ struct pid_namespace *ns = proc_pid_ns(inode);
pid_t tgid = task_tgid_nr_ns(current, ns);
pid_t pid = task_pid_nr_ns(current, ns);
char *name;
@@ -36,7 +36,7 @@ static unsigned thread_self_inum __ro_after_init;
int proc_setup_thread_self(struct super_block *s)
{
struct inode *root_inode = d_inode(s->s_root);
- struct pid_namespace *ns = s->s_fs_info;
+ struct pid_namespace *ns = proc_pid_ns(root_inode);
struct dentry *thread_self;
inode_lock(root_inode);
diff --git a/fs/proc/uptime.c b/fs/proc/uptime.c
index 95a708d83721..3bd12f955867 100644
--- a/fs/proc/uptime.c
+++ b/fs/proc/uptime.c
@@ -30,21 +30,9 @@ static int uptime_proc_show(struct seq_file *m, void *v)
return 0;
}
-static int uptime_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, uptime_proc_show, NULL);
-}
-
-static const struct file_operations uptime_proc_fops = {
- .open = uptime_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
static int __init proc_uptime_init(void)
{
- proc_create("uptime", 0, NULL, &uptime_proc_fops);
+ proc_create_single("uptime", 0, NULL, uptime_proc_show);
return 0;
}
fs_initcall(proc_uptime_init);
diff --git a/fs/proc/version.c b/fs/proc/version.c
index 94901e8e700d..b449f186577f 100644
--- a/fs/proc/version.c
+++ b/fs/proc/version.c
@@ -15,21 +15,9 @@ static int version_proc_show(struct seq_file *m, void *v)
return 0;
}
-static int version_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, version_proc_show, NULL);
-}
-
-static const struct file_operations version_proc_fops = {
- .open = version_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
static int __init proc_version_init(void)
{
- proc_create("version", 0, NULL, &version_proc_fops);
+ proc_create_single("version", 0, NULL, version_proc_show);
return 0;
}
fs_initcall(proc_version_init);
diff --git a/fs/qnx4/namei.c b/fs/qnx4/namei.c
index eca27878079d..8d72221735d7 100644
--- a/fs/qnx4/namei.c
+++ b/fs/qnx4/namei.c
@@ -114,13 +114,9 @@ struct dentry * qnx4_lookup(struct inode *dir, struct dentry *dentry, unsigned i
brelse(bh);
foundinode = qnx4_iget(dir->i_sb, ino);
- if (IS_ERR(foundinode)) {
+ if (IS_ERR(foundinode))
QNX4DEBUG((KERN_ERR "qnx4: lookup->iget -> error %ld\n",
PTR_ERR(foundinode)));
- return ERR_CAST(foundinode);
- }
out:
- d_add(dentry, foundinode);
-
- return NULL;
+ return d_splice_alias(foundinode, dentry);
}
diff --git a/fs/qnx6/namei.c b/fs/qnx6/namei.c
index 72c2770830be..e2e98e653b8d 100644
--- a/fs/qnx6/namei.c
+++ b/fs/qnx6/namei.c
@@ -29,15 +29,11 @@ struct dentry *qnx6_lookup(struct inode *dir, struct dentry *dentry,
if (ino) {
foundinode = qnx6_iget(dir->i_sb, ino);
qnx6_put_page(page);
- if (IS_ERR(foundinode)) {
+ if (IS_ERR(foundinode))
pr_debug("lookup->iget -> error %ld\n",
PTR_ERR(foundinode));
- return ERR_CAST(foundinode);
- }
} else {
pr_debug("%s(): not found %s\n", __func__, name);
- return NULL;
}
- d_add(dentry, foundinode);
- return NULL;
+ return d_splice_alias(foundinode, dentry);
}
diff --git a/fs/read_write.c b/fs/read_write.c
index c4eabbfc90df..e83bd9744b5d 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -2023,7 +2023,7 @@ int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same)
ret = mnt_want_write_file(dst_file);
if (ret) {
info->status = ret;
- goto next_loop;
+ goto next_fdput;
}
dst_off = info->dest_offset;
@@ -2058,9 +2058,9 @@ int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same)
next_file:
mnt_drop_write_file(dst_file);
-next_loop:
+next_fdput:
fdput(dst_fd);
-
+next_loop:
if (fatal_signal_pending(current))
goto out;
}
diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c
index bd39a998843d..5089dac02660 100644
--- a/fs/reiserfs/namei.c
+++ b/fs/reiserfs/namei.c
@@ -687,8 +687,7 @@ static int reiserfs_create(struct inode *dir, struct dentry *dentry, umode_t mod
reiserfs_update_inode_transaction(inode);
reiserfs_update_inode_transaction(dir);
- unlock_new_inode(inode);
- d_instantiate(dentry, inode);
+ d_instantiate_new(dentry, inode);
retval = journal_end(&th);
out_failed:
@@ -771,8 +770,7 @@ static int reiserfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode
goto out_failed;
}
- unlock_new_inode(inode);
- d_instantiate(dentry, inode);
+ d_instantiate_new(dentry, inode);
retval = journal_end(&th);
out_failed:
@@ -871,8 +869,7 @@ static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode
/* the above add_entry did not update dir's stat data */
reiserfs_update_sd(&th, dir);
- unlock_new_inode(inode);
- d_instantiate(dentry, inode);
+ d_instantiate_new(dentry, inode);
retval = journal_end(&th);
out_failed:
reiserfs_write_unlock(dir->i_sb);
@@ -1187,8 +1184,7 @@ static int reiserfs_symlink(struct inode *parent_dir,
goto out_failed;
}
- unlock_new_inode(inode);
- d_instantiate(dentry, inode);
+ d_instantiate_new(dentry, inode);
retval = journal_end(&th);
out_failed:
reiserfs_write_unlock(parent_dir->i_sb);
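
d_instantiate_new() folds each unlock_new_inode()/d_instantiate() pair (here and in udf/ufs below) into one call that also gets the ordering right: the inode is attached to the dentry before I_NEW is cleared, so a concurrent lookup can never observe an unlocked inode without its alias. Caller-side it looks like this sketch (the foo_* helpers are hypothetical):

static int foo_create(struct inode *dir, struct dentry *dentry, umode_t mode,
		      bool excl)
{
	/* assume this returns a locked, I_NEW inode, as new_inode paths do */
	struct inode *inode = foo_new_inode(dir, mode);
	int err;

	if (IS_ERR(inode))
		return PTR_ERR(inode);

	err = foo_add_link(dentry, inode);
	if (err) {
		inode_dec_link_count(inode);
		unlock_new_inode(inode);
		iput(inode);
		return err;
	}

	d_instantiate_new(dentry, inode);	/* attach inode, then clear I_NEW */
	return 0;
}
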
diff --git a/fs/reiserfs/procfs.c b/fs/reiserfs/procfs.c
index fe999157dd97..e39b3910d24d 100644
--- a/fs/reiserfs/procfs.c
+++ b/fs/reiserfs/procfs.c
@@ -389,27 +389,13 @@ static int show_journal(struct seq_file *m, void *unused)
return 0;
}
-static int r_open(struct inode *inode, struct file *file)
-{
- return single_open(file, PDE_DATA(inode),
- proc_get_parent_data(inode));
-}
-
-static const struct file_operations r_file_operations = {
- .open = r_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
static struct proc_dir_entry *proc_info_root = NULL;
static const char proc_info_root_name[] = "fs/reiserfs";
static void add_file(struct super_block *sb, char *name,
int (*func) (struct seq_file *, void *))
{
- proc_create_data(name, 0, REISERFS_SB(sb)->procdir,
- &r_file_operations, func);
+ proc_create_single_data(name, 0, REISERFS_SB(sb)->procdir, func, sb);
}
int reiserfs_proc_info_init(struct super_block *sb)
diff --git a/fs/romfs/super.c b/fs/romfs/super.c
index 8f06fd1f3d69..6ccb51993a76 100644
--- a/fs/romfs/super.c
+++ b/fs/romfs/super.c
@@ -213,7 +213,7 @@ static struct dentry *romfs_lookup(struct inode *dir, struct dentry *dentry,
unsigned int flags)
{
unsigned long offset, maxoff;
- struct inode *inode;
+ struct inode *inode = NULL;
struct romfs_inode ri;
const char *name; /* got from dentry */
int len, ret;
@@ -233,7 +233,7 @@ static struct dentry *romfs_lookup(struct inode *dir, struct dentry *dentry,
for (;;) {
if (!offset || offset >= maxoff)
- goto out0;
+ break;
ret = romfs_dev_read(dir->i_sb, offset, &ri, sizeof(ri));
if (ret < 0)
@@ -244,37 +244,19 @@ static struct dentry *romfs_lookup(struct inode *dir, struct dentry *dentry,
len);
if (ret < 0)
goto error;
- if (ret == 1)
+ if (ret == 1) {
+ /* Hard link handling */
+ if ((be32_to_cpu(ri.next) & ROMFH_TYPE) == ROMFH_HRD)
+ offset = be32_to_cpu(ri.spec) & ROMFH_MASK;
+ inode = romfs_iget(dir->i_sb, offset);
break;
+ }
/* next entry */
offset = be32_to_cpu(ri.next) & ROMFH_MASK;
}
- /* Hard link handling */
- if ((be32_to_cpu(ri.next) & ROMFH_TYPE) == ROMFH_HRD)
- offset = be32_to_cpu(ri.spec) & ROMFH_MASK;
-
- inode = romfs_iget(dir->i_sb, offset);
- if (IS_ERR(inode)) {
- ret = PTR_ERR(inode);
- goto error;
- }
- goto outi;
-
- /*
- * it's a bit funky, _lookup needs to return an error code
- * (negative) or a NULL, both as a dentry. ENOENT should not
- * be returned, instead we need to create a negative dentry by
- * d_add(dentry, NULL); and return 0 as no error.
- * (Although as I see, it only matters on writable file
- * systems).
- */
-out0:
- inode = NULL;
-outi:
- d_add(dentry, inode);
- ret = 0;
+ return d_splice_alias(inode, dentry);
error:
return ERR_PTR(ret);
}
diff --git a/fs/select.c b/fs/select.c
index ba879c51288f..bc3cc0f98896 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -34,6 +34,29 @@
#include <linux/uaccess.h>
+__poll_t vfs_poll(struct file *file, struct poll_table_struct *pt)
+{
+ if (file->f_op->poll) {
+ return file->f_op->poll(file, pt);
+ } else if (file_has_poll_mask(file)) {
+ unsigned int events = poll_requested_events(pt);
+ struct wait_queue_head *head;
+
+ if (pt && pt->_qproc) {
+ head = file->f_op->get_poll_head(file, events);
+ if (!head)
+ return DEFAULT_POLLMASK;
+ if (IS_ERR(head))
+ return EPOLLERR;
+ pt->_qproc(file, head, pt);
+ }
+
+ return file->f_op->poll_mask(file, events);
+ } else {
+ return DEFAULT_POLLMASK;
+ }
+}
+EXPORT_SYMBOL_GPL(vfs_poll);
/*
* Estimate expected accuracy in ns from a timeval.
@@ -233,7 +256,7 @@ static void __pollwait(struct file *filp, wait_queue_head_t *wait_address,
add_wait_queue(wait_address, &entry->wait);
}
-int poll_schedule_timeout(struct poll_wqueues *pwq, int state,
+static int poll_schedule_timeout(struct poll_wqueues *pwq, int state,
ktime_t *expires, unsigned long slack)
{
int rc = -EINTR;
@@ -258,7 +281,6 @@ int poll_schedule_timeout(struct poll_wqueues *pwq, int state,
return rc;
}
-EXPORT_SYMBOL(poll_schedule_timeout);
/**
* poll_select_set_timeout - helper function to setup the timeout value
@@ -503,14 +525,10 @@ static int do_select(int n, fd_set_bits *fds, struct timespec64 *end_time)
continue;
f = fdget(i);
if (f.file) {
- const struct file_operations *f_op;
- f_op = f.file->f_op;
- mask = DEFAULT_POLLMASK;
- if (f_op->poll) {
- wait_key_set(wait, in, out,
- bit, busy_flag);
- mask = (*f_op->poll)(f.file, wait);
- }
+ wait_key_set(wait, in, out, bit,
+ busy_flag);
+ mask = vfs_poll(f.file, wait);
+
fdput(f);
if ((mask & POLLIN_SET) && (in & bit)) {
res_in |= bit;
@@ -813,34 +831,29 @@ static inline __poll_t do_pollfd(struct pollfd *pollfd, poll_table *pwait,
bool *can_busy_poll,
__poll_t busy_flag)
{
- __poll_t mask;
- int fd;
-
- mask = 0;
- fd = pollfd->fd;
- if (fd >= 0) {
- struct fd f = fdget(fd);
- mask = EPOLLNVAL;
- if (f.file) {
- /* userland u16 ->events contains POLL... bitmap */
- __poll_t filter = demangle_poll(pollfd->events) |
- EPOLLERR | EPOLLHUP;
- mask = DEFAULT_POLLMASK;
- if (f.file->f_op->poll) {
- pwait->_key = filter;
- pwait->_key |= busy_flag;
- mask = f.file->f_op->poll(f.file, pwait);
- if (mask & busy_flag)
- *can_busy_poll = true;
- }
- /* Mask out unneeded events. */
- mask &= filter;
- fdput(f);
- }
- }
+ int fd = pollfd->fd;
+ __poll_t mask = 0, filter;
+ struct fd f;
+
+ if (fd < 0)
+ goto out;
+ mask = EPOLLNVAL;
+ f = fdget(fd);
+ if (!f.file)
+ goto out;
+
+ /* userland u16 ->events contains POLL... bitmap */
+ filter = demangle_poll(pollfd->events) | EPOLLERR | EPOLLHUP;
+ pwait->_key = filter | busy_flag;
+ mask = vfs_poll(f.file, pwait);
+ if (mask & busy_flag)
+ *can_busy_poll = true;
+ mask &= filter; /* Mask out unneeded events. */
+ fdput(f);
+
+out:
/* ... and so does ->revents */
pollfd->revents = mangle_poll(mask);
-
return mask;
}
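
With vfs_poll() in place, a driver whose readiness is guarded by a single wait queue can implement ->get_poll_head()/->poll_mask() instead of ->poll() and never touch a poll_table itself: the VFS queues the waiter on the returned head and then asks for the mask, as timerfd does later in this series. A sketch (struct mydev and its fields are hypothetical):

struct mydev {
	struct wait_queue_head waitq;
	bool data_ready;
};

static struct wait_queue_head *mydev_get_poll_head(struct file *file,
						   __poll_t events)
{
	struct mydev *dev = file->private_data;

	return &dev->waitq;
}

static __poll_t mydev_poll_mask(struct file *file, __poll_t events)
{
	struct mydev *dev = file->private_data;

	return READ_ONCE(dev->data_ready) ? EPOLLIN | EPOLLRDNORM : 0;
}

static const struct file_operations mydev_fops = {
	.owner		= THIS_MODULE,
	.get_poll_head	= mydev_get_poll_head,
	.poll_mask	= mydev_poll_mask,
};
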
diff --git a/fs/seq_file.c b/fs/seq_file.c
index c6c27f1f9c98..4cc090b50cc5 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -709,11 +709,6 @@ void seq_put_decimal_ull_width(struct seq_file *m, const char *delimiter,
if (m->count + width >= m->size)
goto overflow;
- if (num < 10) {
- m->buf[m->count++] = num + '0';
- return;
- }
-
len = num_to_str(m->buf + m->count, m->size - m->count, num, width);
if (!len)
goto overflow;
diff --git a/fs/signalfd.c b/fs/signalfd.c
index d2187a813376..cbb42f77a2bd 100644
--- a/fs/signalfd.c
+++ b/fs/signalfd.c
@@ -81,83 +81,86 @@ static __poll_t signalfd_poll(struct file *file, poll_table *wait)
static int signalfd_copyinfo(struct signalfd_siginfo __user *uinfo,
siginfo_t const *kinfo)
{
- long err;
+ struct signalfd_siginfo new;
BUILD_BUG_ON(sizeof(struct signalfd_siginfo) != 128);
/*
* Unused members should be zero ...
*/
- err = __clear_user(uinfo, sizeof(*uinfo));
+ memset(&new, 0, sizeof(new));
/*
* If you change siginfo_t structure, please be sure
* this code is fixed accordingly.
*/
- err |= __put_user(kinfo->si_signo, &uinfo->ssi_signo);
- err |= __put_user(kinfo->si_errno, &uinfo->ssi_errno);
- err |= __put_user(kinfo->si_code, &uinfo->ssi_code);
+ new.ssi_signo = kinfo->si_signo;
+ new.ssi_errno = kinfo->si_errno;
+ new.ssi_code = kinfo->si_code;
switch (siginfo_layout(kinfo->si_signo, kinfo->si_code)) {
case SIL_KILL:
- err |= __put_user(kinfo->si_pid, &uinfo->ssi_pid);
- err |= __put_user(kinfo->si_uid, &uinfo->ssi_uid);
+ new.ssi_pid = kinfo->si_pid;
+ new.ssi_uid = kinfo->si_uid;
break;
case SIL_TIMER:
- err |= __put_user(kinfo->si_tid, &uinfo->ssi_tid);
- err |= __put_user(kinfo->si_overrun, &uinfo->ssi_overrun);
- err |= __put_user((long) kinfo->si_ptr, &uinfo->ssi_ptr);
- err |= __put_user(kinfo->si_int, &uinfo->ssi_int);
+ new.ssi_tid = kinfo->si_tid;
+ new.ssi_overrun = kinfo->si_overrun;
+ new.ssi_ptr = (long) kinfo->si_ptr;
+ new.ssi_int = kinfo->si_int;
break;
case SIL_POLL:
- err |= __put_user(kinfo->si_band, &uinfo->ssi_band);
- err |= __put_user(kinfo->si_fd, &uinfo->ssi_fd);
+ new.ssi_band = kinfo->si_band;
+ new.ssi_fd = kinfo->si_fd;
break;
- case SIL_FAULT:
- err |= __put_user((long) kinfo->si_addr, &uinfo->ssi_addr);
-#ifdef __ARCH_SI_TRAPNO
- err |= __put_user(kinfo->si_trapno, &uinfo->ssi_trapno);
-#endif
-#ifdef BUS_MCEERR_AO
+ case SIL_FAULT_BNDERR:
+ case SIL_FAULT_PKUERR:
/*
- * Other callers might not initialize the si_lsb field,
- * so check explicitly for the right codes here.
+ * Fall through to the SIL_FAULT case. Both SIL_FAULT_BNDERR
+ * and SIL_FAULT_PKUERR are only generated by faults that
+ * deliver them synchronously to userspace. In case someone
+	 * injects one of these signals and signalfd catches it, treat
+	 * it as SIL_FAULT.
*/
- if (kinfo->si_signo == SIGBUS &&
- kinfo->si_code == BUS_MCEERR_AO)
- err |= __put_user((short) kinfo->si_addr_lsb,
- &uinfo->ssi_addr_lsb);
+ case SIL_FAULT:
+ new.ssi_addr = (long) kinfo->si_addr;
+#ifdef __ARCH_SI_TRAPNO
+ new.ssi_trapno = kinfo->si_trapno;
#endif
-#ifdef BUS_MCEERR_AR
- /*
- * Other callers might not initialize the si_lsb field,
- * so check explicitly for the right codes here.
- */
- if (kinfo->si_signo == SIGBUS &&
- kinfo->si_code == BUS_MCEERR_AR)
- err |= __put_user((short) kinfo->si_addr_lsb,
- &uinfo->ssi_addr_lsb);
+ break;
+ case SIL_FAULT_MCEERR:
+ new.ssi_addr = (long) kinfo->si_addr;
+#ifdef __ARCH_SI_TRAPNO
+ new.ssi_trapno = kinfo->si_trapno;
#endif
+ new.ssi_addr_lsb = (short) kinfo->si_addr_lsb;
break;
case SIL_CHLD:
- err |= __put_user(kinfo->si_pid, &uinfo->ssi_pid);
- err |= __put_user(kinfo->si_uid, &uinfo->ssi_uid);
- err |= __put_user(kinfo->si_status, &uinfo->ssi_status);
- err |= __put_user(kinfo->si_utime, &uinfo->ssi_utime);
- err |= __put_user(kinfo->si_stime, &uinfo->ssi_stime);
+ new.ssi_pid = kinfo->si_pid;
+ new.ssi_uid = kinfo->si_uid;
+ new.ssi_status = kinfo->si_status;
+ new.ssi_utime = kinfo->si_utime;
+ new.ssi_stime = kinfo->si_stime;
break;
case SIL_RT:
- default:
/*
* This case catches also the signals queued by sigqueue().
*/
- err |= __put_user(kinfo->si_pid, &uinfo->ssi_pid);
- err |= __put_user(kinfo->si_uid, &uinfo->ssi_uid);
- err |= __put_user((long) kinfo->si_ptr, &uinfo->ssi_ptr);
- err |= __put_user(kinfo->si_int, &uinfo->ssi_int);
+ new.ssi_pid = kinfo->si_pid;
+ new.ssi_uid = kinfo->si_uid;
+ new.ssi_ptr = (long) kinfo->si_ptr;
+ new.ssi_int = kinfo->si_int;
+ break;
+ case SIL_SYS:
+ new.ssi_call_addr = (long) kinfo->si_call_addr;
+ new.ssi_syscall = kinfo->si_syscall;
+ new.ssi_arch = kinfo->si_arch;
break;
}
- return err ? -EFAULT: sizeof(*uinfo);
+ if (copy_to_user(uinfo, &new, sizeof(struct signalfd_siginfo)))
+ return -EFAULT;
+
+ return sizeof(*uinfo);
}
static ssize_t signalfd_dequeue(struct signalfd_ctx *ctx, siginfo_t *info,
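
The signalfd_copyinfo() rewrite is an instance of a general uaccess pattern: build the record in a zeroed stack copy and push it out with one copy_to_user(), instead of a __clear_user() followed by a __put_user() per field. That is both cheaper and guarantees padding and unused members reach userspace as zeros. Stripped to its shape (struct foo and foo_uapi are hypothetical):

struct foo { int a; long b; };
struct foo_uapi { int a; int pad; long b; unsigned char reserved[16]; };

static int export_foo(struct foo_uapi __user *uinfo, const struct foo *k)
{
	struct foo_uapi new;

	memset(&new, 0, sizeof(new));	/* unused members must be zero */
	new.a = k->a;
	new.b = k->b;
	if (copy_to_user(uinfo, &new, sizeof(new)))
		return -EFAULT;
	return sizeof(*uinfo);
}
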
diff --git a/fs/super.c b/fs/super.c
index 122c402049a2..50728d9c1a05 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -121,13 +121,23 @@ static unsigned long super_cache_count(struct shrinker *shrink,
sb = container_of(shrink, struct super_block, s_shrink);
/*
- * Don't call trylock_super as it is a potential
- * scalability bottleneck. The counts could get updated
- * between super_cache_count and super_cache_scan anyway.
- * Call to super_cache_count with shrinker_rwsem held
- * ensures the safety of call to list_lru_shrink_count() and
- * s_op->nr_cached_objects().
+ * We don't call trylock_super() here as it is a scalability bottleneck,
+ * so we're exposed to partial setup state. The shrinker rwsem does not
+ * protect filesystem operations backing list_lru_shrink_count() or
+ * s_op->nr_cached_objects(). Counts can change between
+ * super_cache_count and super_cache_scan, so we really don't need locks
+ * here.
+ *
+ * However, if we are currently mounting the superblock, the underlying
+ * filesystem might be in a state of partial construction and hence it
+ * is dangerous to access it. trylock_super() uses a SB_BORN check to
+ * avoid this situation, so do the same here. The memory barrier is
+ * matched with the one in mount_fs() as we don't hold locks here.
*/
+ if (!(sb->s_flags & SB_BORN))
+ return 0;
+ smp_rmb();
+
if (sb->s_op && sb->s_op->nr_cached_objects)
total_objects = sb->s_op->nr_cached_objects(sb, sc);
@@ -937,7 +947,7 @@ void emergency_remount(void)
static void do_thaw_all_callback(struct super_block *sb)
{
down_write(&sb->s_umount);
- if (sb->s_root && sb->s_flags & MS_BORN) {
+ if (sb->s_root && sb->s_flags & SB_BORN) {
emergency_thaw_bdev(sb);
thaw_super_locked(sb);
} else {
@@ -1272,6 +1282,14 @@ mount_fs(struct file_system_type *type, int flags, const char *name, void *data)
sb = root->d_sb;
BUG_ON(!sb);
WARN_ON(!sb->s_bdi);
+
+ /*
+ * Write barrier is for super_cache_count(). We place it before setting
+ * SB_BORN as the data dependency between the two functions is the
+ * superblock structure contents that we just set up, not the SB_BORN
+ * flag.
+ */
+ smp_wmb();
sb->s_flags |= SB_BORN;
error = security_sb_kern_mount(sb, flags, secdata);
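
The SB_BORN handling above is the standard publish/consume barrier pairing; detached from superblocks, the same shape reads:

struct pub {
	int a, b;
	bool ready;
};

static void publish(struct pub *p, int a, int b)	/* cf. mount_fs() */
{
	p->a = a;
	p->b = b;
	smp_wmb();	/* order the payload stores before the flag */
	p->ready = true;
}

static int consume(struct pub *p)	/* cf. super_cache_count() */
{
	if (!p->ready)
		return 0;
	smp_rmb();	/* pairs with the smp_wmb() in publish() */
	return p->a + p->b;
}
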
diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c
index b428d317ae92..92682fcc41f6 100644
--- a/fs/sysfs/mount.c
+++ b/fs/sysfs/mount.c
@@ -25,7 +25,7 @@ static struct dentry *sysfs_mount(struct file_system_type *fs_type,
{
struct dentry *root;
void *ns;
- bool new_sb;
+ bool new_sb = false;
if (!(flags & SB_KERNMOUNT)) {
if (!kobj_ns_current_may_mount(KOBJ_NS_TYPE_NET))
@@ -35,9 +35,9 @@ static struct dentry *sysfs_mount(struct file_system_type *fs_type,
ns = kobj_ns_grab_current(KOBJ_NS_TYPE_NET);
root = kernfs_mount_ns(fs_type, flags, sysfs_root,
SYSFS_MAGIC, &new_sb, ns);
- if (IS_ERR(root) || !new_sb)
+ if (!new_sb)
kobj_ns_drop(KOBJ_NS_TYPE_NET, ns);
- else if (new_sb)
+ else if (!IS_ERR(root))
root->d_sb->s_iflags |= SB_I_USERNS_VISIBLE;
return root;
diff --git a/fs/sysv/namei.c b/fs/sysv/namei.c
index 250b0755b908..4d5d20491ffd 100644
--- a/fs/sysv/namei.c
+++ b/fs/sysv/namei.c
@@ -51,14 +51,9 @@ static struct dentry *sysv_lookup(struct inode * dir, struct dentry * dentry, un
if (dentry->d_name.len > SYSV_NAMELEN)
return ERR_PTR(-ENAMETOOLONG);
ino = sysv_inode_by_name(dentry);
-
- if (ino) {
+ if (ino)
inode = sysv_iget(dir->i_sb, ino);
- if (IS_ERR(inode))
- return ERR_CAST(inode);
- }
- d_add(dentry, inode);
- return NULL;
+ return d_splice_alias(inode, dentry);
}
static int sysv_mknod(struct inode * dir, struct dentry * dentry, umode_t mode, dev_t rdev)
diff --git a/fs/timerfd.c b/fs/timerfd.c
index cdad49da3ff7..d84a2bee4f82 100644
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -226,21 +226,20 @@ static int timerfd_release(struct inode *inode, struct file *file)
kfree_rcu(ctx, rcu);
return 0;
}
-
-static __poll_t timerfd_poll(struct file *file, poll_table *wait)
+
+static struct wait_queue_head *timerfd_get_poll_head(struct file *file,
+ __poll_t eventmask)
{
struct timerfd_ctx *ctx = file->private_data;
- __poll_t events = 0;
- unsigned long flags;
- poll_wait(file, &ctx->wqh, wait);
+ return &ctx->wqh;
+}
- spin_lock_irqsave(&ctx->wqh.lock, flags);
- if (ctx->ticks)
- events |= EPOLLIN;
- spin_unlock_irqrestore(&ctx->wqh.lock, flags);
+static __poll_t timerfd_poll_mask(struct file *file, __poll_t eventmask)
+{
+ struct timerfd_ctx *ctx = file->private_data;
- return events;
+ return ctx->ticks ? EPOLLIN : 0;
}
static ssize_t timerfd_read(struct file *file, char __user *buf, size_t count,
@@ -364,7 +363,8 @@ static long timerfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg
static const struct file_operations timerfd_fops = {
.release = timerfd_release,
- .poll = timerfd_poll,
+ .get_poll_head = timerfd_get_poll_head,
+ .poll_mask = timerfd_poll_mask,
.read = timerfd_read,
.llseek = noop_llseek,
.show_fdinfo = timerfd_show,
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c
index 9d7fb88e172e..4e267cc21c77 100644
--- a/fs/ubifs/dir.c
+++ b/fs/ubifs/dir.c
@@ -214,7 +214,7 @@ static struct dentry *ubifs_lookup(struct inode *dir, struct dentry *dentry,
int err;
union ubifs_key key;
struct inode *inode = NULL;
- struct ubifs_dent_node *dent;
+ struct ubifs_dent_node *dent = NULL;
struct ubifs_info *c = dir->i_sb->s_fs_info;
struct fscrypt_name nm;
@@ -229,14 +229,14 @@ static struct dentry *ubifs_lookup(struct inode *dir, struct dentry *dentry,
return ERR_PTR(err);
if (fname_len(&nm) > UBIFS_MAX_NLEN) {
- err = -ENAMETOOLONG;
- goto out_fname;
+ inode = ERR_PTR(-ENAMETOOLONG);
+ goto done;
}
dent = kmalloc(UBIFS_MAX_DENT_NODE_SZ, GFP_NOFS);
if (!dent) {
- err = -ENOMEM;
- goto out_fname;
+ inode = ERR_PTR(-ENOMEM);
+ goto done;
}
if (nm.hash) {
@@ -250,16 +250,16 @@ static struct dentry *ubifs_lookup(struct inode *dir, struct dentry *dentry,
}
if (err) {
- if (err == -ENOENT) {
+ if (err == -ENOENT)
dbg_gen("not found");
- goto done;
- }
- goto out_dent;
+ else
+ inode = ERR_PTR(err);
+ goto done;
}
if (dbg_check_name(c, dent, &nm)) {
- err = -EINVAL;
- goto out_dent;
+ inode = ERR_PTR(-EINVAL);
+ goto done;
}
inode = ubifs_iget(dir->i_sb, le64_to_cpu(dent->inum));
@@ -272,7 +272,7 @@ static struct dentry *ubifs_lookup(struct inode *dir, struct dentry *dentry,
ubifs_err(c, "dead directory entry '%pd', error %d",
dentry, err);
ubifs_ro_mode(c, err);
- goto out_dent;
+ goto done;
}
if (ubifs_crypt_is_encrypted(dir) &&
@@ -280,27 +280,14 @@ static struct dentry *ubifs_lookup(struct inode *dir, struct dentry *dentry,
!fscrypt_has_permitted_context(dir, inode)) {
ubifs_warn(c, "Inconsistent encryption contexts: %lu/%lu",
dir->i_ino, inode->i_ino);
- err = -EPERM;
- goto out_inode;
+ iput(inode);
+ inode = ERR_PTR(-EPERM);
}
done:
kfree(dent);
fscrypt_free_filename(&nm);
- /*
- * Note, d_splice_alias() would be required instead if we supported
- * NFS.
- */
- d_add(dentry, inode);
- return NULL;
-
-out_inode:
- iput(inode);
-out_dent:
- kfree(dent);
-out_fname:
- fscrypt_free_filename(&nm);
- return ERR_PTR(err);
+ return d_splice_alias(inode, dentry);
}
static int ubifs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index 0458dd47e105..c586026508db 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -622,8 +622,7 @@ static int udf_add_nondir(struct dentry *dentry, struct inode *inode)
if (fibh.sbh != fibh.ebh)
brelse(fibh.ebh);
brelse(fibh.sbh);
- unlock_new_inode(inode);
- d_instantiate(dentry, inode);
+ d_instantiate_new(dentry, inode);
return 0;
}
@@ -733,8 +732,7 @@ static int udf_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
inc_nlink(dir);
dir->i_ctime = dir->i_mtime = current_time(dir);
mark_inode_dirty(dir);
- unlock_new_inode(inode);
- d_instantiate(dentry, inode);
+ d_instantiate_new(dentry, inode);
if (fibh.sbh != fibh.ebh)
brelse(fibh.ebh);
brelse(fibh.sbh);
diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c
index 32545cd00ceb..d5f43ba76c59 100644
--- a/fs/ufs/namei.c
+++ b/fs/ufs/namei.c
@@ -39,8 +39,7 @@ static inline int ufs_add_nondir(struct dentry *dentry, struct inode *inode)
{
int err = ufs_add_link(dentry, inode);
if (!err) {
- unlock_new_inode(inode);
- d_instantiate(dentry, inode);
+ d_instantiate_new(dentry, inode);
return 0;
}
inode_dec_link_count(inode);
@@ -193,8 +192,7 @@ static int ufs_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode)
if (err)
goto out_fail;
- unlock_new_inode(inode);
- d_instantiate(dentry, inode);
+ d_instantiate_new(dentry, inode);
return 0;
out_fail:
diff --git a/fs/xattr.c b/fs/xattr.c
index 61cd28ba25f3..f9cb1db187b7 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -229,7 +229,7 @@ out:
}
EXPORT_SYMBOL_GPL(vfs_setxattr);
-ssize_t
+static ssize_t
xattr_getsecurity(struct inode *inode, const char *name, void *value,
size_t size)
{
@@ -254,7 +254,6 @@ out:
out_noalloc:
return len;
}
-EXPORT_SYMBOL_GPL(xattr_getsecurity);
/*
* vfs_getxattr_alloc - allocate memory, if necessary, before calling getxattr
@@ -354,7 +353,6 @@ vfs_listxattr(struct dentry *dentry, char *list, size_t size)
if (error)
return error;
if (inode->i_op->listxattr && (inode->i_opflags & IOP_XATTR)) {
- error = -EOPNOTSUPP;
error = inode->i_op->listxattr(dentry, list, size);
} else {
error = security_inode_listsecurity(inode, list, size);
diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c
index ce4a34a2751d..35a124400d60 100644
--- a/fs/xfs/libxfs/xfs_attr.c
+++ b/fs/xfs/libxfs/xfs_attr.c
@@ -511,7 +511,14 @@ xfs_attr_shortform_addname(xfs_da_args_t *args)
if (args->flags & ATTR_CREATE)
return retval;
retval = xfs_attr_shortform_remove(args);
- ASSERT(retval == 0);
+ if (retval)
+ return retval;
+ /*
+ * Since we have removed the old attr, clear ATTR_REPLACE so
+ * that the leaf format add routine won't trip over the attr
+ * not being around.
+ */
+ args->flags &= ~ATTR_REPLACE;
}
if (args->namelen >= XFS_ATTR_SF_ENTSIZE_MAX ||
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 6a7c2f03ea11..040eeda8426f 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -725,12 +725,16 @@ xfs_bmap_extents_to_btree(
*logflagsp = 0;
if ((error = xfs_alloc_vextent(&args))) {
xfs_iroot_realloc(ip, -1, whichfork);
+ ASSERT(ifp->if_broot == NULL);
+ XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS);
xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
return error;
}
if (WARN_ON_ONCE(args.fsbno == NULLFSBLOCK)) {
xfs_iroot_realloc(ip, -1, whichfork);
+ ASSERT(ifp->if_broot == NULL);
+ XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS);
xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
return -ENOSPC;
}
diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c
index ef68b1de006a..1201107eabc6 100644
--- a/fs/xfs/libxfs/xfs_inode_buf.c
+++ b/fs/xfs/libxfs/xfs_inode_buf.c
@@ -466,6 +466,8 @@ xfs_dinode_verify(
return __this_address;
if (di_size > XFS_DFORK_DSIZE(dip, mp))
return __this_address;
+ if (dip->di_nextents)
+ return __this_address;
/* fall through */
case XFS_DINODE_FMT_EXTENTS:
case XFS_DINODE_FMT_BTREE:
@@ -484,12 +486,31 @@ xfs_dinode_verify(
if (XFS_DFORK_Q(dip)) {
switch (dip->di_aformat) {
case XFS_DINODE_FMT_LOCAL:
+ if (dip->di_anextents)
+ return __this_address;
+ /* fall through */
case XFS_DINODE_FMT_EXTENTS:
case XFS_DINODE_FMT_BTREE:
break;
default:
return __this_address;
}
+ } else {
+ /*
+ * If there is no fork offset, this may be a freshly-made inode
+ * in a new disk cluster, in which case di_aformat is zeroed.
+ * Otherwise, such an inode must be in EXTENTS format; this goes
+ * for freed inodes as well.
+ */
+ switch (dip->di_aformat) {
+ case 0:
+ case XFS_DINODE_FMT_EXTENTS:
+ break;
+ default:
+ return __this_address;
+ }
+ if (dip->di_anextents)
+ return __this_address;
}
/* only version 3 or greater inodes are extensively verified here */
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 0ab824f574ed..102463543db3 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -594,7 +594,7 @@ xfs_alloc_ioend(
struct xfs_ioend *ioend;
struct bio *bio;
- bio = bio_alloc_bioset(GFP_NOFS, BIO_MAX_PAGES, xfs_ioend_bioset);
+ bio = bio_alloc_bioset(GFP_NOFS, BIO_MAX_PAGES, &xfs_ioend_bioset);
xfs_init_bio_from_bh(bio, bh);
ioend = container_of(bio, struct xfs_ioend, io_inline_bio);
diff --git a/fs/xfs/xfs_aops.h b/fs/xfs/xfs_aops.h
index 69346d460dfa..694c85b03813 100644
--- a/fs/xfs/xfs_aops.h
+++ b/fs/xfs/xfs_aops.h
@@ -18,7 +18,7 @@
#ifndef __XFS_AOPS_H__
#define __XFS_AOPS_H__
-extern struct bio_set *xfs_ioend_bioset;
+extern struct bio_set xfs_ioend_bioset;
/*
* Types of I/O for bmap clustering and I/O completion tracking.
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 299aee4b7b0b..e70fb8ccecea 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -778,22 +778,26 @@ xfs_file_fallocate(
if (error)
goto out_unlock;
} else if (mode & FALLOC_FL_INSERT_RANGE) {
- unsigned int blksize_mask = i_blocksize(inode) - 1;
+ unsigned int blksize_mask = i_blocksize(inode) - 1;
+ loff_t isize = i_size_read(inode);
- new_size = i_size_read(inode) + len;
if (offset & blksize_mask || len & blksize_mask) {
error = -EINVAL;
goto out_unlock;
}
- /* check the new inode size does not wrap through zero */
- if (new_size > inode->i_sb->s_maxbytes) {
+ /*
+ * New inode size must not exceed ->s_maxbytes, accounting for
+ * possible signed overflow.
+ */
+ if (inode->i_sb->s_maxbytes - isize < len) {
error = -EFBIG;
goto out_unlock;
}
+ new_size = isize + len;
/* Offset should be less than i_size */
- if (offset >= i_size_read(inode)) {
+ if (offset >= isize) {
error = -EINVAL;
goto out_unlock;
}
@@ -876,8 +880,18 @@ xfs_file_dedupe_range(
struct file *dst_file,
u64 dst_loff)
{
+ struct inode *srci = file_inode(src_file);
+ u64 max_dedupe;
int error;
+ /*
+ * Since we have to read all these pages in to compare them, cut
+ * it off at MAX_RW_COUNT/2 rounded down to the nearest block.
+ * That means we won't do more than MAX_RW_COUNT IO per request.
+ */
+ max_dedupe = (MAX_RW_COUNT >> 1) & ~(i_blocksize(srci) - 1);
+ if (len > max_dedupe)
+ len = max_dedupe;
error = xfs_reflink_remap_range(src_file, loff, dst_file, dst_loff,
len, true);
if (error)
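
The max_dedupe clamp in xfs_file_dedupe_range() relies on block sizes being powers of two: masking with ~(blocksize - 1) clears the sub-block bits, i.e. rounds down to a block boundary. Assuming 4K pages, MAX_RW_COUNT is 0x7ffff000 and the clamp works out to just under 1 GiB per request:

static inline u64 round_down_to_block(u64 x, unsigned int blocksize)
{
	/* valid only for power-of-two block sizes */
	return x & ~(u64)(blocksize - 1);
}

/*
 * (0x7ffff000 >> 1)	= 0x3ffff800
 * 0x3ffff800 & ~0xfff	= 0x3ffff000
 */
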
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index a3ed3c811dfa..df42e4cb4dc4 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -260,6 +260,7 @@ xfs_vn_lookup(
struct dentry *dentry,
unsigned int flags)
{
+ struct inode *inode;
struct xfs_inode *cip;
struct xfs_name name;
int error;
@@ -269,14 +270,13 @@ xfs_vn_lookup(
xfs_dentry_to_name(&name, dentry);
error = xfs_lookup(XFS_I(dir), &name, &cip, NULL);
- if (unlikely(error)) {
- if (unlikely(error != -ENOENT))
- return ERR_PTR(error);
- d_add(dentry, NULL);
- return NULL;
- }
-
- return d_splice_alias(VFS_I(cip), dentry);
+ if (likely(!error))
+ inode = VFS_I(cip);
+ else if (likely(error == -ENOENT))
+ inode = NULL;
+ else
+ inode = ERR_PTR(error);
+ return d_splice_alias(inode, dentry);
}
STATIC struct dentry *
diff --git a/fs/xfs/xfs_stats.c b/fs/xfs/xfs_stats.c
index 056e12b421eb..1cc79907b377 100644
--- a/fs/xfs/xfs_stats.c
+++ b/fs/xfs/xfs_stats.c
@@ -113,6 +113,7 @@ void xfs_stats_clearall(struct xfsstats __percpu *stats)
}
}
+#ifdef CONFIG_PROC_FS
/* legacy quota interfaces */
#ifdef CONFIG_XFS_QUOTA
static int xqm_proc_show(struct seq_file *m, void *v)
@@ -124,18 +125,6 @@ static int xqm_proc_show(struct seq_file *m, void *v)
return 0;
}
-static int xqm_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, xqm_proc_show, NULL);
-}
-
-static const struct file_operations xqm_proc_fops = {
- .open = xqm_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
/* legacy quota stats interface no 2 */
static int xqmstat_proc_show(struct seq_file *m, void *v)
{
@@ -147,22 +136,8 @@ static int xqmstat_proc_show(struct seq_file *m, void *v)
seq_putc(m, '\n');
return 0;
}
-
-static int xqmstat_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, xqmstat_proc_show, NULL);
-}
-
-static const struct file_operations xqmstat_proc_fops = {
- .owner = THIS_MODULE,
- .open = xqmstat_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
#endif /* CONFIG_XFS_QUOTA */
-#ifdef CONFIG_PROC_FS
int
xfs_init_procfs(void)
{
@@ -174,11 +149,9 @@ xfs_init_procfs(void)
goto out;
#ifdef CONFIG_XFS_QUOTA
- if (!proc_create("fs/xfs/xqmstat", 0, NULL,
- &xqmstat_proc_fops))
+ if (!proc_create_single("fs/xfs/xqmstat", 0, NULL, xqmstat_proc_show))
goto out;
- if (!proc_create("fs/xfs/xqm", 0, NULL,
- &xqm_proc_fops))
+ if (!proc_create_single("fs/xfs/xqm", 0, NULL, xqm_proc_show))
goto out;
#endif
return 0;
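
proc_create_single(), used above, registers a seq_file show callback directly and
generates the open/read/release plumbing internally, which is what lets the
per-file file_operations and open routines be deleted. A minimal sketch of the
pattern (the path name is illustrative):

static int demo_proc_show(struct seq_file *m, void *v)
{
	seq_puts(m, "example\n");
	return 0;
}

static int __init demo_procfs_init(void)
{
	if (!proc_create_single("fs/demo", 0, NULL, demo_proc_show))
		return -ENOMEM;
	return 0;
}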
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index d71424052917..f643d76db516 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -63,7 +63,7 @@
#include <linux/parser.h>
static const struct super_operations xfs_super_operations;
-struct bio_set *xfs_ioend_bioset;
+struct bio_set xfs_ioend_bioset;
static struct kset *xfs_kset; /* top-level xfs sysfs dir */
#ifdef DEBUG
@@ -1845,10 +1845,9 @@ MODULE_ALIAS_FS("xfs");
STATIC int __init
xfs_init_zones(void)
{
- xfs_ioend_bioset = bioset_create(4 * MAX_BUF_PER_PAGE,
+ if (bioset_init(&xfs_ioend_bioset, 4 * MAX_BUF_PER_PAGE,
offsetof(struct xfs_ioend, io_inline_bio),
- BIOSET_NEED_BVECS);
- if (!xfs_ioend_bioset)
+ BIOSET_NEED_BVECS))
goto out;
xfs_log_ticket_zone = kmem_zone_init(sizeof(xlog_ticket_t),
@@ -1997,7 +1996,7 @@ xfs_init_zones(void)
out_destroy_log_ticket_zone:
kmem_zone_destroy(xfs_log_ticket_zone);
out_free_ioend_bioset:
- bioset_free(xfs_ioend_bioset);
+ bioset_exit(&xfs_ioend_bioset);
out:
return -ENOMEM;
}
@@ -2029,7 +2028,7 @@ xfs_destroy_zones(void)
kmem_zone_destroy(xfs_btree_cur_zone);
kmem_zone_destroy(xfs_bmap_free_item_zone);
kmem_zone_destroy(xfs_log_ticket_zone);
- bioset_free(xfs_ioend_bioset);
+ bioset_exit(&xfs_ioend_bioset);
}
STATIC int __init
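
The conversion pattern in this file recurs throughout the series: the bio_set
becomes a plain global (or embedded member), bioset_init() returns an errno
instead of a pointer, and teardown is bioset_exit() on the same object. A sketch
of the lifecycle with a zero front_pad for simplicity (XFS uses offsetof() so the
bio can sit inside its ioend structure):

static struct bio_set demo_bioset;

static int __init demo_init(void)
{
	if (bioset_init(&demo_bioset, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS))
		return -ENOMEM;
	return 0;
}

static void __exit demo_exit(void)
{
	bioset_exit(&demo_bioset);
}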
diff --git a/include/asm-generic/atomic-long.h b/include/asm-generic/atomic-long.h
index 34a028a7bcc5..87d14476edc2 100644
--- a/include/asm-generic/atomic-long.h
+++ b/include/asm-generic/atomic-long.h
@@ -25,6 +25,7 @@ typedef atomic64_t atomic_long_t;
#define ATOMIC_LONG_INIT(i) ATOMIC64_INIT(i)
#define ATOMIC_LONG_PFX(x) atomic64 ## x
+#define ATOMIC_LONG_TYPE s64
#else
@@ -32,6 +33,7 @@ typedef atomic_t atomic_long_t;
#define ATOMIC_LONG_INIT(i) ATOMIC_INIT(i)
#define ATOMIC_LONG_PFX(x) atomic ## x
+#define ATOMIC_LONG_TYPE int
#endif
@@ -90,6 +92,21 @@ ATOMIC_LONG_ADD_SUB_OP(sub, _release)
#define atomic_long_cmpxchg(l, old, new) \
(ATOMIC_LONG_PFX(_cmpxchg)((ATOMIC_LONG_PFX(_t) *)(l), (old), (new)))
+
+#define atomic_long_try_cmpxchg_relaxed(l, old, new) \
+ (ATOMIC_LONG_PFX(_try_cmpxchg_relaxed)((ATOMIC_LONG_PFX(_t) *)(l), \
+ (ATOMIC_LONG_TYPE *)(old), (ATOMIC_LONG_TYPE)(new)))
+#define atomic_long_try_cmpxchg_acquire(l, old, new) \
+ (ATOMIC_LONG_PFX(_try_cmpxchg_acquire)((ATOMIC_LONG_PFX(_t) *)(l), \
+ (ATOMIC_LONG_TYPE *)(old), (ATOMIC_LONG_TYPE)(new)))
+#define atomic_long_try_cmpxchg_release(l, old, new) \
+ (ATOMIC_LONG_PFX(_try_cmpxchg_release)((ATOMIC_LONG_PFX(_t) *)(l), \
+ (ATOMIC_LONG_TYPE *)(old), (ATOMIC_LONG_TYPE)(new)))
+#define atomic_long_try_cmpxchg(l, old, new) \
+ (ATOMIC_LONG_PFX(_try_cmpxchg)((ATOMIC_LONG_PFX(_t) *)(l), \
+ (ATOMIC_LONG_TYPE *)(old), (ATOMIC_LONG_TYPE)(new)))
+
#define atomic_long_xchg_relaxed(v, new) \
(ATOMIC_LONG_PFX(_xchg_relaxed)((ATOMIC_LONG_PFX(_t) *)(v), (new)))
#define atomic_long_xchg_acquire(v, new) \
@@ -244,6 +261,8 @@ static inline long atomic_long_add_unless(atomic_long_t *l, long a, long u)
#define atomic_long_inc_not_zero(l) \
ATOMIC_LONG_PFX(_inc_not_zero)((ATOMIC_LONG_PFX(_t) *)(l))
+#define atomic_long_cond_read_relaxed(v, c) \
+ ATOMIC_LONG_PFX(_cond_read_relaxed)((ATOMIC_LONG_PFX(_t) *)(v), (c))
#define atomic_long_cond_read_acquire(v, c) \
ATOMIC_LONG_PFX(_cond_read_acquire)((ATOMIC_LONG_PFX(_t) *)(v), (c))
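
The new try_cmpxchg wrappers differ from plain cmpxchg in that, on failure, the
current value is written back through the "old" pointer, so a retry loop needs no
separate re-read. A sketch of the idiom these macros enable:

static void demo_inc_below(atomic_long_t *v, long limit)
{
	long old = atomic_long_read(v);

	do {
		if (old >= limit)
			return;
		/* On failure, "old" is refreshed with the current value. */
	} while (!atomic_long_try_cmpxchg(v, &old, old + 1));
}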
diff --git a/include/asm-generic/barrier.h b/include/asm-generic/barrier.h
index 29458bbb2fa0..2cafdbb9ae4c 100644
--- a/include/asm-generic/barrier.h
+++ b/include/asm-generic/barrier.h
@@ -221,18 +221,17 @@ do { \
#endif
/**
- * smp_cond_load_acquire() - (Spin) wait for cond with ACQUIRE ordering
+ * smp_cond_load_relaxed() - (Spin) wait for cond with no ordering guarantees
* @ptr: pointer to the variable to wait on
* @cond: boolean expression to wait for
*
- * Equivalent to using smp_load_acquire() on the condition variable but employs
- * the control dependency of the wait to reduce the barrier on many platforms.
+ * Equivalent to using READ_ONCE() on the condition variable.
*
* Due to C lacking lambda expressions we load the value of *ptr into a
* pre-named variable @VAL to be used in @cond.
*/
-#ifndef smp_cond_load_acquire
-#define smp_cond_load_acquire(ptr, cond_expr) ({ \
+#ifndef smp_cond_load_relaxed
+#define smp_cond_load_relaxed(ptr, cond_expr) ({ \
typeof(ptr) __PTR = (ptr); \
typeof(*ptr) VAL; \
for (;;) { \
@@ -241,10 +240,26 @@ do { \
break; \
cpu_relax(); \
} \
- smp_acquire__after_ctrl_dep(); \
VAL; \
})
#endif
+/**
+ * smp_cond_load_acquire() - (Spin) wait for cond with ACQUIRE ordering
+ * @ptr: pointer to the variable to wait on
+ * @cond: boolean expression to wait for
+ *
+ * Equivalent to using smp_load_acquire() on the condition variable but employs
+ * the control dependency of the wait to reduce the barrier on many platforms.
+ */
+#ifndef smp_cond_load_acquire
+#define smp_cond_load_acquire(ptr, cond_expr) ({ \
+ typeof(*ptr) _val; \
+ _val = smp_cond_load_relaxed(ptr, cond_expr); \
+ smp_acquire__after_ctrl_dep(); \
+ _val; \
+})
+#endif
+
#endif /* !__ASSEMBLY__ */
#endif /* __ASM_GENERIC_BARRIER_H */
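
After this change smp_cond_load_acquire() is the relaxed spin loop plus
smp_acquire__after_ctrl_dep(), so callers that need no ordering (pure polling or
back-off loops) can use the cheaper relaxed form. A sketch of a caller; VAL is
the macro-provided name for the loaded value inside the condition expression:

static unsigned int demo_wait_nonzero(unsigned int *flag)
{
	/*
	 * Spins with READ_ONCE() semantics only; use the _acquire
	 * variant instead if later loads must be ordered after this.
	 */
	return smp_cond_load_relaxed(flag, VAL != 0);
}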
diff --git a/include/asm-generic/dma-mapping.h b/include/asm-generic/dma-mapping.h
index 880a292d792f..ad2868263867 100644
--- a/include/asm-generic/dma-mapping.h
+++ b/include/asm-generic/dma-mapping.h
@@ -4,7 +4,16 @@
static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus)
{
+ /*
+ * Use the non-coherent ops if available. If an architecture wants a
+ * more fine-grained selection of operations it will have to implement
+ * get_arch_dma_ops itself or use the per-device dma_ops.
+ */
+#ifdef CONFIG_DMA_NONCOHERENT_OPS
+ return &dma_noncoherent_ops;
+#else
return &dma_direct_ops;
+#endif
}
#endif /* _ASM_GENERIC_DMA_MAPPING_H */
diff --git a/include/asm-generic/pci.h b/include/asm-generic/pci.h
index 830d7659289b..6bb3cd3d695a 100644
--- a/include/asm-generic/pci.h
+++ b/include/asm-generic/pci.h
@@ -14,12 +14,4 @@ static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel)
}
#endif /* HAVE_ARCH_PCI_GET_LEGACY_IDE_IRQ */
-/*
- * By default, assume that no iommu is in use and that the PCI
- * space is mapped to address physical 0.
- */
-#ifndef PCI_DMA_BUS_IS_PHYS
-#define PCI_DMA_BUS_IS_PHYS (1)
-#endif
-
#endif /* _ASM_GENERIC_PCI_H */
diff --git a/include/asm-generic/qspinlock.h b/include/asm-generic/qspinlock.h
index b37b4ad7eb94..9cc457597ddf 100644
--- a/include/asm-generic/qspinlock.h
+++ b/include/asm-generic/qspinlock.h
@@ -26,7 +26,6 @@
* @lock: Pointer to queued spinlock structure
* Return: 1 if it is locked, 0 otherwise
*/
-#ifndef queued_spin_is_locked
static __always_inline int queued_spin_is_locked(struct qspinlock *lock)
{
/*
@@ -35,7 +34,6 @@ static __always_inline int queued_spin_is_locked(struct qspinlock *lock)
*/
return atomic_read(&lock->val);
}
-#endif
/**
* queued_spin_value_unlocked - is the spinlock structure unlocked?
@@ -100,7 +98,7 @@ static __always_inline void queued_spin_unlock(struct qspinlock *lock)
/*
* unlock() needs release semantics:
*/
- (void)atomic_sub_return_release(_Q_LOCKED_VAL, &lock->val);
+ smp_store_release(&lock->locked, 0);
}
#endif
diff --git a/include/asm-generic/qspinlock_types.h b/include/asm-generic/qspinlock_types.h
index 034acd0c4956..0763f065b975 100644
--- a/include/asm-generic/qspinlock_types.h
+++ b/include/asm-generic/qspinlock_types.h
@@ -29,13 +29,41 @@
#endif
typedef struct qspinlock {
- atomic_t val;
+ union {
+ atomic_t val;
+
+ /*
+ * By using the whole 2nd least significant byte for the
+ * pending bit, we can allow better optimization of the lock
+ * acquisition for the pending bit holder.
+ */
+#ifdef __LITTLE_ENDIAN
+ struct {
+ u8 locked;
+ u8 pending;
+ };
+ struct {
+ u16 locked_pending;
+ u16 tail;
+ };
+#else
+ struct {
+ u16 tail;
+ u16 locked_pending;
+ };
+ struct {
+ u8 reserved[2];
+ u8 pending;
+ u8 locked;
+ };
+#endif
+ };
} arch_spinlock_t;
/*
 * Initializer
*/
-#define __ARCH_SPIN_LOCK_UNLOCKED { ATOMIC_INIT(0) }
+#define __ARCH_SPIN_LOCK_UNLOCKED { .val = ATOMIC_INIT(0) }
/*
* Bitfields in the atomic value:
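
The union is what makes the byte-wide unlock in qspinlock.h above legal: on a
little-endian machine the "locked" byte aliases the low 8 bits of "val", so a
single smp_store_release() byte store releases the lock without touching the
tail. A standalone sketch checking that layout (little-endian host assumed;
plain C, not kernel code):

#include <assert.h>
#include <stdint.h>

struct demo_qspinlock {
	union {
		uint32_t val;
		struct {
			uint8_t locked;
			uint8_t pending;
		};
		struct {
			uint16_t locked_pending;
			uint16_t tail;
		};
	};
};

int main(void)
{
	struct demo_qspinlock l = { .val = 0x00010001 }; /* tail=1, locked=1 */

	l.locked = 0;			/* the byte-store "unlock" */
	assert(l.val == 0x00010000);	/* tail preserved, lock released */
	return 0;
}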
diff --git a/include/crypto/if_alg.h b/include/crypto/if_alg.h
index 482461d8931d..cc414db9da0a 100644
--- a/include/crypto/if_alg.h
+++ b/include/crypto/if_alg.h
@@ -245,8 +245,7 @@ ssize_t af_alg_sendpage(struct socket *sock, struct page *page,
int offset, size_t size, int flags);
void af_alg_free_resources(struct af_alg_async_req *areq);
void af_alg_async_cb(struct crypto_async_request *_req, int err);
-__poll_t af_alg_poll(struct file *file, struct socket *sock,
- poll_table *wait);
+__poll_t af_alg_poll_mask(struct socket *sock, __poll_t events);
struct af_alg_async_req *af_alg_alloc_areq(struct sock *sk,
unsigned int areqlen);
int af_alg_get_rsgl(struct sock *sk, struct msghdr *msg, int flags,
diff --git a/include/drm/bridge/dw_hdmi.h b/include/drm/bridge/dw_hdmi.h
index dd2a8cf7d20b..ccb5aa8468e0 100644
--- a/include/drm/bridge/dw_hdmi.h
+++ b/include/drm/bridge/dw_hdmi.h
@@ -151,7 +151,7 @@ struct dw_hdmi *dw_hdmi_bind(struct platform_device *pdev,
struct drm_encoder *encoder,
const struct dw_hdmi_plat_data *plat_data);
-void dw_hdmi_setup_rx_sense(struct device *dev, bool hpd, bool rx_sense);
+void dw_hdmi_setup_rx_sense(struct dw_hdmi *hdmi, bool hpd, bool rx_sense);
void dw_hdmi_set_sample_rate(struct dw_hdmi *hdmi, unsigned int rate);
void dw_hdmi_audio_enable(struct dw_hdmi *hdmi);
diff --git a/include/dt-bindings/clock/stm32mp1-clks.h b/include/dt-bindings/clock/stm32mp1-clks.h
index 86e3ec662ef4..90ec780bfc68 100644
--- a/include/dt-bindings/clock/stm32mp1-clks.h
+++ b/include/dt-bindings/clock/stm32mp1-clks.h
@@ -76,7 +76,7 @@
#define I2C6 63
#define USART1 64
#define RTCAPB 65
-#define TZC 66
+#define TZC1 66
#define TZPC 67
#define IWDG1 68
#define BSEC 69
@@ -123,6 +123,7 @@
#define CRC1 110
#define USBH 111
#define ETHSTP 112
+#define TZC2 113
/* Kernel clocks */
#define SDMMC1_K 118
@@ -228,7 +229,6 @@
#define CK_MCO2 212
/* TRACE & DEBUG clocks */
-#define DBG 213
#define CK_DBG 214
#define CK_TRACE 215
diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index 24f03941ada8..e7efe12a81bd 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -131,6 +131,7 @@ struct vgic_irq {
u32 mpidr; /* GICv3 target VCPU */
};
u8 source; /* GICv2 SGIs only */
+ u8 active_source; /* GICv2 SGIs only */
u8 priority;
enum vgic_irq_config config; /* Level or edge */
diff --git a/include/linux/aio.h b/include/linux/aio.h
index 9d8aabecfe2d..b83e68dd006f 100644
--- a/include/linux/aio.h
+++ b/include/linux/aio.h
@@ -8,8 +8,6 @@ struct kioctx;
struct kiocb;
struct mm_struct;
-#define KIOCB_KEY 0
-
typedef int (kiocb_cancel_fn)(struct kiocb *);
/* prototypes */
diff --git a/include/linux/atalk.h b/include/linux/atalk.h
index 40373920ea58..23f805562f4e 100644
--- a/include/linux/atalk.h
+++ b/include/linux/atalk.h
@@ -145,7 +145,12 @@ extern rwlock_t atalk_interfaces_lock;
extern struct atalk_route atrtr_default;
-extern const struct file_operations atalk_seq_arp_fops;
+struct aarp_iter_state {
+ int bucket;
+ struct aarp_entry **table;
+};
+
+extern const struct seq_operations aarp_seq_ops;
extern int sysctl_aarp_expiry_time;
extern int sysctl_aarp_tick_time;
diff --git a/include/linux/atomic.h b/include/linux/atomic.h
index 8b276fd9a127..01ce3997cb42 100644
--- a/include/linux/atomic.h
+++ b/include/linux/atomic.h
@@ -654,6 +654,7 @@ static inline int atomic_dec_if_positive(atomic_t *v)
}
#endif
+#define atomic_cond_read_relaxed(v, c) smp_cond_load_relaxed(&(v)->counter, (c))
#define atomic_cond_read_acquire(v, c) smp_cond_load_acquire(&(v)->counter, (c))
#ifdef CONFIG_GENERIC_ATOMIC64
@@ -1075,6 +1076,7 @@ static inline long long atomic64_fetch_andnot_release(long long i, atomic64_t *v
}
#endif
+#define atomic64_cond_read_relaxed(v, c) smp_cond_load_relaxed(&(v)->counter, (c))
#define atomic64_cond_read_acquire(v, c) smp_cond_load_acquire(&(v)->counter, (c))
#include <asm-generic/atomic-long.h>
diff --git a/include/linux/bio.h b/include/linux/bio.h
index ce547a25e8ae..810a8bee8f85 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -67,8 +67,12 @@
#define bio_multiple_segments(bio) \
((bio)->bi_iter.bi_size != bio_iovec(bio).bv_len)
-#define bio_sectors(bio) ((bio)->bi_iter.bi_size >> 9)
-#define bio_end_sector(bio) ((bio)->bi_iter.bi_sector + bio_sectors((bio)))
+
+#define bvec_iter_sectors(iter) ((iter).bi_size >> 9)
+#define bvec_iter_end_sector(iter) ((iter).bi_sector + bvec_iter_sectors((iter)))
+
+#define bio_sectors(bio) bvec_iter_sectors((bio)->bi_iter)
+#define bio_end_sector(bio) bvec_iter_end_sector((bio)->bi_iter)
/*
* Return the data direction, READ or WRITE.
@@ -406,13 +410,13 @@ static inline struct bio *bio_next_split(struct bio *bio, int sectors,
return bio_split(bio, sectors, gfp, bs);
}
-extern struct bio_set *bioset_create(unsigned int, unsigned int, int flags);
enum {
BIOSET_NEED_BVECS = BIT(0),
BIOSET_NEED_RESCUER = BIT(1),
};
-extern void bioset_free(struct bio_set *);
-extern mempool_t *biovec_create_pool(int pool_entries);
+extern int bioset_init(struct bio_set *, unsigned int, unsigned int, int flags);
+extern void bioset_exit(struct bio_set *);
+extern int biovec_init_pool(mempool_t *pool, int pool_entries);
extern struct bio *bio_alloc_bioset(gfp_t, unsigned int, struct bio_set *);
extern void bio_put(struct bio *);
@@ -421,11 +425,11 @@ extern void __bio_clone_fast(struct bio *, struct bio *);
extern struct bio *bio_clone_fast(struct bio *, gfp_t, struct bio_set *);
extern struct bio *bio_clone_bioset(struct bio *, gfp_t, struct bio_set *bs);
-extern struct bio_set *fs_bio_set;
+extern struct bio_set fs_bio_set;
static inline struct bio *bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs)
{
- return bio_alloc_bioset(gfp_mask, nr_iovecs, fs_bio_set);
+ return bio_alloc_bioset(gfp_mask, nr_iovecs, &fs_bio_set);
}
static inline struct bio *bio_kmalloc(gfp_t gfp_mask, unsigned int nr_iovecs)
@@ -499,7 +503,10 @@ static inline void bio_flush_dcache_pages(struct bio *bi)
}
#endif
+extern void bio_copy_data_iter(struct bio *dst, struct bvec_iter *dst_iter,
+ struct bio *src, struct bvec_iter *src_iter);
extern void bio_copy_data(struct bio *dst, struct bio *src);
+extern void bio_list_copy_data(struct bio *dst, struct bio *src);
extern void bio_free_pages(struct bio *bio);
extern struct bio *bio_copy_user_iov(struct request_queue *,
@@ -507,7 +514,13 @@ extern struct bio *bio_copy_user_iov(struct request_queue *,
struct iov_iter *,
gfp_t);
extern int bio_uncopy_user(struct bio *);
-void zero_fill_bio(struct bio *bio);
+void zero_fill_bio_iter(struct bio *bio, struct bvec_iter iter);
+
+static inline void zero_fill_bio(struct bio *bio)
+{
+ zero_fill_bio_iter(bio, bio->bi_iter);
+}
+
extern struct bio_vec *bvec_alloc(gfp_t, int, unsigned long *, mempool_t *);
extern void bvec_free(mempool_t *, struct bio_vec *, unsigned int);
extern unsigned int bvec_nr_vecs(unsigned short idx);
@@ -722,11 +735,11 @@ struct bio_set {
struct kmem_cache *bio_slab;
unsigned int front_pad;
- mempool_t *bio_pool;
- mempool_t *bvec_pool;
+ mempool_t bio_pool;
+ mempool_t bvec_pool;
#if defined(CONFIG_BLK_DEV_INTEGRITY)
- mempool_t *bio_integrity_pool;
- mempool_t *bvec_integrity_pool;
+ mempool_t bio_integrity_pool;
+ mempool_t bvec_integrity_pool;
#endif
/*
@@ -745,6 +758,11 @@ struct biovec_slab {
struct kmem_cache *slab;
};
+static inline bool bioset_initialized(struct bio_set *bs)
+{
+ return bs->bio_slab != NULL;
+}
+
/*
* a small number of entries is fine, not going to be performance critical.
* basically we just need to survive
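
With fs_bio_set now a plain structure, allocation sites pass its address, and the
new bioset_initialized() helper gives a cheap liveness check for embedded sets.
A sketch combining both:

static struct bio *demo_bio_alloc(struct bio_set *bs, unsigned int nr_vecs)
{
	if (!bioset_initialized(bs))
		return NULL;		/* set was never bioset_init()ed */
	return bio_alloc_bioset(GFP_NOFS, nr_vecs, bs);
}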
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index ebc34a5686dc..fb355173f3c7 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -259,7 +259,8 @@ void blk_mq_add_to_requeue_list(struct request *rq, bool at_head,
void blk_mq_kick_requeue_list(struct request_queue *q);
void blk_mq_delay_kick_requeue_list(struct request_queue *q, unsigned long msecs);
void blk_mq_complete_request(struct request *rq);
-
+bool blk_mq_bio_list_merge(struct request_queue *q, struct list_head *list,
+ struct bio *bio);
bool blk_mq_queue_stopped(struct request_queue *q);
void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx);
void blk_mq_start_hw_queue(struct blk_mq_hw_ctx *hctx);
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 17b18b91ebac..3c4f390aea4b 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -8,6 +8,7 @@
#include <linux/types.h>
#include <linux/bvec.h>
+#include <linux/ktime.h>
struct bio_set;
struct bio;
@@ -90,10 +91,52 @@ static inline bool blk_path_error(blk_status_t error)
return true;
}
-struct blk_issue_stat {
- u64 stat;
+/*
+ * From most significant bit:
+ * 1 bit: reserved for other usage, see below
+ * 12 bits: original size of bio
+ * 51 bits: issue time of bio
+ */
+#define BIO_ISSUE_RES_BITS 1
+#define BIO_ISSUE_SIZE_BITS 12
+#define BIO_ISSUE_RES_SHIFT (64 - BIO_ISSUE_RES_BITS)
+#define BIO_ISSUE_SIZE_SHIFT (BIO_ISSUE_RES_SHIFT - BIO_ISSUE_SIZE_BITS)
+#define BIO_ISSUE_TIME_MASK ((1ULL << BIO_ISSUE_SIZE_SHIFT) - 1)
+#define BIO_ISSUE_SIZE_MASK \
+ (((1ULL << BIO_ISSUE_SIZE_BITS) - 1) << BIO_ISSUE_SIZE_SHIFT)
+#define BIO_ISSUE_RES_MASK (~((1ULL << BIO_ISSUE_RES_SHIFT) - 1))
+
+/* Reserved bit for blk-throtl */
+#define BIO_ISSUE_THROTL_SKIP_LATENCY (1ULL << 63)
+
+struct bio_issue {
+ u64 value;
};
+static inline u64 __bio_issue_time(u64 time)
+{
+ return time & BIO_ISSUE_TIME_MASK;
+}
+
+static inline u64 bio_issue_time(struct bio_issue *issue)
+{
+ return __bio_issue_time(issue->value);
+}
+
+static inline sector_t bio_issue_size(struct bio_issue *issue)
+{
+ return ((issue->value & BIO_ISSUE_SIZE_MASK) >> BIO_ISSUE_SIZE_SHIFT);
+}
+
+static inline void bio_issue_init(struct bio_issue *issue,
+ sector_t size)
+{
+ size &= (1ULL << BIO_ISSUE_SIZE_BITS) - 1;
+ issue->value = ((issue->value & BIO_ISSUE_RES_MASK) |
+ (ktime_get_ns() & BIO_ISSUE_TIME_MASK) |
+ ((u64)size << BIO_ISSUE_SIZE_SHIFT));
+}
+
/*
 * main unit of I/O for the block layer and lower layers (i.e. drivers and
* stacking drivers)
@@ -138,7 +181,7 @@ struct bio {
struct cgroup_subsys_state *bi_css;
#ifdef CONFIG_BLK_DEV_THROTTLING_LOW
void *bi_cg_private;
- struct blk_issue_stat bi_issue_stat;
+ struct bio_issue bi_issue;
#endif
#endif
union {
@@ -186,6 +229,8 @@ struct bio {
* throttling rules. Don't do it again. */
#define BIO_TRACE_COMPLETION 10 /* bio_endio() should trace the final completion
* of this bio. */
+#define BIO_QUEUE_ENTERED 11 /* can use blk_queue_enter_live() */
+
/* See BVEC_POOL_OFFSET below before adding new flags */
/*
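
The bio_issue encoding packs three fields into one u64: the reserved throttle bit
(bit 63), a 12-bit truncated size, and the low 51 bits of the issue time. A
standalone sketch verifying that the masks tile the word exactly and that the
init step preserves the reserved bit (plain C; constants mirror the ones above):

#include <assert.h>
#include <stdint.h>

#define RES_BITS   1
#define SIZE_BITS  12
#define RES_SHIFT  (64 - RES_BITS)
#define SIZE_SHIFT (RES_SHIFT - SIZE_BITS)
#define TIME_MASK  ((1ULL << SIZE_SHIFT) - 1)
#define SIZE_MASK  (((1ULL << SIZE_BITS) - 1) << SIZE_SHIFT)
#define RES_MASK   (~((1ULL << RES_SHIFT) - 1))

int main(void)
{
	/* The three fields must cover all 64 bits with no overlap. */
	assert((RES_MASK | SIZE_MASK | TIME_MASK) == ~0ULL);
	assert((RES_MASK & SIZE_MASK) == 0 && (SIZE_MASK & TIME_MASK) == 0);

	uint64_t value = RES_MASK;		/* reserved bit set */
	uint64_t now = 0x123456789abcdefULL;	/* stand-in for ktime_get_ns() */
	uint64_t size = 0xfff;			/* maximum 12-bit size */

	value = (value & RES_MASK) | (now & TIME_MASK) | (size << SIZE_SHIFT);
	assert(value >> RES_SHIFT == 1);	/* reserved bit preserved */
	assert(((value & SIZE_MASK) >> SIZE_SHIFT) == size);
	return 0;
}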
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 5c4eee043191..bca3a92eb55f 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -125,16 +125,23 @@ typedef __u32 __bitwise req_flags_t;
#define RQF_SPECIAL_PAYLOAD ((__force req_flags_t)(1 << 18))
/* The per-zone write lock is held for this request */
#define RQF_ZONE_WRITE_LOCKED ((__force req_flags_t)(1 << 19))
-/* timeout is expired */
-#define RQF_MQ_TIMEOUT_EXPIRED ((__force req_flags_t)(1 << 20))
/* already slept for hybrid poll */
-#define RQF_MQ_POLL_SLEPT ((__force req_flags_t)(1 << 21))
+#define RQF_MQ_POLL_SLEPT ((__force req_flags_t)(1 << 20))
/* flags that prevent us from merging requests: */
#define RQF_NOMERGE_FLAGS \
(RQF_STARTED | RQF_SOFTBARRIER | RQF_FLUSH_SEQ | RQF_SPECIAL_PAYLOAD)
/*
+ * Request state for blk-mq.
+ */
+enum mq_rq_state {
+ MQ_RQ_IDLE = 0,
+ MQ_RQ_IN_FLIGHT = 1,
+ MQ_RQ_COMPLETE = 2,
+};
+
+/*
* Try to put the fields that are referenced together in the same cacheline.
*
* If you modify this structure, make sure to update blk_rq_init() and
@@ -205,9 +212,20 @@ struct request {
struct gendisk *rq_disk;
struct hd_struct *part;
- unsigned long start_time;
- struct blk_issue_stat issue_stat;
- /* Number of scatter-gather DMA addr+len pairs after
+ /* Time that I/O was submitted to the kernel. */
+ u64 start_time_ns;
+ /* Time that I/O was submitted to the device. */
+ u64 io_start_time_ns;
+
+#ifdef CONFIG_BLK_WBT
+ unsigned short wbt_flags;
+#endif
+#ifdef CONFIG_BLK_DEV_THROTTLING_LOW
+ unsigned short throtl_size;
+#endif
+
+ /*
+ * Number of scatter-gather DMA addr+len pairs after
* physical address coalescing is performed.
*/
unsigned short nr_phys_segments;
@@ -219,32 +237,14 @@ struct request {
unsigned short write_hint;
unsigned short ioprio;
- unsigned int timeout;
-
void *special; /* opaque pointer available for LLD use */
unsigned int extra_len; /* length of alignment and padding */
- /*
- * On blk-mq, the lower bits of ->gstate (generation number and
- * state) carry the MQ_RQ_* state value and the upper bits the
- * generation number which is monotonically incremented and used to
- * distinguish the reuse instances.
- *
- * ->gstate_seq allows updates to ->gstate and other fields
- * (currently ->deadline) during request start to be read
- * atomically from the timeout path, so that it can operate on a
- * coherent set of information.
- */
- seqcount_t gstate_seq;
- u64 gstate;
+ enum mq_rq_state state;
+ refcount_t ref;
- /*
- * ->aborted_gstate is used by the timeout to claim a specific
- * recycle instance of this request. See blk_mq_timeout_work().
- */
- struct u64_stats_sync aborted_gstate_sync;
- u64 aborted_gstate;
+ unsigned int timeout;
/* access through blk_rq_set_deadline, blk_rq_deadline */
unsigned long __deadline;
@@ -267,8 +267,6 @@ struct request {
#ifdef CONFIG_BLK_CGROUP
struct request_list *rl; /* rl this rq is alloced from */
- unsigned long long start_time_ns;
- unsigned long long io_start_time_ns; /* when passed to hardware */
#endif
};
@@ -328,9 +326,8 @@ typedef int (init_rq_fn)(struct request_queue *, struct request *, gfp_t);
typedef void (exit_rq_fn)(struct request_queue *, struct request *);
enum blk_eh_timer_return {
- BLK_EH_NOT_HANDLED,
- BLK_EH_HANDLED,
- BLK_EH_RESET_TIMER,
+	BLK_EH_DONE,		/* driver has completed the command */
+ BLK_EH_RESET_TIMER, /* reset timer and try again */
};
typedef enum blk_eh_timer_return (rq_timed_out_fn)(struct request *);
@@ -655,7 +652,7 @@ struct request_queue {
struct blk_mq_tag_set *tag_set;
struct list_head tag_set_list;
- struct bio_set *bio_split;
+ struct bio_set bio_split;
#ifdef CONFIG_BLK_DEBUG_FS
struct dentry *debugfs_dir;
@@ -967,11 +964,8 @@ extern void blk_rq_init(struct request_queue *q, struct request *rq);
extern void blk_init_request_from_bio(struct request *req, struct bio *bio);
extern void blk_put_request(struct request *);
extern void __blk_put_request(struct request_queue *, struct request *);
-extern struct request *blk_get_request_flags(struct request_queue *,
- unsigned int op,
- blk_mq_req_flags_t flags);
extern struct request *blk_get_request(struct request_queue *, unsigned int op,
- gfp_t gfp_mask);
+ blk_mq_req_flags_t flags);
extern void blk_requeue_request(struct request_queue *, struct request *);
extern int blk_lld_busy(struct request_queue *q);
extern int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
@@ -1788,48 +1782,6 @@ int kblockd_schedule_work(struct work_struct *work);
int kblockd_schedule_work_on(int cpu, struct work_struct *work);
int kblockd_mod_delayed_work_on(int cpu, struct delayed_work *dwork, unsigned long delay);
-#ifdef CONFIG_BLK_CGROUP
-/*
- * This should not be using sched_clock(). A real patch is in progress
- * to fix this up, until that is in place we need to disable preemption
- * around sched_clock() in this function and set_io_start_time_ns().
- */
-static inline void set_start_time_ns(struct request *req)
-{
- preempt_disable();
- req->start_time_ns = sched_clock();
- preempt_enable();
-}
-
-static inline void set_io_start_time_ns(struct request *req)
-{
- preempt_disable();
- req->io_start_time_ns = sched_clock();
- preempt_enable();
-}
-
-static inline uint64_t rq_start_time_ns(struct request *req)
-{
- return req->start_time_ns;
-}
-
-static inline uint64_t rq_io_start_time_ns(struct request *req)
-{
- return req->io_start_time_ns;
-}
-#else
-static inline void set_start_time_ns(struct request *req) {}
-static inline void set_io_start_time_ns(struct request *req) {}
-static inline uint64_t rq_start_time_ns(struct request *req)
-{
- return 0;
-}
-static inline uint64_t rq_io_start_time_ns(struct request *req)
-{
- return 0;
-}
-#endif
-
#define MODULE_ALIAS_BLOCKDEV(major,minor) \
MODULE_ALIAS("block-major-" __stringify(major) "-" __stringify(minor))
#define MODULE_ALIAS_BLOCKDEV_MAJOR(major) \
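
Two API shifts in this header are easy to miss. First, blk_get_request() now
takes BLK_MQ_REQ_* flags rather than a gfp_t, folding in the old _flags variant.
Second, the timeout return codes: BLK_EH_DONE replaces both BLK_EH_NOT_HANDLED
and BLK_EH_HANDLED, since the driver is now expected to complete the request
itself. A sketch of a timeout handler under the new contract (demo_hw_busy() is
a hypothetical driver helper):

static enum blk_eh_timer_return demo_timeout(struct request *rq, bool reserved)
{
	if (demo_hw_busy(rq))			/* hypothetical: still in flight */
		return BLK_EH_RESET_TIMER;	/* re-arm the timer */

	blk_mq_complete_request(rq);		/* driver completes the request */
	return BLK_EH_DONE;			/* nothing left for the core */
}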
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 486e65e3db26..469b20e1dd7e 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -31,6 +31,7 @@ struct bpf_map_ops {
void (*map_release)(struct bpf_map *map, struct file *map_file);
void (*map_free)(struct bpf_map *map);
int (*map_get_next_key)(struct bpf_map *map, void *key, void *next_key);
+ void (*map_release_uref)(struct bpf_map *map);
/* funcs callable from userspace and from eBPF programs */
void *(*map_lookup_elem)(struct bpf_map *map, void *key);
@@ -351,6 +352,7 @@ int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array,
struct bpf_prog **_prog, *__prog; \
struct bpf_prog_array *_array; \
u32 _ret = 1; \
+ preempt_disable(); \
rcu_read_lock(); \
_array = rcu_dereference(array); \
if (unlikely(check_non_null && !_array))\
@@ -362,6 +364,7 @@ int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array,
} \
_out: \
rcu_read_unlock(); \
+ preempt_enable_no_resched(); \
_ret; \
})
@@ -434,7 +437,6 @@ int bpf_stackmap_copy(struct bpf_map *map, void *key, void *value);
int bpf_fd_array_map_update_elem(struct bpf_map *map, struct file *map_file,
void *key, void *value, u64 map_flags);
int bpf_fd_array_map_lookup_elem(struct bpf_map *map, void *key, u32 *value);
-void bpf_fd_array_map_clear(struct bpf_map *map);
int bpf_fd_htab_map_update_elem(struct bpf_map *map, struct file *map_file,
void *key, void *value, u64 map_flags);
int bpf_fd_htab_map_lookup_elem(struct bpf_map *map, void *key, u32 *value);
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 7e61c395fddf..df36b1b08af0 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -142,10 +142,11 @@ struct bpf_verifier_state_list {
struct bpf_insn_aux_data {
union {
enum bpf_reg_type ptr_type; /* pointer type for load/store insns */
- struct bpf_map *map_ptr; /* pointer for call insn into lookup_elem */
+ unsigned long map_state; /* pointer/poison value for maps */
s32 call_imm; /* saved imm field of call insn */
};
int ctx_field_size; /* the ctx field size for load insn, maybe 0 */
+ int sanitize_stack_off; /* stack slot to be cleared */
bool seen; /* this insn was processed by the verifier */
};
diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h
index d3339dd48b1a..b324e01ccf2d 100644
--- a/include/linux/brcmphy.h
+++ b/include/linux/brcmphy.h
@@ -25,6 +25,7 @@
#define PHY_ID_BCM54612E 0x03625e60
#define PHY_ID_BCM54616S 0x03625d10
#define PHY_ID_BCM57780 0x03625d90
+#define PHY_ID_BCM89610 0x03625cd0
#define PHY_ID_BCM7250 0xae025280
#define PHY_ID_BCM7260 0xae025190
diff --git a/include/linux/bsg-lib.h b/include/linux/bsg-lib.h
index 28a7ccc55c89..6aeaf6472665 100644
--- a/include/linux/bsg-lib.h
+++ b/include/linux/bsg-lib.h
@@ -72,8 +72,7 @@ struct bsg_job {
void bsg_job_done(struct bsg_job *job, int result,
unsigned int reply_payload_rcv_len);
struct request_queue *bsg_setup_queue(struct device *dev, const char *name,
- bsg_job_fn *job_fn, int dd_job_size,
- void (*release)(struct device *));
+ bsg_job_fn *job_fn, int dd_job_size);
void bsg_job_put(struct bsg_job *job);
int __must_check bsg_job_get(struct bsg_job *job);
diff --git a/include/linux/bsg.h b/include/linux/bsg.h
index 0c7dd9ceb139..dac37b6e00ec 100644
--- a/include/linux/bsg.h
+++ b/include/linux/bsg.h
@@ -17,17 +17,13 @@ struct bsg_ops {
struct bsg_class_device {
struct device *class_dev;
- struct device *parent;
int minor;
struct request_queue *queue;
- struct kref ref;
const struct bsg_ops *ops;
- void (*release)(struct device *);
};
int bsg_register_queue(struct request_queue *q, struct device *parent,
- const char *name, const struct bsg_ops *ops,
- void (*release)(struct device *));
+ const char *name, const struct bsg_ops *ops);
int bsg_scsi_register_queue(struct request_queue *q, struct device *parent);
void bsg_unregister_queue(struct request_queue *q);
#else
diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
index 528ccc943cee..96bb32285989 100644
--- a/include/linux/ceph/osd_client.h
+++ b/include/linux/ceph/osd_client.h
@@ -77,7 +77,10 @@ struct ceph_osd_data {
u32 bio_length;
};
#endif /* CONFIG_BLOCK */
- struct ceph_bvec_iter bvec_pos;
+ struct {
+ struct ceph_bvec_iter bvec_pos;
+ u32 num_bvecs;
+ };
};
};
@@ -412,6 +415,10 @@ void osd_req_op_extent_osd_data_bio(struct ceph_osd_request *osd_req,
struct ceph_bio_iter *bio_pos,
u32 bio_length);
#endif /* CONFIG_BLOCK */
+void osd_req_op_extent_osd_data_bvecs(struct ceph_osd_request *osd_req,
+ unsigned int which,
+ struct bio_vec *bvecs, u32 num_bvecs,
+ u32 bytes);
void osd_req_op_extent_osd_data_bvec_pos(struct ceph_osd_request *osd_req,
unsigned int which,
struct ceph_bvec_iter *bvec_pos);
@@ -426,7 +433,8 @@ extern void osd_req_op_cls_request_data_pages(struct ceph_osd_request *,
bool own_pages);
void osd_req_op_cls_request_data_bvecs(struct ceph_osd_request *osd_req,
unsigned int which,
- struct bio_vec *bvecs, u32 bytes);
+ struct bio_vec *bvecs, u32 num_bvecs,
+ u32 bytes);
extern void osd_req_op_cls_response_data_pages(struct ceph_osd_request *,
unsigned int which,
struct page **pages, u64 length,
diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h
index 210a890008f9..1d25e149c1c5 100644
--- a/include/linux/clk-provider.h
+++ b/include/linux/clk-provider.h
@@ -765,6 +765,9 @@ int __clk_mux_determine_rate(struct clk_hw *hw,
int __clk_determine_rate(struct clk_hw *core, struct clk_rate_request *req);
int __clk_mux_determine_rate_closest(struct clk_hw *hw,
struct clk_rate_request *req);
+int clk_mux_determine_rate_flags(struct clk_hw *hw,
+ struct clk_rate_request *req,
+ unsigned long flags);
void clk_hw_reparent(struct clk_hw *hw, struct clk_hw *new_parent);
void clk_hw_set_rate_range(struct clk_hw *hw, unsigned long min_rate,
unsigned long max_rate);
diff --git a/include/linux/compat.h b/include/linux/compat.h
index cfc1b6383ae0..b1a5562b3215 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -332,6 +332,7 @@ extern int put_compat_rusage(const struct rusage *,
struct compat_rusage __user *);
struct compat_siginfo;
+struct __compat_aio_sigset;
struct compat_dirent {
u32 d_ino;
@@ -555,6 +556,12 @@ asmlinkage long compat_sys_io_getevents(compat_aio_context_t ctx_id,
compat_long_t nr,
struct io_event __user *events,
struct compat_timespec __user *timeout);
+asmlinkage long compat_sys_io_pgetevents(compat_aio_context_t ctx_id,
+ compat_long_t min_nr,
+ compat_long_t nr,
+ struct io_event __user *events,
+ struct compat_timespec __user *timeout,
+ const struct __compat_aio_sigset __user *usig);
/* fs/cookies.c */
asmlinkage long compat_sys_lookup_dcookie(u32, u32, char __user *, compat_size_t);
diff --git a/include/linux/cper.h b/include/linux/cper.h
index d14ef4e77c8a..9c703a0abe6e 100644
--- a/include/linux/cper.h
+++ b/include/linux/cper.h
@@ -381,7 +381,7 @@ struct cper_sec_proc_generic {
/* IA32/X64 Processor Error Section */
struct cper_sec_proc_ia {
__u64 validation_bits;
- __u8 lapic_id;
+ __u64 lapic_id;
__u8 cpuid[48];
};
@@ -551,5 +551,7 @@ const char *cper_mem_err_unpack(struct trace_seq *,
struct cper_mem_err_compact *);
void cper_print_proc_arm(const char *pfx,
const struct cper_sec_proc_arm *proc);
+void cper_print_proc_ia(const char *pfx,
+ const struct cper_sec_proc_ia *proc);
#endif
diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index 7b01bc11c692..a97a63eef59f 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -53,6 +53,8 @@ extern ssize_t cpu_show_spectre_v1(struct device *dev,
struct device_attribute *attr, char *buf);
extern ssize_t cpu_show_spectre_v2(struct device *dev,
struct device_attribute *attr, char *buf);
+extern ssize_t cpu_show_spec_store_bypass(struct device *dev,
+ struct device_attribute *attr, char *buf);
extern __printf(4, 5)
struct device *cpu_device_create(struct device *parent, void *drvdata,
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 94acbde17bb1..66c6e17e61e5 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -224,6 +224,7 @@ extern seqlock_t rename_lock;
* These are the low-level FS interfaces to the dcache..
*/
extern void d_instantiate(struct dentry *, struct inode *);
+extern void d_instantiate_new(struct dentry *, struct inode *);
extern struct dentry * d_instantiate_unique(struct dentry *, struct inode *);
extern struct dentry * d_instantiate_anon(struct dentry *, struct inode *);
extern int d_instantiate_no_diralias(struct dentry *, struct inode *);
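
d_instantiate_new() combines d_instantiate() with unlock_new_inode() for the
common ->create()/->mkdir() tail, closing the window in which the dentry is
visible before I_NEW is cleared. A sketch of the call-site pattern it replaces:

static void demo_create_tail(struct dentry *dentry, struct inode *inode)
{
	/* Before: d_instantiate(dentry, inode); unlock_new_inode(inode); */
	d_instantiate_new(dentry, inode);
}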
diff --git a/include/linux/delayacct.h b/include/linux/delayacct.h
index 5e335b6203f4..e6c0448ebcc7 100644
--- a/include/linux/delayacct.h
+++ b/include/linux/delayacct.h
@@ -29,7 +29,7 @@
#ifdef CONFIG_TASK_DELAY_ACCT
struct task_delay_info {
- spinlock_t lock;
+ raw_spinlock_t lock;
unsigned int flags; /* Private per-task flags */
/* For each stat XXX, add following, aligned appropriately
diff --git a/include/linux/device.h b/include/linux/device.h
index 477956990f5e..00b6c3b42437 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -88,6 +88,8 @@ extern void bus_remove_file(struct bus_type *, struct bus_attribute *);
* @resume: Called to bring a device on this bus out of sleep mode.
* @num_vf: Called to find out how many virtual functions a device on this
* bus supports.
+ * @dma_configure:	Called to set up the DMA configuration for a device on
+ *			this bus.
* @pm: Power management operations of this bus, callback the specific
* device driver's pm-ops.
* @iommu_ops: IOMMU specific operations for this bus, used to attach IOMMU
@@ -96,8 +98,6 @@ extern void bus_remove_file(struct bus_type *, struct bus_attribute *);
* @p: The private data of the driver core, only the driver core can
* touch this.
* @lock_key: Lock class key for use by the lock validator
- * @force_dma: Assume devices on this bus should be set up by dma_configure()
- * even if DMA capability is not explicitly described by firmware.
*
* A bus is a channel between the processor and one or more devices. For the
* purposes of the device model, all devices are connected via a bus, even if
@@ -130,14 +130,14 @@ struct bus_type {
int (*num_vf)(struct device *dev);
+ int (*dma_configure)(struct device *dev);
+
const struct dev_pm_ops *pm;
const struct iommu_ops *iommu_ops;
struct subsys_private *p;
struct lock_class_key lock_key;
-
- bool force_dma;
};
extern int __must_check bus_register(struct bus_type *bus);
@@ -904,6 +904,8 @@ struct dev_links_info {
* @offline: Set after successful invocation of bus type's .offline().
* @of_node_reused: Set if the device-tree node is shared with an ancestor
* device.
+ * @dma_32bit_limit: bridge limited to 32bit DMA even if the device itself
+ * indicates support for a higher limit in the dma_mask field.
*
* At the lowest level, every device in a Linux system is represented by an
* instance of struct device. The device structure contains the information
@@ -992,6 +994,7 @@ struct device {
bool offline_disabled:1;
bool offline:1;
bool of_node_reused:1;
+ bool dma_32bit_limit:1;
};
static inline struct device *kobj_to_dev(struct kobject *kobj)
diff --git a/include/linux/dma-debug.h b/include/linux/dma-debug.h
index c7d844f09c3a..a785f2507159 100644
--- a/include/linux/dma-debug.h
+++ b/include/linux/dma-debug.h
@@ -30,8 +30,6 @@ struct bus_type;
extern void dma_debug_add_bus(struct bus_type *bus);
-extern void dma_debug_init(u32 num_entries);
-
extern int dma_debug_resize_entries(u32 num_entries);
extern void debug_dma_map_page(struct device *dev, struct page *page,
@@ -100,10 +98,6 @@ static inline void dma_debug_add_bus(struct bus_type *bus)
{
}
-static inline void dma_debug_init(u32 num_entries)
-{
-}
-
static inline int dma_debug_resize_entries(u32 num_entries)
{
return 0;
diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h
index 53ad6a47f513..8d9f33febde5 100644
--- a/include/linux/dma-direct.h
+++ b/include/linux/dma-direct.h
@@ -59,6 +59,11 @@ void *dma_direct_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
gfp_t gfp, unsigned long attrs);
void dma_direct_free(struct device *dev, size_t size, void *cpu_addr,
dma_addr_t dma_addr, unsigned long attrs);
+dma_addr_t dma_direct_map_page(struct device *dev, struct page *page,
+ unsigned long offset, size_t size, enum dma_data_direction dir,
+ unsigned long attrs);
+int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents,
+ enum dma_data_direction dir, unsigned long attrs);
int dma_direct_supported(struct device *dev, u64 mask);
-
+int dma_direct_mapping_error(struct device *dev, dma_addr_t dma_addr);
#endif /* _LINUX_DMA_DIRECT_H */
diff --git a/include/linux/dma-iommu.h b/include/linux/dma-iommu.h
index 92f20832fd28..e8ca5e654277 100644
--- a/include/linux/dma-iommu.h
+++ b/include/linux/dma-iommu.h
@@ -17,6 +17,7 @@
#define __DMA_IOMMU_H
#ifdef __KERNEL__
+#include <linux/types.h>
#include <asm/errno.h>
#ifdef CONFIG_IOMMU_DMA
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index f8ab1c0f589e..f9cc309507d9 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -133,10 +133,10 @@ struct dma_map_ops {
#ifdef ARCH_HAS_DMA_GET_REQUIRED_MASK
u64 (*get_required_mask)(struct device *dev);
#endif
- int is_phys;
};
extern const struct dma_map_ops dma_direct_ops;
+extern const struct dma_map_ops dma_noncoherent_ops;
extern const struct dma_map_ops dma_virt_ops;
#define DMA_BIT_MASK(n) (((n) == 64) ? ~0ULL : ((1ULL<<(n))-1))
@@ -502,7 +502,7 @@ dma_get_sgtable_attrs(struct device *dev, struct sg_table *sgt, void *cpu_addr,
#define dma_get_sgtable(d, t, v, h, s) dma_get_sgtable_attrs(d, t, v, h, s, 0)
#ifndef arch_dma_alloc_attrs
-#define arch_dma_alloc_attrs(dev, flag) (true)
+#define arch_dma_alloc_attrs(dev) (true)
#endif
static inline void *dma_alloc_attrs(struct device *dev, size_t size,
@@ -521,7 +521,7 @@ static inline void *dma_alloc_attrs(struct device *dev, size_t size,
/* let the implementation decide on the zone to allocate from: */
flag &= ~(__GFP_DMA | __GFP_DMA32 | __GFP_HIGHMEM);
- if (!arch_dma_alloc_attrs(&dev, &flag))
+ if (!arch_dma_alloc_attrs(&dev))
return NULL;
if (!ops->alloc)
return NULL;
@@ -572,14 +572,6 @@ static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
return 0;
}
-/*
- * This is a hack for the legacy x86 forbid_dac and iommu_sac_force. Please
- * don't use this in new code.
- */
-#ifndef arch_dma_supported
-#define arch_dma_supported(dev, mask) (1)
-#endif
-
static inline void dma_check_mask(struct device *dev, u64 mask)
{
if (sme_active() && (mask < (((u64)sme_get_me_mask() << 1) - 1)))
@@ -592,9 +584,6 @@ static inline int dma_supported(struct device *dev, u64 mask)
if (!ops)
return 0;
- if (!arch_dma_supported(dev, mask))
- return 0;
-
if (!ops->dma_supported)
return 1;
return ops->dma_supported(dev, mask);
@@ -839,7 +828,7 @@ static inline int dma_mmap_wc(struct device *dev,
#define dma_mmap_writecombine dma_mmap_wc
#endif
-#if defined(CONFIG_NEED_DMA_MAP_STATE) || defined(CONFIG_DMA_API_DEBUG)
+#ifdef CONFIG_NEED_DMA_MAP_STATE
#define DEFINE_DMA_UNMAP_ADDR(ADDR_NAME) dma_addr_t ADDR_NAME
#define DEFINE_DMA_UNMAP_LEN(LEN_NAME) __u32 LEN_NAME
#define dma_unmap_addr(PTR, ADDR_NAME) ((PTR)->ADDR_NAME)
diff --git a/include/linux/dma-noncoherent.h b/include/linux/dma-noncoherent.h
new file mode 100644
index 000000000000..10b2654d549b
--- /dev/null
+++ b/include/linux/dma-noncoherent.h
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_DMA_NONCOHERENT_H
+#define _LINUX_DMA_NONCOHERENT_H 1
+
+#include <linux/dma-mapping.h>
+
+void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
+ gfp_t gfp, unsigned long attrs);
+void arch_dma_free(struct device *dev, size_t size, void *cpu_addr,
+ dma_addr_t dma_addr, unsigned long attrs);
+
+#ifdef CONFIG_DMA_NONCOHERENT_MMAP
+int arch_dma_mmap(struct device *dev, struct vm_area_struct *vma,
+ void *cpu_addr, dma_addr_t dma_addr, size_t size,
+ unsigned long attrs);
+#else
+#define arch_dma_mmap NULL
+#endif /* CONFIG_DMA_NONCOHERENT_MMAP */
+
+#ifdef CONFIG_DMA_NONCOHERENT_CACHE_SYNC
+void arch_dma_cache_sync(struct device *dev, void *vaddr, size_t size,
+ enum dma_data_direction direction);
+#else
+#define arch_dma_cache_sync NULL
+#endif /* CONFIG_DMA_NONCOHERENT_CACHE_SYNC */
+
+#ifdef CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE
+void arch_sync_dma_for_device(struct device *dev, phys_addr_t paddr,
+ size_t size, enum dma_data_direction dir);
+#else
+static inline void arch_sync_dma_for_device(struct device *dev,
+ phys_addr_t paddr, size_t size, enum dma_data_direction dir)
+{
+}
+#endif /* ARCH_HAS_SYNC_DMA_FOR_DEVICE */
+
+#ifdef CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU
+void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr,
+ size_t size, enum dma_data_direction dir);
+#else
+static inline void arch_sync_dma_for_cpu(struct device *dev,
+ phys_addr_t paddr, size_t size, enum dma_data_direction dir)
+{
+}
+#endif /* ARCH_HAS_SYNC_DMA_FOR_CPU */
+
+#endif /* _LINUX_DMA_NONCOHERENT_H */
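
This new header defines the hooks an architecture supplies when it opts into the
generic noncoherent ops: allocation, optional mmap and cache-sync callbacks, and
directional cache maintenance. A sketch of what an arch_sync_dma_for_device()
implementation typically looks like; arch_wb_cache() and arch_inv_cache() are
hypothetical per-arch primitives:

void arch_sync_dma_for_device(struct device *dev, phys_addr_t paddr,
		size_t size, enum dma_data_direction dir)
{
	if (dir == DMA_FROM_DEVICE)
		arch_inv_cache(paddr, size);	/* drop stale cache lines */
	else
		arch_wb_cache(paddr, size);	/* write dirty lines back */
}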
diff --git a/include/linux/efi.h b/include/linux/efi.h
index f1b7d68ac460..56add823f190 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -395,9 +395,9 @@ typedef struct {
u32 attributes;
u32 get_bar_attributes;
u32 set_bar_attributes;
- uint64_t romsize;
- void *romimage;
-} efi_pci_io_protocol_32;
+ u64 romsize;
+ u32 romimage;
+} efi_pci_io_protocol_32_t;
typedef struct {
u64 poll_mem;
@@ -415,9 +415,9 @@ typedef struct {
u64 attributes;
u64 get_bar_attributes;
u64 set_bar_attributes;
- uint64_t romsize;
- void *romimage;
-} efi_pci_io_protocol_64;
+ u64 romsize;
+ u64 romimage;
+} efi_pci_io_protocol_64_t;
typedef struct {
void *poll_mem;
@@ -437,7 +437,7 @@ typedef struct {
void *set_bar_attributes;
uint64_t romsize;
void *romimage;
-} efi_pci_io_protocol;
+} efi_pci_io_protocol_t;
#define EFI_PCI_IO_ATTRIBUTE_ISA_MOTHERBOARD_IO 0x0001
#define EFI_PCI_IO_ATTRIBUTE_ISA_IO 0x0002
diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index 6d9e230dffd2..a02deea30185 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -218,8 +218,6 @@ extern void elv_unregister(struct elevator_type *);
extern ssize_t elv_iosched_show(struct request_queue *, char *);
extern ssize_t elv_iosched_store(struct request_queue *, const char *, size_t);
-extern int elevator_init(struct request_queue *, char *);
-extern void elevator_exit(struct request_queue *, struct elevator_queue *);
extern bool elv_bio_merge_ok(struct request *, struct bio *);
extern struct elevator_queue *elevator_alloc(struct request_queue *,
struct elevator_type *);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 760d8da1b6c7..d4c37d371da5 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -94,7 +94,7 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
/*
* flags in file.f_mode. Note that FMODE_READ and FMODE_WRITE must correspond
- * to O_WRONLY and O_RDWR via the strange trick in __dentry_open()
+ * to O_WRONLY and O_RDWR via the strange trick in do_dentry_open()
*/
/* file is open for reading */
@@ -1250,7 +1250,7 @@ static inline int locks_lock_file_wait(struct file *filp, struct file_lock *fl)
}
struct fasync_struct {
- spinlock_t fa_lock;
+ rwlock_t fa_lock;
int magic;
int fa_fd;
struct fasync_struct *fa_next; /* singly linked list */
@@ -1711,6 +1711,8 @@ struct file_operations {
int (*iterate) (struct file *, struct dir_context *);
int (*iterate_shared) (struct file *, struct dir_context *);
__poll_t (*poll) (struct file *, struct poll_table_struct *);
+ struct wait_queue_head * (*get_poll_head)(struct file *, __poll_t);
+ __poll_t (*poll_mask) (struct file *, __poll_t);
long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
long (*compat_ioctl) (struct file *, unsigned int, unsigned long);
int (*mmap) (struct file *, struct vm_area_struct *);
@@ -2570,7 +2572,7 @@ extern bool is_bad_inode(struct inode *);
#ifdef CONFIG_BLOCK
extern void check_disk_size_change(struct gendisk *disk,
- struct block_device *bdev);
+ struct block_device *bdev, bool verbose);
extern int revalidate_disk(struct gendisk *);
extern int check_disk_change(struct block_device *);
extern int __invalidate_device(struct block_device *, bool);
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index c826b0b5232a..6cb8a5789668 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -368,7 +368,9 @@ static inline void free_part_stats(struct hd_struct *part)
part_stat_add(cpu, gendiskp, field, -subnd)
void part_in_flight(struct request_queue *q, struct hd_struct *part,
- unsigned int inflight[2]);
+ unsigned int inflight[2]);
+void part_in_flight_rw(struct request_queue *q, struct hd_struct *part,
+ unsigned int inflight[2]);
void part_dec_in_flight(struct request_queue *q, struct hd_struct *part,
int rw);
void part_inc_in_flight(struct request_queue *q, struct hd_struct *part,
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 1a4582b44d32..fc5ab85278d5 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -464,7 +464,7 @@ static inline struct page *
__alloc_pages_node(int nid, gfp_t gfp_mask, unsigned int order)
{
VM_BUG_ON(nid < 0 || nid >= MAX_NUMNODES);
- VM_WARN_ON(!node_online(nid));
+ VM_WARN_ON((gfp_mask & __GFP_THISNODE) && !node_online(nid));
return __alloc_pages(gfp_mask, order, nid);
}
diff --git a/include/linux/hmm.h b/include/linux/hmm.h
index 39988924de3a..2f1327c37a63 100644
--- a/include/linux/hmm.h
+++ b/include/linux/hmm.h
@@ -16,7 +16,7 @@
/*
* Heterogeneous Memory Management (HMM)
*
- * See Documentation/vm/hmm.txt for reasons and overview of what HMM is and it
+ * See Documentation/vm/hmm.rst for reasons and overview of what HMM is and what it
* is for. Here we focus on the HMM API description, with some explanation of
* the underlying implementation.
*
diff --git a/include/linux/ide.h b/include/linux/ide.h
index ca9d34feb572..c74b0321922a 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -961,7 +961,7 @@ __IDE_PROC_DEVSET(_name, _min, _max, NULL, NULL)
typedef struct {
const char *name;
umode_t mode;
- const struct file_operations *proc_fops;
+ int (*show)(struct seq_file *, void *);
} ide_proc_entry_t;
void proc_ide_create(void);
@@ -973,8 +973,8 @@ void ide_proc_unregister_port(ide_hwif_t *);
void ide_proc_register_driver(ide_drive_t *, struct ide_driver *);
void ide_proc_unregister_driver(ide_drive_t *, struct ide_driver *);
-extern const struct file_operations ide_capacity_proc_fops;
-extern const struct file_operations ide_geometry_proc_fops;
+int ide_capacity_proc_show(struct seq_file *m, void *v);
+int ide_geometry_proc_show(struct seq_file *m, void *v);
#else
static inline void proc_ide_create(void) { ; }
static inline void proc_ide_destroy(void) { ; }
@@ -1508,8 +1508,6 @@ static inline void ide_set_hwifdata (ide_hwif_t * hwif, void *data)
hwif->hwif_data = data;
}
-extern void ide_toggle_bounce(ide_drive_t *drive, int on);
-
u64 ide_get_lba_addr(struct ide_cmd *, int);
u8 ide_dump_status(ide_drive_t *, const char *, u8);
diff --git a/include/linux/iio/buffer_impl.h b/include/linux/iio/buffer_impl.h
index b9e22b7e2f28..d1171db23742 100644
--- a/include/linux/iio/buffer_impl.h
+++ b/include/linux/iio/buffer_impl.h
@@ -53,7 +53,7 @@ struct iio_buffer_access_funcs {
int (*request_update)(struct iio_buffer *buffer);
int (*set_bytes_per_datum)(struct iio_buffer *buffer, size_t bpd);
- int (*set_length)(struct iio_buffer *buffer, int length);
+ int (*set_length)(struct iio_buffer *buffer, unsigned int length);
int (*enable)(struct iio_buffer *buffer, struct iio_dev *indio_dev);
int (*disable)(struct iio_buffer *buffer, struct iio_dev *indio_dev);
@@ -72,10 +72,10 @@ struct iio_buffer_access_funcs {
*/
struct iio_buffer {
/** @length: Number of datums in buffer. */
- int length;
+ unsigned int length;
/** @bytes_per_datum: Size of individual datum including timestamp. */
- int bytes_per_datum;
+ size_t bytes_per_datum;
/**
* @access: Buffer access functions associated with the
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 5426627f9c55..eeceac3376fc 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -432,11 +432,18 @@ extern bool force_irqthreads;
#define force_irqthreads (0)
#endif
-#ifndef __ARCH_SET_SOFTIRQ_PENDING
-#define set_softirq_pending(x) (local_softirq_pending() = (x))
-#define or_softirq_pending(x) (local_softirq_pending() |= (x))
+#ifndef local_softirq_pending
+
+#ifndef local_softirq_pending_ref
+#define local_softirq_pending_ref irq_stat.__softirq_pending
#endif
+#define local_softirq_pending() (__this_cpu_read(local_softirq_pending_ref))
+#define set_softirq_pending(x) (__this_cpu_write(local_softirq_pending_ref, (x)))
+#define or_softirq_pending(x) (__this_cpu_or(local_softirq_pending_ref, (x)))
+
+#endif /* local_softirq_pending */
+
/* Some architectures might implement lazy enabling/disabling of
* interrupts. In some cases, such as stop_machine, we might want
* to ensure that after a local_irq_disable(), interrupts have
diff --git a/include/linux/iommu-helper.h b/include/linux/iommu-helper.h
index cb9a9248c8c0..70d01edcbf8b 100644
--- a/include/linux/iommu-helper.h
+++ b/include/linux/iommu-helper.h
@@ -2,6 +2,7 @@
#ifndef _LINUX_IOMMU_HELPER_H
#define _LINUX_IOMMU_HELPER_H
+#include <linux/bug.h>
#include <linux/kernel.h>
static inline unsigned long iommu_device_max_index(unsigned long size,
@@ -14,9 +15,15 @@ static inline unsigned long iommu_device_max_index(unsigned long size,
return size;
}
-extern int iommu_is_span_boundary(unsigned int index, unsigned int nr,
- unsigned long shift,
- unsigned long boundary_size);
+static inline int iommu_is_span_boundary(unsigned int index, unsigned int nr,
+ unsigned long shift, unsigned long boundary_size)
+{
+ BUG_ON(!is_power_of_2(boundary_size));
+
+ shift = (shift + index) & (boundary_size - 1);
+ return shift + nr > boundary_size;
+}
+
extern unsigned long iommu_area_alloc(unsigned long *map, unsigned long size,
unsigned long start, unsigned int nr,
unsigned long shift,
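
iommu_is_span_boundary() becomes a header inline; with a power-of-two
boundary_size the mask replaces a modulo, and the test asks whether the nr-entry
span starting at index (offset by shift) would cross a boundary. A standalone
sketch with a worked example (plain C):

#include <assert.h>

static int span_crosses(unsigned int index, unsigned int nr,
			unsigned long shift, unsigned long boundary_size)
{
	shift = (shift + index) & (boundary_size - 1);
	return shift + nr > boundary_size;
}

int main(void)
{
	/* A 4-entry span ending exactly at a 256-entry boundary is fine. */
	assert(!span_crosses(252, 4, 0, 256));
	/* The same span one slot later crosses the boundary. */
	assert(span_crosses(253, 4, 0, 256));
	return 0;
}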
diff --git a/include/linux/irq.h b/include/linux/irq.h
index 65916a305f3d..b2067083aa94 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -512,6 +512,7 @@ enum {
IRQCHIP_SKIP_SET_WAKE = (1 << 4),
IRQCHIP_ONESHOT_SAFE = (1 << 5),
IRQCHIP_EOI_THREADED = (1 << 6),
+ IRQCHIP_SUPPORTS_LEVEL_MSI = (1 << 7),
};
#include <linux/irqdesc.h>
diff --git a/include/linux/irq_cpustat.h b/include/linux/irq_cpustat.h
index 4954948d1973..6e8895cd4d92 100644
--- a/include/linux/irq_cpustat.h
+++ b/include/linux/irq_cpustat.h
@@ -18,15 +18,11 @@
*/
#ifndef __ARCH_IRQ_STAT
-extern irq_cpustat_t irq_stat[]; /* defined in asm/hardirq.h */
-#define __IRQ_STAT(cpu, member) (irq_stat[cpu].member)
+DECLARE_PER_CPU_ALIGNED(irq_cpustat_t, irq_stat); /* defined in asm/hardirq.h */
+#define __IRQ_STAT(cpu, member) (per_cpu(irq_stat.member, cpu))
#endif
- /* arch independent irq_stat fields */
-#define local_softirq_pending() \
- __IRQ_STAT(smp_processor_id(), __softirq_pending)
-
- /* arch dependent irq_stat fields */
+/* arch dependent irq_stat fields */
#define nmi_count(cpu) __IRQ_STAT((cpu), __nmi_count) /* i386 */
#endif /* __irq_cpustat_h */
diff --git a/include/linux/irq_sim.h b/include/linux/irq_sim.h
index 0380d899b955..630a57e55db6 100644
--- a/include/linux/irq_sim.h
+++ b/include/linux/irq_sim.h
@@ -1,14 +1,11 @@
-#ifndef _LINUX_IRQ_SIM_H
-#define _LINUX_IRQ_SIM_H
+/* SPDX-License-Identifier: GPL-2.0+ */
/*
- * Copyright (C) 2017 Bartosz Golaszewski <brgl@bgdev.pl>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
+ * Copyright (C) 2017-2018 Bartosz Golaszewski <brgl@bgdev.pl>
*/
+#ifndef _LINUX_IRQ_SIM_H
+#define _LINUX_IRQ_SIM_H
+
#include <linux/irq_work.h>
#include <linux/device.h>
diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h
index f5af3b594e6e..cbb872c1b607 100644
--- a/include/linux/irqchip/arm-gic-v3.h
+++ b/include/linux/irqchip/arm-gic-v3.h
@@ -587,6 +587,7 @@ struct fwnode_handle;
int its_cpu_init(void);
int its_init(struct fwnode_handle *handle, struct rdists *rdists,
struct irq_domain *domain);
+int mbi_init(struct fwnode_handle *fwnode, struct irq_domain *parent);
static inline bool gic_enable_sre(void)
{
diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h
index 48c7e86bb556..dccfa65aee96 100644
--- a/include/linux/irqdomain.h
+++ b/include/linux/irqdomain.h
@@ -301,7 +301,13 @@ static inline struct irq_domain *irq_find_matching_host(struct device_node *node
static inline struct irq_domain *irq_find_host(struct device_node *node)
{
- return irq_find_matching_host(node, DOMAIN_BUS_ANY);
+ struct irq_domain *d;
+
+ d = irq_find_matching_host(node, DOMAIN_BUS_WIRED);
+ if (!d)
+ d = irq_find_matching_host(node, DOMAIN_BUS_ANY);
+
+ return d;
}
/**
diff --git a/include/linux/isdn/capilli.h b/include/linux/isdn/capilli.h
index 11b57c485854..d75e1ad72964 100644
--- a/include/linux/isdn/capilli.h
+++ b/include/linux/isdn/capilli.h
@@ -50,7 +50,7 @@ struct capi_ctr {
u16 (*send_message)(struct capi_ctr *, struct sk_buff *skb);
char *(*procinfo)(struct capi_ctr *);
- const struct file_operations *proc_fops;
+ int (*proc_show)(struct seq_file *, void *);
/* filled in before calling ready callback */
u8 manu[CAPI_MANUFACTURER_LEN]; /* CAPI_GET_MANUFACTURER */
diff --git a/include/linux/kthread.h b/include/linux/kthread.h
index c1961761311d..2803264c512f 100644
--- a/include/linux/kthread.h
+++ b/include/linux/kthread.h
@@ -62,6 +62,7 @@ void *kthread_probe_data(struct task_struct *k);
int kthread_park(struct task_struct *k);
void kthread_unpark(struct task_struct *k);
void kthread_parkme(void);
+void kthread_park_complete(struct task_struct *k);
int kthreadd(void *unused);
extern struct task_struct *kthreadd_task;
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 6930c63126c7..6d6e79c59e68 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1045,13 +1045,7 @@ static inline int mmu_notifier_retry(struct kvm *kvm, unsigned long mmu_seq)
#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
-#ifdef CONFIG_S390
-#define KVM_MAX_IRQ_ROUTES 4096 //FIXME: we can have more than that...
-#elif defined(CONFIG_ARM64)
-#define KVM_MAX_IRQ_ROUTES 4096
-#else
-#define KVM_MAX_IRQ_ROUTES 1024
-#endif
+#define KVM_MAX_IRQ_ROUTES 4096 /* might need extension/rework in the future */
bool kvm_arch_can_set_irq_routing(struct kvm *kvm);
int kvm_set_irq_routing(struct kvm *kvm,
diff --git a/include/linux/libata.h b/include/linux/libata.h
index 1795fecdea17..1c113134c98f 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -1133,7 +1133,6 @@ extern int ata_sas_port_start(struct ata_port *ap);
extern void ata_sas_port_stop(struct ata_port *ap);
extern int ata_sas_slave_configure(struct scsi_device *, struct ata_port *);
extern int ata_sas_queuecmd(struct scsi_cmnd *cmd, struct ata_port *ap);
-extern enum blk_eh_timer_return ata_scsi_timed_out(struct scsi_cmnd *cmd);
extern int sata_scr_valid(struct ata_link *link);
extern int sata_scr_read(struct ata_link *link, int reg, u32 *val);
extern int sata_scr_write(struct ata_link *link, int reg, u32 val);
@@ -1359,7 +1358,6 @@ extern struct device_attribute *ata_common_sdev_attrs[];
.proc_name = drv_name, \
.slave_configure = ata_scsi_slave_config, \
.slave_destroy = ata_scsi_slave_destroy, \
- .eh_timed_out = ata_scsi_timed_out, \
.bios_param = ata_std_bios_param, \
.unlock_native_capacity = ata_scsi_unlock_native_capacity, \
.sdev_attrs = ata_common_sdev_attrs
diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h
index 6e0859b9d4d2..e9e0d1c7eaf5 100644
--- a/include/linux/lightnvm.h
+++ b/include/linux/lightnvm.h
@@ -489,7 +489,7 @@ typedef blk_qc_t (nvm_tgt_make_rq_fn)(struct request_queue *, struct bio *);
typedef sector_t (nvm_tgt_capacity_fn)(void *);
typedef void *(nvm_tgt_init_fn)(struct nvm_tgt_dev *, struct gendisk *,
int flags);
-typedef void (nvm_tgt_exit_fn)(void *);
+typedef void (nvm_tgt_exit_fn)(void *, bool);
typedef int (nvm_tgt_sysfs_init_fn)(struct gendisk *);
typedef void (nvm_tgt_sysfs_exit_fn)(struct gendisk *);
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index e0e49b5b1ee1..2b0265265c28 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -216,6 +216,9 @@ void put_online_mems(void);
void mem_hotplug_begin(void);
void mem_hotplug_done(void);
+extern void set_zone_contiguous(struct zone *zone);
+extern void clear_zone_contiguous(struct zone *zone);
+
#else /* ! CONFIG_MEMORY_HOTPLUG */
#define pfn_to_online_page(pfn) \
({ \
diff --git a/include/linux/mempool.h b/include/linux/mempool.h
index b51f5c430c26..0c964ac107c2 100644
--- a/include/linux/mempool.h
+++ b/include/linux/mempool.h
@@ -25,6 +25,18 @@ typedef struct mempool_s {
wait_queue_head_t wait;
} mempool_t;
+static inline bool mempool_initialized(mempool_t *pool)
+{
+ return pool->elements != NULL;
+}
+
+void mempool_exit(mempool_t *pool);
+int mempool_init_node(mempool_t *pool, int min_nr, mempool_alloc_t *alloc_fn,
+ mempool_free_t *free_fn, void *pool_data,
+ gfp_t gfp_mask, int node_id);
+int mempool_init(mempool_t *pool, int min_nr, mempool_alloc_t *alloc_fn,
+ mempool_free_t *free_fn, void *pool_data);
+
extern mempool_t *mempool_create(int min_nr, mempool_alloc_t *alloc_fn,
mempool_free_t *free_fn, void *pool_data);
extern mempool_t *mempool_create_node(int min_nr, mempool_alloc_t *alloc_fn,
@@ -43,6 +55,14 @@ extern void mempool_free(void *element, mempool_t *pool);
*/
void *mempool_alloc_slab(gfp_t gfp_mask, void *pool_data);
void mempool_free_slab(void *element, void *pool_data);
+
+static inline int
+mempool_init_slab_pool(mempool_t *pool, int min_nr, struct kmem_cache *kc)
+{
+ return mempool_init(pool, min_nr, mempool_alloc_slab,
+ mempool_free_slab, (void *) kc);
+}
+
static inline mempool_t *
mempool_create_slab_pool(int min_nr, struct kmem_cache *kc)
{
@@ -56,6 +76,13 @@ mempool_create_slab_pool(int min_nr, struct kmem_cache *kc)
*/
void *mempool_kmalloc(gfp_t gfp_mask, void *pool_data);
void mempool_kfree(void *element, void *pool_data);
+
+static inline int mempool_init_kmalloc_pool(mempool_t *pool, int min_nr, size_t size)
+{
+ return mempool_init(pool, min_nr, mempool_kmalloc,
+ mempool_kfree, (void *) size);
+}
+
static inline mempool_t *mempool_create_kmalloc_pool(int min_nr, size_t size)
{
return mempool_create(min_nr, mempool_kmalloc, mempool_kfree,
@@ -68,6 +95,13 @@ static inline mempool_t *mempool_create_kmalloc_pool(int min_nr, size_t size)
*/
void *mempool_alloc_pages(gfp_t gfp_mask, void *pool_data);
void mempool_free_pages(void *element, void *pool_data);
+
+static inline int mempool_init_page_pool(mempool_t *pool, int min_nr, int order)
+{
+ return mempool_init(pool, min_nr, mempool_alloc_pages,
+ mempool_free_pages, (void *)(long)order);
+}
+
static inline mempool_t *mempool_create_page_pool(int min_nr, int order)
{
return mempool_create(min_nr, mempool_alloc_pages, mempool_free_pages,
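
The new mempool_init()/mempool_exit() pair allows a mempool to be embedded in a larger structure and initialized in place, rather than heap-allocated via mempool_create(); the pktcdvd hunk further down converts its rb_pool member to exactly this pattern. A minimal sketch of the embedded-pool lifecycle (the device struct and slab cache here are hypothetical):

#include <linux/mempool.h>
#include <linux/slab.h>

struct my_dev {
	mempool_t rb_pool;			/* embedded, not a pointer */
};

static struct kmem_cache *my_cache;		/* hypothetical slab cache */

static int my_dev_setup(struct my_dev *d)
{
	/* Pre-fill 8 elements from my_cache; returns 0 or -ENOMEM. */
	return mempool_init_slab_pool(&d->rb_pool, 8, my_cache);
}

static void my_dev_teardown(struct my_dev *d)
{
	if (mempool_initialized(&d->rb_pool))
		mempool_exit(&d->rb_pool);
}
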
diff --git a/include/linux/memremap.h b/include/linux/memremap.h
index 7b4899c06f49..74ea5e2310a8 100644
--- a/include/linux/memremap.h
+++ b/include/linux/memremap.h
@@ -45,7 +45,7 @@ struct vmem_altmap {
* must be treated as an opaque object, rather than a "normal" struct page.
*
* A more complete discussion of unaddressable memory may be found in
- * include/linux/hmm.h and Documentation/vm/hmm.txt.
+ * include/linux/hmm.h and Documentation/vm/hmm.rst.
*
* MEMORY_DEVICE_PUBLIC:
* Device memory that is cache coherent from device and CPU point of view. This
@@ -67,7 +67,7 @@ enum memory_type {
* page_free()
*
* Additional notes about MEMORY_DEVICE_PRIVATE may be found in
- * include/linux/hmm.h and Documentation/vm/hmm.txt. There is also a brief
+ * include/linux/hmm.h and Documentation/vm/hmm.rst. There is also a brief
* explanation in include/linux/memory_hotplug.h.
*
* The page_fault() callback must migrate page back, from device memory to
diff --git a/include/linux/mfd/cros_ec.h b/include/linux/mfd/cros_ec.h
index 2d4e23c9ea0a..f09e9cf2e4ab 100644
--- a/include/linux/mfd/cros_ec.h
+++ b/include/linux/mfd/cros_ec.h
@@ -197,6 +197,8 @@ struct cros_ec_dev {
u32 features[2];
};
+#define to_cros_ec_dev(dev) container_of(dev, struct cros_ec_dev, class_dev)
+
/**
* cros_ec_suspend - Handle a suspend operation for the ChromeOS EC device
*
diff --git a/include/linux/mfd/mc13xxx.h b/include/linux/mfd/mc13xxx.h
index 638222e43e48..54a3cd808f9e 100644
--- a/include/linux/mfd/mc13xxx.h
+++ b/include/linux/mfd/mc13xxx.h
@@ -243,6 +243,8 @@ struct mc13xxx_platform_data {
#define MC13XXX_ADC0_LICELLCON (1 << 0)
#define MC13XXX_ADC0_CHRGICON (1 << 1)
#define MC13XXX_ADC0_BATICON (1 << 2)
+#define MC13XXX_ADC0_ADIN7SEL_DIE (1 << 4)
+#define MC13XXX_ADC0_ADIN7SEL_UID (2 << 4)
#define MC13XXX_ADC0_ADREFEN (1 << 10)
#define MC13XXX_ADC0_TSMOD0 (1 << 12)
#define MC13XXX_ADC0_TSMOD1 (1 << 13)
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 767d193c269a..d703774982ca 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -1284,25 +1284,9 @@ enum {
};
static inline const struct cpumask *
-mlx5_get_vector_affinity(struct mlx5_core_dev *dev, int vector)
+mlx5_get_vector_affinity_hint(struct mlx5_core_dev *dev, int vector)
{
- const struct cpumask *mask;
- struct irq_desc *desc;
- unsigned int irq;
- int eqn;
- int err;
-
- err = mlx5_vector2eqn(dev, MLX5_EQ_VEC_COMP_BASE + vector, &eqn, &irq);
- if (err)
- return NULL;
-
- desc = irq_to_desc(irq);
-#ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK
- mask = irq_data_get_effective_affinity_mask(&desc->irq_data);
-#else
- mask = desc->irq_common_data.affinity;
-#endif
- return mask;
+ return dev->priv.irq_info[vector].mask;
}
#endif /* MLX5_DRIVER_H */
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 1ac1f06a4be6..02a616e2f17d 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2109,7 +2109,6 @@ extern void setup_per_cpu_pageset(void);
extern void zone_pcp_update(struct zone *zone);
extern void zone_pcp_reset(struct zone *zone);
-extern void setup_zone_pageset(struct zone *zone);
/* page_alloc.c */
extern int min_free_kbytes;
@@ -2466,6 +2465,13 @@ static inline vm_fault_t vmf_insert_pfn(struct vm_area_struct *vma,
return VM_FAULT_NOPAGE;
}
+static inline vm_fault_t vmf_error(int err)
+{
+ if (err == -ENOMEM)
+ return VM_FAULT_OOM;
+ return VM_FAULT_SIGBUS;
+}
+
struct page *follow_page_mask(struct vm_area_struct *vma,
unsigned long address, unsigned int foll_flags,
unsigned int *page_mask);
@@ -2493,6 +2499,7 @@ static inline struct page *follow_page(struct vm_area_struct *vma,
#define FOLL_MLOCK 0x1000 /* lock present pages */
#define FOLL_REMOTE 0x2000 /* we are working on non-current tsk/mm */
#define FOLL_COW 0x4000 /* internal GUP flag */
+#define FOLL_ANON 0x8000 /* don't do file mappings */
static inline int vm_fault_to_errno(int vm_fault, int foll_flags)
{
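
vmf_error() gives fault handlers a one-liner for converting an errno into a vm_fault_t, replacing open-coded err == -ENOMEM checks. A hedged sketch of a ->fault implementation using it (my_get_page() is a made-up lookup helper):

#include <linux/mm.h>

static int my_get_page(struct vm_fault *vmf, struct page **page);	/* hypothetical */

static vm_fault_t my_fault(struct vm_fault *vmf)
{
	struct page *page;
	int err;

	err = my_get_page(vmf, &page);
	if (err)
		return vmf_error(err);	/* -ENOMEM -> VM_FAULT_OOM, else VM_FAULT_SIGBUS */

	vmf->page = page;
	return 0;
}
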
diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h
index 2d07a1ed5a31..392e6af82701 100644
--- a/include/linux/mmu_notifier.h
+++ b/include/linux/mmu_notifier.h
@@ -174,7 +174,7 @@ struct mmu_notifier_ops {
* invalidate_range_start()/end() notifiers, as
* invalidate_range() alread catches the points in time when an
* external TLB range needs to be flushed. For more in depth
- * discussion on this see Documentation/vm/mmu_notifier.txt
+ * discussion on this see Documentation/vm/mmu_notifier.rst
*
* Note that this function might be called with just a sub-range
* of what was passed to invalidate_range_start()/end(), if
diff --git a/include/linux/msi.h b/include/linux/msi.h
index 1f1bbb5b4679..5839d8062dfc 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -289,6 +289,8 @@ enum {
* MSI_FLAG_ACTIVATE_EARLY has been set.
*/
MSI_FLAG_MUST_REACTIVATE = (1 << 5),
+ /* Is level-triggered capable, using two messages */
+ MSI_FLAG_LEVEL_CAPABLE = (1 << 6),
};
int msi_domain_set_affinity(struct irq_data *data, const struct cpumask *mask,
diff --git a/include/linux/mtd/map.h b/include/linux/mtd/map.h
index b5b43f94f311..01b990e4b228 100644
--- a/include/linux/mtd/map.h
+++ b/include/linux/mtd/map.h
@@ -312,7 +312,7 @@ void map_destroy(struct mtd_info *mtd);
({ \
int i, ret = 1; \
for (i = 0; i < map_words(map); i++) { \
- if (((val1).x[i] & (val2).x[i]) != (val2).x[i]) { \
+ if (((val1).x[i] & (val2).x[i]) != (val3).x[i]) { \
ret = 0; \
break; \
} \
diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h
index 5dad59b31244..17c919436f48 100644
--- a/include/linux/mtd/rawnand.h
+++ b/include/linux/mtd/rawnand.h
@@ -867,12 +867,18 @@ struct nand_op_instr {
* tBERS (during an erase) which all of them are u64 values that cannot be
* divided by usual kernel macros and must be handled with the special
* DIV_ROUND_UP_ULL() macro.
+ *
+ * A cast to the dividend's type is needed here to guarantee that the result
+ * won't be an unsigned long long when the dividend is an unsigned long (or
+ * smaller), which is what the compiler does when it sees a ternary operator
+ * with 2 different return types (it picks the largest type to make sure
+ * there's no loss).
*/
-#define __DIVIDE(dividend, divisor) ({ \
- sizeof(dividend) == sizeof(u32) ? \
- DIV_ROUND_UP(dividend, divisor) : \
- DIV_ROUND_UP_ULL(dividend, divisor); \
- })
+#define __DIVIDE(dividend, divisor) ({ \
+ (__typeof__(dividend))(sizeof(dividend) <= sizeof(unsigned long) ? \
+ DIV_ROUND_UP(dividend, divisor) : \
+ DIV_ROUND_UP_ULL(dividend, divisor)); \
+ })
#define PSEC_TO_NSEC(x) __DIVIDE(x, 1000)
#define PSEC_TO_MSEC(x) __DIVIDE(x, 1000000000)
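
The promotion described in the comment above is ordinary C semantics and easy to reproduce outside the kernel. A standalone userspace demo of why the cast is needed:

#include <stdio.h>

int main(void)
{
	unsigned int a = 10;
	unsigned long long b = 20;

	/*
	 * The type of a ternary expression is the common type of both
	 * arms, regardless of which arm the condition selects: u64 here.
	 */
	printf("%zu\n", sizeof(1 ? a : b));	/* prints 8, not 4 */
	return 0;
}
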
diff --git a/include/linux/mutex.h b/include/linux/mutex.h
index 14bc0d5d0ee5..3093dd162424 100644
--- a/include/linux/mutex.h
+++ b/include/linux/mutex.h
@@ -146,9 +146,6 @@ extern void __mutex_init(struct mutex *lock, const char *name,
*/
static inline bool mutex_is_locked(struct mutex *lock)
{
- /*
- * XXX think about spin_is_locked
- */
return __mutex_owner(lock) != NULL;
}
diff --git a/include/linux/net.h b/include/linux/net.h
index 2248a052061d..3fd9d8c16581 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -147,6 +147,7 @@ struct proto_ops {
int (*getname) (struct socket *sock,
struct sockaddr *addr,
int peer);
+ __poll_t (*poll_mask) (struct socket *sock, __poll_t events);
__poll_t (*poll) (struct file *file, struct socket *sock,
struct poll_table_struct *wait);
int (*ioctl) (struct socket *sock, unsigned int cmd,
diff --git a/include/linux/node.h b/include/linux/node.h
index 41f171861dcc..6d336e38d155 100644
--- a/include/linux/node.h
+++ b/include/linux/node.h
@@ -32,9 +32,11 @@ extern struct node *node_devices[];
typedef void (*node_registration_func_t)(struct node *);
#if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) && defined(CONFIG_NUMA)
-extern int link_mem_sections(int nid, unsigned long start_pfn, unsigned long nr_pages);
+extern int link_mem_sections(int nid, unsigned long start_pfn,
+ unsigned long nr_pages, bool check_nid);
#else
-static inline int link_mem_sections(int nid, unsigned long start_pfn, unsigned long nr_pages)
+static inline int link_mem_sections(int nid, unsigned long start_pfn,
+ unsigned long nr_pages, bool check_nid)
{
return 0;
}
@@ -57,7 +59,7 @@ static inline int register_one_node(int nid)
if (error)
return error;
/* link memory sections under this node */
- error = link_mem_sections(nid, pgdat->node_start_pfn, pgdat->node_spanned_pages);
+ error = link_mem_sections(nid, pgdat->node_start_pfn, pgdat->node_spanned_pages, true);
}
return error;
diff --git a/include/linux/nospec.h b/include/linux/nospec.h
index e791ebc65c9c..0c5ef54fd416 100644
--- a/include/linux/nospec.h
+++ b/include/linux/nospec.h
@@ -7,6 +7,8 @@
#define _LINUX_NOSPEC_H
#include <asm/barrier.h>
+struct task_struct;
+
/**
* array_index_mask_nospec() - generate a ~0 mask when index < size, 0 otherwise
* @index: array element index
@@ -55,4 +57,12 @@ static inline unsigned long array_index_mask_nospec(unsigned long index,
\
(typeof(_i)) (_i & _mask); \
})
+
+/* Speculation control prctl */
+int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which);
+int arch_prctl_spec_ctrl_set(struct task_struct *task, unsigned long which,
+ unsigned long ctrl);
+/* Speculation control for seccomp enforced mitigation */
+void arch_seccomp_spec_mitigate(struct task_struct *task);
+
#endif /* _LINUX_NOSPEC_H */
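
The new prototypes back the per-task speculation-control prctl()s; arch code supplies the implementations. The array_index_mask_nospec() primitive above is normally consumed through the array_index_nospec() macro, as in this bounds-clipping sketch (the table and lookup function are illustrative):

#include <linux/kernel.h>
#include <linux/nospec.h>

static int my_table[16];			/* hypothetical table */

static int my_lookup(unsigned long idx)
{
	if (idx >= ARRAY_SIZE(my_table))
		return -EINVAL;
	/* Clamp idx under speculation so a mispredicted bounds check
	 * cannot be used to read out of range. */
	idx = array_index_nospec(idx, ARRAY_SIZE(my_table));
	return my_table[idx];
}
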
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 4112e2bd747f..2950ce957656 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -436,10 +436,19 @@ enum {
enum {
NVME_AER_ERROR = 0,
NVME_AER_SMART = 1,
+ NVME_AER_NOTICE = 2,
NVME_AER_CSS = 6,
NVME_AER_VS = 7,
- NVME_AER_NOTICE_NS_CHANGED = 0x0002,
- NVME_AER_NOTICE_FW_ACT_STARTING = 0x0102,
+};
+
+enum {
+ NVME_AER_NOTICE_NS_CHANGED = 0x00,
+ NVME_AER_NOTICE_FW_ACT_STARTING = 0x01,
+};
+
+enum {
+ NVME_AEN_CFG_NS_ATTR = 1 << 8,
+ NVME_AEN_CFG_FW_ACT = 1 << 9,
};
struct nvme_lba_range_type {
@@ -747,6 +756,7 @@ enum {
NVME_LOG_ERROR = 0x01,
NVME_LOG_SMART = 0x02,
NVME_LOG_FW_SLOT = 0x03,
+ NVME_LOG_CHANGED_NS = 0x04,
NVME_LOG_CMD_EFFECTS = 0x05,
NVME_LOG_DISC = 0x70,
NVME_LOG_RESERVATION = 0x80,
@@ -755,6 +765,8 @@ enum {
NVME_FWACT_ACTV = (2 << 3),
};
+#define NVME_MAX_CHANGED_NAMESPACES 1024
+
struct nvme_identify {
__u8 opcode;
__u8 flags;
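
Splitting the AER constants lets drivers decode the completion result field by field instead of matching fused values like the old 0x0002/0x0102: per the NVMe spec, bits 2:0 of the result carry the event type and bits 15:8 the event information. A hedged decoding sketch (the handler and rescan helper are illustrative):

#include <linux/nvme.h>
#include <linux/types.h>

static void my_rescan_namespaces(void);		/* hypothetical */

static void my_handle_aer(u32 result)
{
	u32 type = result & 0x7;		/* bits 2:0: event type */
	u32 info = (result >> 8) & 0xff;	/* bits 15:8: event information */

	if (type == NVME_AER_NOTICE && info == NVME_AER_NOTICE_NS_CHANGED)
		my_rescan_namespaces();		/* e.g. read NVME_LOG_CHANGED_NS */
}
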
diff --git a/include/linux/of_device.h b/include/linux/of_device.h
index 8da5a1b31ece..165fd302b442 100644
--- a/include/linux/of_device.h
+++ b/include/linux/of_device.h
@@ -55,7 +55,9 @@ static inline struct device_node *of_cpu_device_node_get(int cpu)
return of_node_get(cpu_dev->of_node);
}
-int of_dma_configure(struct device *dev, struct device_node *np);
+int of_dma_configure(struct device *dev,
+ struct device_node *np,
+ bool force_dma);
void of_dma_deconfigure(struct device *dev);
#else /* CONFIG_OF */
@@ -105,7 +107,9 @@ static inline struct device_node *of_cpu_device_node_get(int cpu)
return NULL;
}
-static inline int of_dma_configure(struct device *dev, struct device_node *np)
+static inline int of_dma_configure(struct device *dev,
+ struct device_node *np,
+ bool force_dma)
{
return 0;
}
diff --git a/include/linux/oom.h b/include/linux/oom.h
index 5bad038ac012..6adac113e96d 100644
--- a/include/linux/oom.h
+++ b/include/linux/oom.h
@@ -95,6 +95,8 @@ static inline int check_stable_address_space(struct mm_struct *mm)
return 0;
}
+void __oom_reap_task_mm(struct mm_struct *mm);
+
extern unsigned long oom_badness(struct task_struct *p,
struct mem_cgroup *memcg, const nodemask_t *nodemask,
unsigned long totalpages);
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 73178a2fcee0..55371cb827ad 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -670,7 +670,7 @@ int raw_pci_read(unsigned int domain, unsigned int bus, unsigned int devfn,
int raw_pci_write(unsigned int domain, unsigned int bus, unsigned int devfn,
int reg, int len, u32 val);
-#ifdef CONFIG_PCI_BUS_ADDR_T_64BIT
+#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
typedef u64 pci_bus_addr_t;
#else
typedef u32 pci_bus_addr_t;
diff --git a/include/linux/percpu-rwsem.h b/include/linux/percpu-rwsem.h
index b1f37a89e368..79b99d653e03 100644
--- a/include/linux/percpu-rwsem.h
+++ b/include/linux/percpu-rwsem.h
@@ -133,7 +133,7 @@ static inline void percpu_rwsem_release(struct percpu_rw_semaphore *sem,
lock_release(&sem->rw_sem.dep_map, 1, ip);
#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
if (!read)
- sem->rw_sem.owner = NULL;
+ sem->rw_sem.owner = RWSEM_OWNER_UNKNOWN;
#endif
}
@@ -141,6 +141,10 @@ static inline void percpu_rwsem_acquire(struct percpu_rw_semaphore *sem,
bool read, unsigned long ip)
{
lock_acquire(&sem->rw_sem.dep_map, 0, 1, read, 1, NULL, ip);
+#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
+ if (!read)
+ sem->rw_sem.owner = current;
+#endif
}
#endif
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index e71e99eb9a4e..bea0b0cd4bf7 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -467,7 +467,7 @@ enum perf_addr_filter_action_t {
*/
struct perf_addr_filter {
struct list_head entry;
- struct inode *inode;
+ struct path path;
unsigned long offset;
unsigned long size;
enum perf_addr_filter_action_t action;
@@ -1016,6 +1016,14 @@ static inline int is_software_event(struct perf_event *event)
return event->event_caps & PERF_EV_CAP_SOFTWARE;
}
+/*
+ * Return 1 for event in sw context, 0 for event in hw context
+ */
+static inline int in_software_context(struct perf_event *event)
+{
+ return event->ctx->pmu->task_ctx_nr == perf_sw_context;
+}
+
extern struct static_key perf_swevent_enabled[PERF_COUNT_SW_MAX];
extern void ___perf_sw_event(u32, u64, struct pt_regs *, u64);
diff --git a/include/linux/pktcdvd.h b/include/linux/pktcdvd.h
index 93d142ad1528..174601554b06 100644
--- a/include/linux/pktcdvd.h
+++ b/include/linux/pktcdvd.h
@@ -186,7 +186,7 @@ struct pktcdvd_device
sector_t current_sector; /* Keep track of where the elevator is */
atomic_t scan_queue; /* Set to non-zero when pkt_handle_queue */
/* needs to be run. */
- mempool_t *rb_pool; /* mempool for pkt_rb_node allocations */
+ mempool_t rb_pool; /* mempool for pkt_rb_node allocations */
struct packet_iosched iosched;
struct gendisk *disk;
diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h
index 49f634d96118..3097c943fab9 100644
--- a/include/linux/platform_device.h
+++ b/include/linux/platform_device.h
@@ -356,6 +356,8 @@ extern int platform_pm_restore(struct device *dev);
#define platform_pm_restore NULL
#endif
+extern int platform_dma_configure(struct device *dev);
+
#ifdef CONFIG_PM_SLEEP
#define USE_PLATFORM_PM_SLEEP_OPS \
.suspend = platform_pm_suspend, \
diff --git a/include/linux/poll.h b/include/linux/poll.h
index f45ebd017eaa..fdf86b4cbc71 100644
--- a/include/linux/poll.h
+++ b/include/linux/poll.h
@@ -74,6 +74,18 @@ static inline void init_poll_funcptr(poll_table *pt, poll_queue_proc qproc)
pt->_key = ~(__poll_t)0; /* all events enabled */
}
+static inline bool file_has_poll_mask(struct file *file)
+{
+ return file->f_op->get_poll_head && file->f_op->poll_mask;
+}
+
+static inline bool file_can_poll(struct file *file)
+{
+ return file->f_op->poll || file_has_poll_mask(file);
+}
+
+__poll_t vfs_poll(struct file *file, struct poll_table_struct *pt);
+
struct poll_table_entry {
struct file *filp;
__poll_t key;
@@ -96,8 +108,6 @@ struct poll_wqueues {
extern void poll_initwait(struct poll_wqueues *pwq);
extern void poll_freewait(struct poll_wqueues *pwq);
-extern int poll_schedule_timeout(struct poll_wqueues *pwq, int state,
- ktime_t *expires, unsigned long slack);
extern u64 select_estimate_accuracy(struct timespec64 *tv);
#define MAX_INT64_SECONDS (((s64)(~((u64)0)>>1)/HZ)-1)
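
vfs_poll() becomes the one entry point that hides whether a file implements the legacy ->poll or the new ->get_poll_head/->poll_mask pair, and file_can_poll() replaces open-coded f_op->poll checks. A sketch of a non-blocking readiness query, assuming vfs_poll() keeps the classic return-the-ready-mask semantics:

#include <linux/poll.h>

static bool my_file_readable(struct file *file)
{
	if (!file_can_poll(file))
		return true;	/* no poll support: treat as always ready */

	/* A NULL poll_table queries readiness without registering a waiter. */
	return vfs_poll(file, NULL) & (EPOLLIN | EPOLLRDNORM);
}
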
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index 928ef9e4d912..e518352137e7 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -9,6 +9,8 @@
#include <linux/fs.h>
struct proc_dir_entry;
+struct seq_file;
+struct seq_operations;
#ifdef CONFIG_PROC_FS
@@ -23,6 +25,19 @@ extern struct proc_dir_entry *proc_mkdir_data(const char *, umode_t,
extern struct proc_dir_entry *proc_mkdir_mode(const char *, umode_t,
struct proc_dir_entry *);
struct proc_dir_entry *proc_create_mount_point(const char *name);
+
+struct proc_dir_entry *proc_create_seq_private(const char *name, umode_t mode,
+ struct proc_dir_entry *parent, const struct seq_operations *ops,
+ unsigned int state_size, void *data);
+#define proc_create_seq_data(name, mode, parent, ops, data) \
+ proc_create_seq_private(name, mode, parent, ops, 0, data)
+#define proc_create_seq(name, mode, parent, ops) \
+ proc_create_seq_private(name, mode, parent, ops, 0, NULL)
+struct proc_dir_entry *proc_create_single_data(const char *name, umode_t mode,
+ struct proc_dir_entry *parent,
+ int (*show)(struct seq_file *, void *), void *data);
+#define proc_create_single(name, mode, parent, show) \
+ proc_create_single_data(name, mode, parent, show, NULL)
extern struct proc_dir_entry *proc_create_data(const char *, umode_t,
struct proc_dir_entry *,
@@ -38,6 +53,15 @@ extern void proc_remove(struct proc_dir_entry *);
extern void remove_proc_entry(const char *, struct proc_dir_entry *);
extern int remove_proc_subtree(const char *, struct proc_dir_entry *);
+struct proc_dir_entry *proc_create_net_data(const char *name, umode_t mode,
+ struct proc_dir_entry *parent, const struct seq_operations *ops,
+ unsigned int state_size, void *data);
+#define proc_create_net(name, mode, parent, state_size, ops) \
+ proc_create_net_data(name, mode, parent, state_size, ops, NULL)
+struct proc_dir_entry *proc_create_net_single(const char *name, umode_t mode,
+ struct proc_dir_entry *parent,
+ int (*show)(struct seq_file *, void *), void *data);
+
#else /* CONFIG_PROC_FS */
static inline void proc_root_init(void)
@@ -57,6 +81,11 @@ static inline struct proc_dir_entry *proc_mkdir_data(const char *name,
umode_t mode, struct proc_dir_entry *parent, void *data) { return NULL; }
static inline struct proc_dir_entry *proc_mkdir_mode(const char *name,
umode_t mode, struct proc_dir_entry *parent) { return NULL; }
+#define proc_create_seq_private(name, mode, parent, ops, size, data) ({NULL;})
+#define proc_create_seq_data(name, mode, parent, ops, data) ({NULL;})
+#define proc_create_seq(name, mode, parent, ops) ({NULL;})
+#define proc_create_single(name, mode, parent, show) ({NULL;})
+#define proc_create_single_data(name, mode, parent, show, data) ({NULL;})
#define proc_create(name, mode, parent, proc_fops) ({NULL;})
#define proc_create_data(name, mode, parent, proc_fops, data) ({NULL;})
@@ -69,6 +98,10 @@ static inline void proc_remove(struct proc_dir_entry *de) {}
#define remove_proc_entry(name, parent) do {} while (0)
static inline int remove_proc_subtree(const char *name, struct proc_dir_entry *parent) { return 0; }
+#define proc_create_net_data(name, mode, parent, ops, state_size, data) ({NULL;})
+#define proc_create_net(name, mode, parent, state_size, ops) ({NULL;})
+#define proc_create_net_single(name, mode, parent, show, data) ({NULL;})
+
#endif /* CONFIG_PROC_FS */
struct net;
@@ -83,4 +116,10 @@ struct ns_common;
int open_related_ns(struct ns_common *ns,
struct ns_common *(*get_ns)(struct ns_common *ns));
+/* get the associated pid namespace for a file in procfs */
+static inline struct pid_namespace *proc_pid_ns(struct inode *inode)
+{
+ return inode->i_sb->s_fs_info;
+}
+
#endif /* _LINUX_PROC_FS_H */
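
proc_create_seq() and proc_create_single() fold the usual seq_open()/single_open() boilerplate (a private file_operations per file) into the proc core; the capi_ctr and seq_file_net hunks in this series are conversions toward these helpers. A sketch of the single-show flavor (names and the counter value are made up):

#include <linux/proc_fs.h>
#include <linux/seq_file.h>

static int my_stats_show(struct seq_file *m, void *v)
{
	seq_printf(m, "hits: %lu\n", 42UL);	/* placeholder value */
	return 0;
}

static int __init my_proc_init(void)
{
	if (!proc_create_single("my_stats", 0444, NULL, my_stats_show))
		return -ENOMEM;
	return 0;
}
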
diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h
index 919b2a0b0307..037bf0ef1ae9 100644
--- a/include/linux/ptrace.h
+++ b/include/linux/ptrace.h
@@ -345,7 +345,6 @@ extern void user_single_step_siginfo(struct task_struct *tsk,
static inline void user_single_step_siginfo(struct task_struct *tsk,
struct pt_regs *regs, siginfo_t *info)
{
- memset(info, 0, sizeof(*info));
info->si_signo = SIGTRAP;
}
#endif
diff --git a/include/linux/rbtree_augmented.h b/include/linux/rbtree_augmented.h
index 6bfd2b581f75..af8a61be2d8d 100644
--- a/include/linux/rbtree_augmented.h
+++ b/include/linux/rbtree_augmented.h
@@ -26,6 +26,7 @@
#include <linux/compiler.h>
#include <linux/rbtree.h>
+#include <linux/rcupdate.h>
/*
* Please note - only struct rb_augment_callbacks and the prototypes for
diff --git a/include/linux/rbtree_latch.h b/include/linux/rbtree_latch.h
index ece43e882b56..7d012faa509a 100644
--- a/include/linux/rbtree_latch.h
+++ b/include/linux/rbtree_latch.h
@@ -35,6 +35,7 @@
#include <linux/rbtree.h>
#include <linux/seqlock.h>
+#include <linux/rcupdate.h>
struct latch_tree_node {
struct rb_node node[2];
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 36360d07f25b..e679b175b411 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -108,7 +108,6 @@ void rcu_sched_qs(void);
void rcu_bh_qs(void);
void rcu_check_callbacks(int user);
void rcu_report_dead(unsigned int cpu);
-void rcu_cpu_starting(unsigned int cpu);
void rcutree_migrate_callbacks(int cpu);
#ifdef CONFIG_RCU_STALL_COMMON
@@ -188,13 +187,13 @@ static inline void exit_tasks_rcu_finish(void) { }
#endif /* #else #ifdef CONFIG_TASKS_RCU */
/**
- * cond_resched_rcu_qs - Report potential quiescent states to RCU
+ * cond_resched_tasks_rcu_qs - Report potential quiescent states to RCU
*
* This macro resembles cond_resched(), except that it is defined to
* report potential quiescent states to RCU-tasks even if the cond_resched()
* machinery were to be shut off, as some advocate for PREEMPT kernels.
*/
-#define cond_resched_rcu_qs() \
+#define cond_resched_tasks_rcu_qs() \
do { \
if (!cond_resched()) \
rcu_note_voluntary_context_switch_lite(current); \
diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index ce9beec35e34..7b3c82e8a625 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -132,5 +132,6 @@ static inline void rcu_all_qs(void) { barrier(); }
#define rcutree_offline_cpu NULL
#define rcutree_dead_cpu NULL
#define rcutree_dying_cpu NULL
+static inline void rcu_cpu_starting(unsigned int cpu) { }
#endif /* __LINUX_RCUTINY_H */
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index fd996cdf1833..914655848ef6 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -74,6 +74,7 @@ static inline void synchronize_rcu_bh_expedited(void)
void rcu_barrier(void);
void rcu_barrier_bh(void);
void rcu_barrier_sched(void);
+bool rcu_eqs_special_set(int cpu);
unsigned long get_state_synchronize_rcu(void);
void cond_synchronize_rcu(unsigned long oldstate);
unsigned long get_state_synchronize_sched(void);
@@ -100,5 +101,6 @@ int rcutree_online_cpu(unsigned int cpu);
int rcutree_offline_cpu(unsigned int cpu);
int rcutree_dead_cpu(unsigned int cpu);
int rcutree_dying_cpu(unsigned int cpu);
+void rcu_cpu_starting(unsigned int cpu);
#endif /* __LINUX_RCUTREE_H */
diff --git a/include/linux/regmap.h b/include/linux/regmap.h
index 5f7ad0552c03..4f38068ffb71 100644
--- a/include/linux/regmap.h
+++ b/include/linux/regmap.h
@@ -15,6 +15,7 @@
#include <linux/list.h>
#include <linux/rbtree.h>
+#include <linux/ktime.h>
#include <linux/delay.h>
#include <linux/err.h>
#include <linux/bug.h>
@@ -587,7 +588,10 @@ struct regmap *__devm_regmap_init_sdw(struct sdw_slave *sdw,
const struct regmap_config *config,
struct lock_class_key *lock_key,
const char *lock_name);
-
+struct regmap *__devm_regmap_init_slimbus(struct slim_device *slimbus,
+ const struct regmap_config *config,
+ struct lock_class_key *lock_key,
+ const char *lock_name);
/*
* Wrapper for regmap_init macros to include a unique lockdep key and name
* for each call. No-op if CONFIG_LOCKDEP is not set.
@@ -906,6 +910,19 @@ bool regmap_ac97_default_volatile(struct device *dev, unsigned int reg);
__regmap_lockdep_wrapper(__devm_regmap_init_sdw, #config, \
sdw, config)
+/**
+ * devm_regmap_init_slimbus() - Initialise managed register map
+ *
+ * @slimbus: Device that will be interacted with
+ * @config: Configuration for register map
+ *
+ * The return value will be an ERR_PTR() on error or a valid pointer
+ * to a struct regmap. The regmap will be automatically freed by the
+ * device management code.
+ */
+#define devm_regmap_init_slimbus(slimbus, config) \
+ __regmap_lockdep_wrapper(__devm_regmap_init_slimbus, #config, \
+ slimbus, config)
int regmap_mmio_attach_clk(struct regmap *map, struct clk *clk);
void regmap_mmio_detach_clk(struct regmap *map);
void regmap_exit(struct regmap *map);
diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h
index d09a9c7af109..dfdaede9139e 100644
--- a/include/linux/remoteproc.h
+++ b/include/linux/remoteproc.h
@@ -569,7 +569,7 @@ static inline struct rproc *vdev_to_rproc(struct virtio_device *vdev)
void rproc_add_subdev(struct rproc *rproc,
struct rproc_subdev *subdev,
int (*probe)(struct rproc_subdev *subdev),
- void (*remove)(struct rproc_subdev *subdev, bool graceful));
+ void (*remove)(struct rproc_subdev *subdev, bool crashed));
void rproc_remove_subdev(struct rproc *rproc, struct rproc_subdev *subdev);
diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h
index 56707d5ff6ad..ab93b6eae696 100644
--- a/include/linux/rwsem.h
+++ b/include/linux/rwsem.h
@@ -44,6 +44,12 @@ struct rw_semaphore {
#endif
};
+/*
+ * Setting bit 0 of the owner field with other non-zero bits will indicate
+ * that the rwsem is writer-owned with an unknown owner.
+ */
+#define RWSEM_OWNER_UNKNOWN ((struct task_struct *)-1L)
+
extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem);
extern struct rw_semaphore *rwsem_down_read_failed_killable(struct rw_semaphore *sem);
extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem);
diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h
index 841585f6e5f2..e6539536dea9 100644
--- a/include/linux/sbitmap.h
+++ b/include/linux/sbitmap.h
@@ -127,6 +127,12 @@ struct sbitmap_queue {
* @round_robin: Allocate bits in strict round-robin order.
*/
bool round_robin;
+
+ /**
+ * @min_shallow_depth: The minimum shallow depth which may be passed to
+ * sbitmap_queue_get_shallow() or __sbitmap_queue_get_shallow().
+ */
+ unsigned int min_shallow_depth;
};
/**
@@ -390,6 +396,9 @@ int __sbitmap_queue_get(struct sbitmap_queue *sbq);
* @shallow_depth: The maximum number of bits to allocate from a single word.
* See sbitmap_get_shallow().
*
+ * If you call this, make sure to call sbitmap_queue_min_shallow_depth() after
+ * initializing @sbq.
+ *
* Return: Non-negative allocated bit number if successful, -1 otherwise.
*/
int __sbitmap_queue_get_shallow(struct sbitmap_queue *sbq,
@@ -424,6 +433,9 @@ static inline int sbitmap_queue_get(struct sbitmap_queue *sbq,
* @shallow_depth: The maximum number of bits to allocate from a single word.
* See sbitmap_get_shallow().
*
+ * If you call this, make sure to call sbitmap_queue_min_shallow_depth() after
+ * initializing @sbq.
+ *
* Return: Non-negative allocated bit number if successful, -1 otherwise.
*/
static inline int sbitmap_queue_get_shallow(struct sbitmap_queue *sbq,
@@ -439,6 +451,23 @@ static inline int sbitmap_queue_get_shallow(struct sbitmap_queue *sbq,
}
/**
+ * sbitmap_queue_min_shallow_depth() - Inform a &struct sbitmap_queue of the
+ * minimum shallow depth that will be used.
+ * @sbq: Bitmap queue in question.
+ * @min_shallow_depth: The minimum shallow depth that will be passed to
+ * sbitmap_queue_get_shallow() or __sbitmap_queue_get_shallow().
+ *
+ * sbitmap_queue_clear() batches wakeups as an optimization. The batch size
+ * depends on the depth of the bitmap. Since the shallow allocation functions
+ * effectively operate with a different depth, the shallow depth must be taken
+ * into account when calculating the batch size. This function must be called
+ * with the minimum shallow depth that will be used. Failure to do so can result
+ * in missed wakeups.
+ */
+void sbitmap_queue_min_shallow_depth(struct sbitmap_queue *sbq,
+ unsigned int min_shallow_depth);
+
+/**
* sbitmap_queue_clear() - Free an allocated bit and wake up waiters on a
* &struct sbitmap_queue.
* @sbq: Bitmap to free from.
@@ -484,6 +513,13 @@ static inline struct sbq_wait_state *sbq_wait_ptr(struct sbitmap_queue *sbq,
void sbitmap_queue_wake_all(struct sbitmap_queue *sbq);
/**
+ * sbitmap_queue_wake_up() - Wake up some of waiters in one waitqueue
+ * on a &struct sbitmap_queue.
+ * @sbq: Bitmap queue to wake up.
+ */
+void sbitmap_queue_wake_up(struct sbitmap_queue *sbq);
+
+/**
* sbitmap_queue_show() - Dump &struct sbitmap_queue information to a &struct
* seq_file.
* @sbq: Bitmap queue to show.
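
As the new kernel-doc stresses, any user of the shallow getters must declare its minimum shallow depth up front so the wake-batch size is computed against the effective depth. A setup sketch (the sizes are arbitrary):

#include <linux/sbitmap.h>

static int my_tags_init(struct sbitmap_queue *sbq, unsigned int shallow)
{
	int ret;

	/* 256 bits, default shift, FIFO allocation, any NUMA node. */
	ret = sbitmap_queue_init_node(sbq, 256, -1, false, GFP_KERNEL, -1);
	if (ret)
		return ret;

	/* Must happen before any sbitmap_queue_get_shallow() call. */
	sbitmap_queue_min_shallow_depth(sbq, shallow);
	return 0;
}
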
diff --git a/include/linux/sched.h b/include/linux/sched.h
index b3d697f3b573..14e4f9c12337 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -112,17 +112,36 @@ struct task_group;
#ifdef CONFIG_DEBUG_ATOMIC_SLEEP
+/*
+ * Special states are those that do not use the normal wait-loop pattern. See
+ * the comment with set_special_state().
+ */
+#define is_special_task_state(state) \
+ ((state) & (__TASK_STOPPED | __TASK_TRACED | TASK_DEAD))
+
#define __set_current_state(state_value) \
do { \
+ WARN_ON_ONCE(is_special_task_state(state_value));\
current->task_state_change = _THIS_IP_; \
current->state = (state_value); \
} while (0)
+
#define set_current_state(state_value) \
do { \
+ WARN_ON_ONCE(is_special_task_state(state_value));\
current->task_state_change = _THIS_IP_; \
smp_store_mb(current->state, (state_value)); \
} while (0)
+#define set_special_state(state_value) \
+ do { \
+ unsigned long flags; /* may shadow */ \
+ WARN_ON_ONCE(!is_special_task_state(state_value)); \
+ raw_spin_lock_irqsave(&current->pi_lock, flags); \
+ current->task_state_change = _THIS_IP_; \
+ current->state = (state_value); \
+ raw_spin_unlock_irqrestore(&current->pi_lock, flags); \
+ } while (0)
#else
/*
* set_current_state() includes a barrier so that the write of current->state
@@ -144,8 +163,8 @@ struct task_group;
*
* The above is typically ordered against the wakeup, which does:
*
- * need_sleep = false;
- * wake_up_state(p, TASK_UNINTERRUPTIBLE);
+ * need_sleep = false;
+ * wake_up_state(p, TASK_UNINTERRUPTIBLE);
*
* Where wake_up_state() (and all other wakeup primitives) imply enough
* barriers to order the store of the variable against wakeup.
@@ -154,12 +173,33 @@ struct task_group;
* once it observes the TASK_UNINTERRUPTIBLE store the waking CPU can issue a
* TASK_RUNNING store which can collide with __set_current_state(TASK_RUNNING).
*
- * This is obviously fine, since they both store the exact same value.
+ * However, with slightly different timing the wakeup TASK_RUNNING store can
+ * also collide with the TASK_UNINTERRUPTIBLE store. Losing that store is not
+ * a problem either because that will result in one extra trip around the loop
+ * and our @cond test will save the day.
*
* Also see the comments of try_to_wake_up().
*/
-#define __set_current_state(state_value) do { current->state = (state_value); } while (0)
-#define set_current_state(state_value) smp_store_mb(current->state, (state_value))
+#define __set_current_state(state_value) \
+ current->state = (state_value)
+
+#define set_current_state(state_value) \
+ smp_store_mb(current->state, (state_value))
+
+/*
+ * set_special_state() should be used for those states when the blocking task
+ * can not use the regular condition based wait-loop. In that case we must
+ * serialize against wakeups such that any possible in-flight TASK_RUNNING stores
+ * will not collide with our state change.
+ */
+#define set_special_state(state_value) \
+ do { \
+ unsigned long flags; /* may shadow */ \
+ raw_spin_lock_irqsave(&current->pi_lock, flags); \
+ current->state = (state_value); \
+ raw_spin_unlock_irqrestore(&current->pi_lock, flags); \
+ } while (0)
+
#endif
/* Task command name length: */
@@ -1393,7 +1433,8 @@ static inline bool is_percpu_thread(void)
#define PFA_NO_NEW_PRIVS 0 /* May not gain new privileges. */
#define PFA_SPREAD_PAGE 1 /* Spread page cache over cpuset */
#define PFA_SPREAD_SLAB 2 /* Spread some slab caches over cpuset */
-
+#define PFA_SPEC_SSB_DISABLE 3 /* Speculative Store Bypass disabled */
+#define PFA_SPEC_SSB_FORCE_DISABLE 4 /* Speculative Store Bypass force disabled */
#define TASK_PFA_TEST(name, func) \
static inline bool task_##func(struct task_struct *p) \
@@ -1418,6 +1459,13 @@ TASK_PFA_TEST(SPREAD_SLAB, spread_slab)
TASK_PFA_SET(SPREAD_SLAB, spread_slab)
TASK_PFA_CLEAR(SPREAD_SLAB, spread_slab)
+TASK_PFA_TEST(SPEC_SSB_DISABLE, spec_ssb_disable)
+TASK_PFA_SET(SPEC_SSB_DISABLE, spec_ssb_disable)
+TASK_PFA_CLEAR(SPEC_SSB_DISABLE, spec_ssb_disable)
+
+TASK_PFA_TEST(SPEC_SSB_FORCE_DISABLE, spec_ssb_force_disable)
+TASK_PFA_SET(SPEC_SSB_FORCE_DISABLE, spec_ssb_force_disable)
+
static inline void
current_restore_flags(unsigned long orig_flags, unsigned long flags)
{
@@ -1464,6 +1512,7 @@ static inline int task_nice(const struct task_struct *p)
extern int can_nice(const struct task_struct *p, const int nice);
extern int task_curr(const struct task_struct *p);
extern int idle_cpu(int cpu);
+extern int available_idle_cpu(int cpu);
extern int sched_setscheduler(struct task_struct *, int, const struct sched_param *);
extern int sched_setscheduler_nocheck(struct task_struct *, int, const struct sched_param *);
extern int sched_setattr(struct task_struct *, const struct sched_attr *);
@@ -1613,7 +1662,6 @@ static inline int test_tsk_need_resched(struct task_struct *tsk)
* explicit rescheduling in places that are safe. The return
* value indicates whether a reschedule was done in fact.
* cond_resched_lock() will drop the spinlock before scheduling,
- * cond_resched_softirq() will enable bhs before scheduling.
*/
#ifndef CONFIG_PREEMPT
extern int _cond_resched(void);
@@ -1633,13 +1681,6 @@ extern int __cond_resched_lock(spinlock_t *lock);
__cond_resched_lock(lock); \
})
-extern int __cond_resched_softirq(void);
-
-#define cond_resched_softirq() ({ \
- ___might_sleep(__FILE__, __LINE__, SOFTIRQ_DISABLE_OFFSET); \
- __cond_resched_softirq(); \
-})
-
static inline void cond_resched_rcu(void)
{
#if defined(CONFIG_DEBUG_ATOMIC_SLEEP) || !defined(CONFIG_PREEMPT_RCU)
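
The split between set_current_state() and set_special_state() mirrors the two blocking patterns: a condition-rechecking wait loop tolerates a racing TASK_RUNNING store, while TASK_STOPPED/TASK_TRACED/TASK_DEAD have no loop to fall back on and must take pi_lock. A side-by-side sketch of the two patterns (both functions are illustrative only):

#include <linux/sched.h>

static void my_wait(bool *cond)
{
	/* Normal pattern: spurious wakeups are absorbed by re-checking @cond. */
	for (;;) {
		set_current_state(TASK_UNINTERRUPTIBLE);
		if (*cond)
			break;
		schedule();
	}
	__set_current_state(TASK_RUNNING);
}

static void my_stop(void)
{
	/* Special state: no retry loop, so serialize against in-flight wakeups. */
	set_special_state(TASK_STOPPED);
	schedule();
}
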
diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h
index 2c570cd934af..76a8cb4ef178 100644
--- a/include/linux/sched/mm.h
+++ b/include/linux/sched/mm.h
@@ -28,7 +28,7 @@ extern struct mm_struct *mm_alloc(void);
*
* Use mmdrop() to release the reference acquired by mmgrab().
*
- * See also <Documentation/vm/active_mm.txt> for an in-depth explanation
+ * See also <Documentation/vm/active_mm.rst> for an in-depth explanation
* of &mm_struct.mm_count vs &mm_struct.mm_users.
*/
static inline void mmgrab(struct mm_struct *mm)
@@ -62,7 +62,7 @@ static inline void mmdrop(struct mm_struct *mm)
*
* Use mmput() to release the reference acquired by mmget().
*
- * See also <Documentation/vm/active_mm.txt> for an in-depth explanation
+ * See also <Documentation/vm/active_mm.rst> for an in-depth explanation
* of &mm_struct.mm_count vs &mm_struct.mm_users.
*/
static inline void mmget(struct mm_struct *mm)
@@ -170,6 +170,17 @@ static inline void fs_reclaim_acquire(gfp_t gfp_mask) { }
static inline void fs_reclaim_release(gfp_t gfp_mask) { }
#endif
+/**
+ * memalloc_noio_save - Marks implicit GFP_NOIO allocation scope.
+ *
+ * This function marks the beginning of the GFP_NOIO allocation scope.
+ * All further allocations will implicitly drop the __GFP_IO flag and so
+ * they are safe for the IO critical section from the allocation recursion
+ * point of view. Use memalloc_noio_restore() to end the scope with the
+ * flags returned by this function.
+ *
+ * This function is safe to use from any context.
+ */
static inline unsigned int memalloc_noio_save(void)
{
unsigned int flags = current->flags & PF_MEMALLOC_NOIO;
@@ -177,11 +188,30 @@ static inline unsigned int memalloc_noio_save(void)
return flags;
}
+/**
+ * memalloc_noio_restore - Ends the implicit GFP_NOIO scope.
+ * @flags: Flags to restore.
+ *
+ * Ends the implicit GFP_NOIO scope started by the memalloc_noio_save()
+ * function. Always make sure that the given flags value is the return value
+ * of the pairing memalloc_noio_save() call.
+ */
static inline void memalloc_noio_restore(unsigned int flags)
{
current->flags = (current->flags & ~PF_MEMALLOC_NOIO) | flags;
}
+/**
+ * memalloc_nofs_save - Marks implicit GFP_NOFS allocation scope.
+ *
+ * This function marks the beginning of the GFP_NOFS allocation scope.
+ * All further allocations will implicitly drop the __GFP_FS flag and so
+ * they are safe for the FS critical section from the allocation recursion
+ * point of view. Use memalloc_nofs_restore() to end the scope with the
+ * flags returned by this function.
+ *
+ * This function is safe to use from any context.
+ */
static inline unsigned int memalloc_nofs_save(void)
{
unsigned int flags = current->flags & PF_MEMALLOC_NOFS;
@@ -189,6 +219,14 @@ static inline unsigned int memalloc_nofs_save(void)
return flags;
}
+/**
+ * memalloc_nofs_restore - Ends the implicit GFP_NOFS scope.
+ * @flags: Flags to restore.
+ *
+ * Ends the implicit GFP_NOFS scope started by the memalloc_nofs_save()
+ * function. Always make sure that the given flags value is the return value
+ * of the pairing memalloc_nofs_save() call.
+ */
static inline void memalloc_nofs_restore(unsigned int flags)
{
current->flags = (current->flags & ~PF_MEMALLOC_NOFS) | flags;
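
The scope API turns GFP_NOFS/GFP_NOIO from a per-call-site gfp flag into a property of the calling context. A sketch of a filesystem-side critical section (the helper is hypothetical):

#include <linux/sched/mm.h>
#include <linux/slab.h>

static void *my_fs_alloc(size_t size)
{
	unsigned int flags;
	void *p;

	flags = memalloc_nofs_save();	/* GFP_KERNEL now behaves as GFP_NOFS */
	p = kmalloc(size, GFP_KERNEL);
	memalloc_nofs_restore(flags);	/* pass back exactly what save returned */
	return p;
}
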
diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
index a7ce74c74e49..113d1ad1ced7 100644
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -280,7 +280,7 @@ static inline void kernel_signal_stop(void)
{
spin_lock_irq(&current->sighand->siglock);
if (current->jobctl & JOBCTL_STOP_DEQUEUED)
- __set_current_state(TASK_STOPPED);
+ set_special_state(TASK_STOPPED);
spin_unlock_irq(&current->sighand->siglock);
schedule();
diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h
index c723a5c4e3ff..e5320f6c8654 100644
--- a/include/linux/seccomp.h
+++ b/include/linux/seccomp.h
@@ -4,8 +4,9 @@
#include <uapi/linux/seccomp.h>
-#define SECCOMP_FILTER_FLAG_MASK (SECCOMP_FILTER_FLAG_TSYNC | \
- SECCOMP_FILTER_FLAG_LOG)
+#define SECCOMP_FILTER_FLAG_MASK (SECCOMP_FILTER_FLAG_TSYNC | \
+ SECCOMP_FILTER_FLAG_LOG | \
+ SECCOMP_FILTER_FLAG_SPEC_ALLOW)
#ifdef CONFIG_SECCOMP
diff --git a/include/linux/seq_file_net.h b/include/linux/seq_file_net.h
index 43ccd84127b6..0fdbe1ddd8d1 100644
--- a/include/linux/seq_file_net.h
+++ b/include/linux/seq_file_net.h
@@ -13,12 +13,6 @@ struct seq_net_private {
#endif
};
-int seq_open_net(struct inode *, struct file *,
- const struct seq_operations *, int);
-int single_open_net(struct inode *, struct file *file,
- int (*show)(struct seq_file *, void *));
-int seq_release_net(struct inode *, struct file *);
-int single_release_net(struct inode *, struct file *);
static inline struct net *seq_file_net(struct seq_file *seq)
{
#ifdef CONFIG_NET_NS
@@ -28,4 +22,17 @@ static inline struct net *seq_file_net(struct seq_file *seq)
#endif
}
+/*
+ * This one is needed for proc_create_net_single since net is stored directly
+ * in private, not inside a struct, i.e. seq_file_net() can't be used.
+ */
+static inline struct net *seq_file_single_net(struct seq_file *seq)
+{
+#ifdef CONFIG_NET_NS
+ return (struct net *)seq->private;
+#else
+ return &init_net;
+#endif
+}
+
#endif
diff --git a/include/linux/signal.h b/include/linux/signal.h
index a9bc7e1b077e..3c5200137b24 100644
--- a/include/linux/signal.h
+++ b/include/linux/signal.h
@@ -28,6 +28,9 @@ enum siginfo_layout {
SIL_TIMER,
SIL_POLL,
SIL_FAULT,
+ SIL_FAULT_MCEERR,
+ SIL_FAULT_BNDERR,
+ SIL_FAULT_PKUERR,
SIL_CHLD,
SIL_RT,
SIL_SYS,
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 9065477ed255..89198379b39d 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -3250,8 +3250,7 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned flags,
int *peeked, int *off, int *err);
struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags, int noblock,
int *err);
-__poll_t datagram_poll(struct file *file, struct socket *sock,
- struct poll_table_struct *wait);
+__poll_t datagram_poll_mask(struct socket *sock, __poll_t events);
int skb_copy_datagram_iter(const struct sk_buff *from, int offset,
struct iov_iter *to, int size);
static inline int skb_copy_datagram_msg(const struct sk_buff *from, int offset,
diff --git a/include/linux/spi/spi-mem.h b/include/linux/spi/spi-mem.h
new file mode 100644
index 000000000000..bb4bd15ae1f6
--- /dev/null
+++ b/include/linux/spi/spi-mem.h
@@ -0,0 +1,249 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * Copyright (C) 2018 Exceet Electronics GmbH
+ * Copyright (C) 2018 Bootlin
+ *
+ * Author: Boris Brezillon <boris.brezillon@bootlin.com>
+ */
+
+#ifndef __LINUX_SPI_MEM_H
+#define __LINUX_SPI_MEM_H
+
+#include <linux/spi/spi.h>
+
+#define SPI_MEM_OP_CMD(__opcode, __buswidth) \
+ { \
+ .buswidth = __buswidth, \
+ .opcode = __opcode, \
+ }
+
+#define SPI_MEM_OP_ADDR(__nbytes, __val, __buswidth) \
+ { \
+ .nbytes = __nbytes, \
+ .val = __val, \
+ .buswidth = __buswidth, \
+ }
+
+#define SPI_MEM_OP_NO_ADDR { }
+
+#define SPI_MEM_OP_DUMMY(__nbytes, __buswidth) \
+ { \
+ .nbytes = __nbytes, \
+ .buswidth = __buswidth, \
+ }
+
+#define SPI_MEM_OP_NO_DUMMY { }
+
+#define SPI_MEM_OP_DATA_IN(__nbytes, __buf, __buswidth) \
+ { \
+ .dir = SPI_MEM_DATA_IN, \
+ .nbytes = __nbytes, \
+ .buf.in = __buf, \
+ .buswidth = __buswidth, \
+ }
+
+#define SPI_MEM_OP_DATA_OUT(__nbytes, __buf, __buswidth) \
+ { \
+ .dir = SPI_MEM_DATA_OUT, \
+ .nbytes = __nbytes, \
+ .buf.out = __buf, \
+ .buswidth = __buswidth, \
+ }
+
+#define SPI_MEM_OP_NO_DATA { }
+
+/**
+ * enum spi_mem_data_dir - describes the direction of a SPI memory data
+ * transfer from the controller perspective
+ * @SPI_MEM_DATA_IN: data coming from the SPI memory
+ * @SPI_MEM_DATA_OUT: data sent to the SPI memory
+ */
+enum spi_mem_data_dir {
+ SPI_MEM_DATA_IN,
+ SPI_MEM_DATA_OUT,
+};
+
+/**
+ * struct spi_mem_op - describes a SPI memory operation
+ * @cmd.buswidth: number of IO lines used to transmit the command
+ * @cmd.opcode: operation opcode
+ * @addr.nbytes: number of address bytes to send. Can be zero if the operation
+ * does not need to send an address
+ * @addr.buswidth: number of IO lines used to transmit the address cycles
+ * @addr.val: address value. This value is always sent MSB first on the bus.
+ * Note that only @addr.nbytes are taken into account in this
+ * address value, so users should make sure the value fits in the
+ * assigned number of bytes.
+ * @dummy.nbytes: number of dummy bytes to send after an opcode or address. Can
+ * be zero if the operation does not require dummy bytes
+ * @dummy.buswidth: number of IO lanes used to transmit the dummy bytes
+ * @data.buswidth: number of IO lanes used to send/receive the data
+ * @data.dir: direction of the transfer
+ * @data.buf.in: input buffer
+ * @data.buf.out: output buffer
+ */
+struct spi_mem_op {
+ struct {
+ u8 buswidth;
+ u8 opcode;
+ } cmd;
+
+ struct {
+ u8 nbytes;
+ u8 buswidth;
+ u64 val;
+ } addr;
+
+ struct {
+ u8 nbytes;
+ u8 buswidth;
+ } dummy;
+
+ struct {
+ u8 buswidth;
+ enum spi_mem_data_dir dir;
+ unsigned int nbytes;
+ /* buf.{in,out} must be DMA-able. */
+ union {
+ void *in;
+ const void *out;
+ } buf;
+ } data;
+};
+
+#define SPI_MEM_OP(__cmd, __addr, __dummy, __data) \
+ { \
+ .cmd = __cmd, \
+ .addr = __addr, \
+ .dummy = __dummy, \
+ .data = __data, \
+ }
+
+/**
+ * struct spi_mem - describes a SPI memory device
+ * @spi: the underlying SPI device
+ * @drvpriv: spi_mem_driver private data
+ *
+ * Extra information that describes the SPI memory device and may be needed
+ * by the controller to properly handle this device should be placed here.
+ *
+ * One example would be the device size, since some controllers expose their
+ * SPI mem devices through an io-mapped region.
+ */
+struct spi_mem {
+ struct spi_device *spi;
+ void *drvpriv;
+};
+
+/**
+ * spi_mem_set_drvdata() - attach driver private data to a SPI mem
+ * device
+ * @mem: memory device
+ * @data: data to attach to the memory device
+ */
+static inline void spi_mem_set_drvdata(struct spi_mem *mem, void *data)
+{
+ mem->drvpriv = data;
+}
+
+/**
+ * spi_mem_get_drvdata() - get driver private data attached to a SPI mem
+ * device
+ * @mem: memory device
+ *
+ * Return: the data attached to the mem device.
+ */
+static inline void *spi_mem_get_drvdata(struct spi_mem *mem)
+{
+ return mem->drvpriv;
+}
+
+/**
+ * struct spi_controller_mem_ops - SPI memory operations
+ * @adjust_op_size: shrink the data xfer of an operation to match controller's
+ * limitations (can be alignment or max RX/TX size
+ * limitations)
+ * @supports_op: check if an operation is supported by the controller
+ * @exec_op: execute a SPI memory operation
+ *
+ * This interface should be implemented by SPI controllers providing a
+ * high-level interface to execute SPI memory operations, which is usually the
+ * case for QSPI controllers.
+ */
+struct spi_controller_mem_ops {
+ int (*adjust_op_size)(struct spi_mem *mem, struct spi_mem_op *op);
+ bool (*supports_op)(struct spi_mem *mem,
+ const struct spi_mem_op *op);
+ int (*exec_op)(struct spi_mem *mem,
+ const struct spi_mem_op *op);
+};
+
+/**
+ * struct spi_mem_driver - SPI memory driver
+ * @spidrv: inherit from a SPI driver
+ * @probe: probe a SPI memory. Usually where detection/initialization takes
+ * place
+ * @remove: remove a SPI memory
+ * @shutdown: take appropriate action when the system is shutdown
+ *
+ * This is just a thin wrapper around a spi_driver. The core takes care of
+ * allocating the spi_mem object and forwarding the probe/remove/shutdown
+ * request to the spi_mem_driver. The reason we use this wrapper is because
+ * we might have to stuff more information into the spi_mem struct to let
+ * SPI controllers know more about the SPI memory they interact with, and
+ * having this intermediate layer allows us to do that without adding more
+ * useless fields to the spi_device object.
+ */
+struct spi_mem_driver {
+ struct spi_driver spidrv;
+ int (*probe)(struct spi_mem *mem);
+ int (*remove)(struct spi_mem *mem);
+ void (*shutdown)(struct spi_mem *mem);
+};
+
+#if IS_ENABLED(CONFIG_SPI_MEM)
+int spi_controller_dma_map_mem_op_data(struct spi_controller *ctlr,
+ const struct spi_mem_op *op,
+ struct sg_table *sg);
+
+void spi_controller_dma_unmap_mem_op_data(struct spi_controller *ctlr,
+ const struct spi_mem_op *op,
+ struct sg_table *sg);
+#else
+static inline int
+spi_controller_dma_map_mem_op_data(struct spi_controller *ctlr,
+ const struct spi_mem_op *op,
+ struct sg_table *sg)
+{
+ return -ENOTSUPP;
+}
+
+static inline void
+spi_controller_dma_unmap_mem_op_data(struct spi_controller *ctlr,
+ const struct spi_mem_op *op,
+ struct sg_table *sg)
+{
+}
+#endif /* CONFIG_SPI_MEM */
+
+int spi_mem_adjust_op_size(struct spi_mem *mem, struct spi_mem_op *op);
+
+bool spi_mem_supports_op(struct spi_mem *mem,
+ const struct spi_mem_op *op);
+
+int spi_mem_exec_op(struct spi_mem *mem,
+ const struct spi_mem_op *op);
+
+int spi_mem_driver_register_with_owner(struct spi_mem_driver *drv,
+ struct module *owner);
+
+void spi_mem_driver_unregister(struct spi_mem_driver *drv);
+
+#define spi_mem_driver_register(__drv) \
+ spi_mem_driver_register_with_owner(__drv, THIS_MODULE)
+
+#define module_spi_mem_driver(__drv) \
+ module_driver(__drv, spi_mem_driver_register, \
+ spi_mem_driver_unregister)
+
+#endif /* __LINUX_SPI_MEM_H */
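
The op-description macros compose into a complete spi_mem_op that the core either executes through the controller's mem_ops or emulates with regular transfers. A read sketch built from them (opcode 0x0b and the 3-byte-address/1-dummy layout are just an illustrative fast-read shape):

#include <linux/spi/spi-mem.h>

static int my_mem_read(struct spi_mem *mem, u64 from, void *buf, size_t len)
{
	struct spi_mem_op op =
		SPI_MEM_OP(SPI_MEM_OP_CMD(0x0b, 1),
			   SPI_MEM_OP_ADDR(3, from, 1),
			   SPI_MEM_OP_DUMMY(1, 1),
			   SPI_MEM_OP_DATA_IN(len, buf, 1));
	int ret;

	/* Let the controller clamp the data size to what it supports. */
	ret = spi_mem_adjust_op_size(mem, &op);
	if (ret)
		return ret;

	return spi_mem_exec_op(mem, &op);
}
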
diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h
index bc6bb325d1bf..a64235e05321 100644
--- a/include/linux/spi/spi.h
+++ b/include/linux/spi/spi.h
@@ -26,7 +26,7 @@ struct dma_chan;
struct property_entry;
struct spi_controller;
struct spi_transfer;
-struct spi_flash_read_message;
+struct spi_controller_mem_ops;
/*
* INTERFACES between SPI master-side drivers and SPI slave protocol handlers,
@@ -376,13 +376,11 @@ static inline void spi_unregister_driver(struct spi_driver *sdrv)
* transfer_one callback.
* @handle_err: the subsystem calls the driver to handle an error that occurs
* in the generic implementation of transfer_one_message().
+ * @mem_ops: optimized/dedicated operations for interactions with SPI memory.
+ * This field is optional and should only be implemented if the
+ * controller has native support for memory-like operations.
* @unprepare_message: undo any work done by prepare_message().
* @slave_abort: abort the ongoing transfer request on an SPI slave controller
- * @spi_flash_read: to support spi-controller hardwares that provide
- * accelerated interface to read from flash devices.
- * @spi_flash_can_dma: analogous to can_dma() interface, but for
- * controllers implementing spi_flash_read.
- * @flash_read_supported: spi device supports flash read
* @cs_gpios: Array of GPIOs to use as chip select lines; one per CS
* number. Any individual value may be -ENOENT for CS lines that
* are not GPIOs (driven by the SPI controller itself).
@@ -548,11 +546,6 @@ struct spi_controller {
int (*unprepare_message)(struct spi_controller *ctlr,
struct spi_message *message);
int (*slave_abort)(struct spi_controller *ctlr);
- int (*spi_flash_read)(struct spi_device *spi,
- struct spi_flash_read_message *msg);
- bool (*spi_flash_can_dma)(struct spi_device *spi,
- struct spi_flash_read_message *msg);
- bool (*flash_read_supported)(struct spi_device *spi);
/*
* These hooks are for drivers that use a generic implementation
@@ -564,6 +557,9 @@ struct spi_controller {
void (*handle_err)(struct spi_controller *ctlr,
struct spi_message *message);
+ /* Optimized handlers for SPI memory-like operations. */
+ const struct spi_controller_mem_ops *mem_ops;
+
/* gpio chip select */
int *cs_gpios;
@@ -1183,48 +1179,6 @@ static inline ssize_t spi_w8r16be(struct spi_device *spi, u8 cmd)
return be16_to_cpu(result);
}
-/**
- * struct spi_flash_read_message - flash specific information for
- * spi-masters that provide accelerated flash read interfaces
- * @buf: buffer to read data
- * @from: offset within the flash from where data is to be read
- * @len: length of data to be read
- * @retlen: actual length of data read
- * @read_opcode: read_opcode to be used to communicate with flash
- * @addr_width: number of address bytes
- * @dummy_bytes: number of dummy bytes
- * @opcode_nbits: number of lines to send opcode
- * @addr_nbits: number of lines to send address
- * @data_nbits: number of lines for data
- * @rx_sg: Scatterlist for receive data read from flash
- * @cur_msg_mapped: message has been mapped for DMA
- */
-struct spi_flash_read_message {
- void *buf;
- loff_t from;
- size_t len;
- size_t retlen;
- u8 read_opcode;
- u8 addr_width;
- u8 dummy_bytes;
- u8 opcode_nbits;
- u8 addr_nbits;
- u8 data_nbits;
- struct sg_table rx_sg;
- bool cur_msg_mapped;
-};
-
-/* SPI core interface for flash read support */
-static inline bool spi_flash_read_supported(struct spi_device *spi)
-{
- return spi->controller->spi_flash_read &&
- (!spi->controller->flash_read_supported ||
- spi->controller->flash_read_supported(spi));
-}
-
-int spi_flash_read(struct spi_device *spi,
- struct spi_flash_read_message *msg);
-
/*---------------------------------------------------------------------------*/
/*
diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h
index 4894d322d258..1e8a46435838 100644
--- a/include/linux/spinlock.h
+++ b/include/linux/spinlock.h
@@ -380,6 +380,24 @@ static __always_inline int spin_trylock_irq(spinlock_t *lock)
raw_spin_trylock_irqsave(spinlock_check(lock), flags); \
})
+/**
+ * spin_is_locked() - Check whether a spinlock is locked.
+ * @lock: Pointer to the spinlock.
+ *
+ * This function is NOT required to provide any memory ordering
+ * guarantees; it could be used for debugging purposes or, when
+ * additional synchronization is needed, accompanied with other
+ * constructs (memory barriers) enforcing the synchronization.
+ *
+ * Returns: 1 if @lock is locked, 0 otherwise.
+ *
+ * Note that the function only tells you that the spinlock is
+ * seen to be locked, not that it is locked on your CPU.
+ *
+ * Further, on CONFIG_SMP=n builds with CONFIG_DEBUG_SPINLOCK=n,
+ * the return value is always 0 (see include/linux/spinlock_up.h).
+ * Therefore you should not rely heavily on the return value.
+ */
static __always_inline int spin_is_locked(spinlock_t *lock)
{
return raw_spin_is_locked(&lock->rlock);
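
Given the CONFIG_SMP=n caveat spelled out above, spin_is_locked() is a poor basis for assertions; the dedicated helpers are the usual choice. A short contrast (the lock name is hypothetical):

#include <linux/lockdep.h>
#include <linux/spinlock.h>

static DEFINE_SPINLOCK(my_lock);

static void my_locked_helper(void)
{
	/* Unreliable: on UP builds spin_is_locked() may always return 0. */
	WARN_ON(!spin_is_locked(&my_lock));

	/* Preferred: correct per-config, and free when debugging is off. */
	assert_spin_locked(&my_lock);
	lockdep_assert_held(&my_lock);
}
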
diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index 33c1c698df09..91494d7e8e41 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -69,11 +69,45 @@ struct srcu_struct { };
void call_srcu(struct srcu_struct *sp, struct rcu_head *head,
void (*func)(struct rcu_head *head));
-void cleanup_srcu_struct(struct srcu_struct *sp);
+void _cleanup_srcu_struct(struct srcu_struct *sp, bool quiesced);
int __srcu_read_lock(struct srcu_struct *sp) __acquires(sp);
void __srcu_read_unlock(struct srcu_struct *sp, int idx) __releases(sp);
void synchronize_srcu(struct srcu_struct *sp);
+/**
+ * cleanup_srcu_struct - deconstruct a sleep-RCU structure
+ * @sp: structure to clean up.
+ *
+ * Must invoke this after you are finished using a given srcu_struct that
+ * was initialized via init_srcu_struct(), else you leak memory.
+ */
+static inline void cleanup_srcu_struct(struct srcu_struct *sp)
+{
+ _cleanup_srcu_struct(sp, false);
+}
+
+/**
+ * cleanup_srcu_struct_quiesced - deconstruct a quiesced sleep-RCU structure
+ * @sp: structure to clean up.
+ *
+ * Must invoke this after you are finished using a given srcu_struct that
+ * was initialized via init_srcu_struct(), else you leak memory. Also,
+ * all grace-period processing must have completed.
+ *
+ * "Completed" means that the last synchronize_srcu() and
+ * synchronize_srcu_expedited() calls must have returned before the call
+ * to cleanup_srcu_struct_quiesced(). It also means that the callback
+ * from the last call_srcu() must have been invoked before the call to
+ * cleanup_srcu_struct_quiesced(), but you can use srcu_barrier() to help
+ * with this last. Violating these rules will get you a WARN_ON() splat
+ * (with high probability, anyway), and will also cause the srcu_struct
+ * to be leaked.
+ */
+static inline void cleanup_srcu_struct_quiesced(struct srcu_struct *sp)
+{
+ _cleanup_srcu_struct(sp, true);
+}
+
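A minimal teardown sketch for the quiesced variant, assuming a driver-private srcu_struct and callback (my_srcu and my_cb are hypothetical):

    /* Quiesce first: wait for the last callback and any grace period. */
    srcu_barrier(&my_srcu);         /* last my_cb from call_srcu() done */
    synchronize_srcu(&my_srcu);     /* no grace-period work in flight */
    cleanup_srcu_struct_quiesced(&my_srcu);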
#ifdef CONFIG_DEBUG_LOCK_ALLOC
/**
diff --git a/include/linux/string.h b/include/linux/string.h
index dd39a690c841..4a5a0eb7df51 100644
--- a/include/linux/string.h
+++ b/include/linux/string.h
@@ -147,8 +147,8 @@ extern int memcmp(const void *,const void *,__kernel_size_t);
extern void * memchr(const void *,int,__kernel_size_t);
#endif
#ifndef __HAVE_ARCH_MEMCPY_MCSAFE
-static inline __must_check int memcpy_mcsafe(void *dst, const void *src,
- size_t cnt)
+static inline __must_check unsigned long memcpy_mcsafe(void *dst,
+ const void *src, size_t cnt)
{
memcpy(dst, src, cnt);
return 0;
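The return type widens from int to unsigned long so a caller can learn how much was left uncopied when a machine check aborts the copy mid-way; a hedged caller sketch:

    unsigned long rem;

    rem = memcpy_mcsafe(dst, src, len);
    if (rem) {
            /* Only len - rem bytes arrived; treat the source as bad. */
            return -EIO;
    }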
diff --git a/include/linux/sunrpc/rpc_pipe_fs.h b/include/linux/sunrpc/rpc_pipe_fs.h
index a5704daf5df9..e90b9bd99ded 100644
--- a/include/linux/sunrpc/rpc_pipe_fs.h
+++ b/include/linux/sunrpc/rpc_pipe_fs.h
@@ -122,8 +122,6 @@ extern struct dentry *rpc_create_cache_dir(struct dentry *,
struct cache_detail *);
extern void rpc_remove_cache_dir(struct dentry *);
-extern int rpc_rmdir(struct dentry *dentry);
-
struct rpc_pipe *rpc_mkpipe_data(const struct rpc_pipe_ops *ops, int flags);
void rpc_destroy_pipe_data(struct rpc_pipe *pipe);
extern struct dentry *rpc_mkpipe_dentry(struct dentry *, const char *, void *,
diff --git a/include/linux/swait.h b/include/linux/swait.h
index c98aaf677466..bf8cb0dee23c 100644
--- a/include/linux/swait.h
+++ b/include/linux/swait.h
@@ -5,10 +5,23 @@
#include <linux/list.h>
#include <linux/stddef.h>
#include <linux/spinlock.h>
+#include <linux/wait.h>
#include <asm/current.h>
/*
- * Simple wait queues
+ * BROKEN wait-queues.
+ *
+ * These "simple" wait-queues are broken garbage, and should never be
+ * used. The comments below claim that they are "similar" to regular
+ * wait-queues, but the semantics are actually completely different, and
+ * every single user we have ever had has been buggy (or pointless).
+ *
+ * A "swake_up()" only wakes up _one_ waiter, which is not at all what
+ * "wake_up()" does, and has led to problems. In other cases, it has
+ * been fine, because there's only ever one waiter (kvm), but in that
+ * case the whole "simple" wait-queue is just pointless to begin with,
+ * since there is no "queue". Use "wake_up_process()" with a direct
+ * pointer instead.
*
* While these are very similar to regular wait queues (wait.h) the most
* important difference is that the simple waitqueue allows for deterministic
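A sketch of the single-waiter replacement the comment recommends, assuming a context struct with a task pointer and a done flag (my_ctx is hypothetical); the usual barriers still apply between the condition store and the wakeup:

    /* waiter */
    WRITE_ONCE(my_ctx->waiter, current);
    set_current_state(TASK_INTERRUPTIBLE);
    if (!READ_ONCE(my_ctx->done))
            schedule();
    __set_current_state(TASK_RUNNING);

    /* waker */
    struct task_struct *t;

    WRITE_ONCE(my_ctx->done, true);
    smp_mb();       /* order the done store against the waiter's state */
    t = READ_ONCE(my_ctx->waiter);
    if (t)
            wake_up_process(t);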
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 2417d288e016..c063443d8638 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -53,7 +53,7 @@ static inline int current_is_kswapd(void)
/*
* Unaddressable device memory support. See include/linux/hmm.h and
- * Documentation/vm/hmm.txt. Short description is we need struct pages for
+ * Documentation/vm/hmm.rst. Short description is we need struct pages for
* device memory that is unaddressable (inaccessible) by CPU, so that we can
* migrate part of a process memory to device memory.
*
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index c9a2a2601852..796ea267f1c7 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -290,6 +290,12 @@ asmlinkage long sys_io_getevents(aio_context_t ctx_id,
long nr,
struct io_event __user *events,
struct timespec __user *timeout);
+asmlinkage long sys_io_pgetevents(aio_context_t ctx_id,
+ long min_nr,
+ long nr,
+ struct io_event __user *events,
+ struct timespec __user *timeout,
+ const struct __aio_sigset *sig);
/* fs/xattr.c */
asmlinkage long sys_setxattr(const char __user *path, const char __user *name,
diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h
index 26c152122a42..4a8841963c2e 100644
--- a/include/linux/tracehook.h
+++ b/include/linux/tracehook.h
@@ -124,6 +124,7 @@ static inline void tracehook_report_syscall_exit(struct pt_regs *regs, int step)
{
if (step) {
siginfo_t info;
+ clear_siginfo(&info);
user_single_step_siginfo(current, regs, &info);
force_sig_info(SIGTRAP, &info, current);
return;
diff --git a/include/linux/tty.h b/include/linux/tty.h
index 1dd587ba6d88..9bd7d37adbfa 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -10,6 +10,7 @@
#include <linux/tty_ldisc.h>
#include <linux/mutex.h>
#include <linux/tty_flags.h>
+#include <linux/seq_file.h>
#include <uapi/linux/tty.h>
#include <linux/rwsem.h>
#include <linux/llist.h>
@@ -535,7 +536,7 @@ extern void tty_ldisc_deref(struct tty_ldisc *);
extern struct tty_ldisc *tty_ldisc_ref_wait(struct tty_struct *);
extern void tty_ldisc_hangup(struct tty_struct *tty, bool reset);
extern int tty_ldisc_reinit(struct tty_struct *tty, int disc);
-extern const struct file_operations tty_ldiscs_proc_fops;
+extern const struct seq_operations tty_ldiscs_seq_ops;
extern void tty_wakeup(struct tty_struct *tty);
extern void tty_ldisc_flush(struct tty_struct *tty);
diff --git a/include/linux/tty_driver.h b/include/linux/tty_driver.h
index 31c2b5b166de..71dbc891851a 100644
--- a/include/linux/tty_driver.h
+++ b/include/linux/tty_driver.h
@@ -293,7 +293,7 @@ struct tty_operations {
int (*poll_get_char)(struct tty_driver *driver, int line);
void (*poll_put_char)(struct tty_driver *driver, int line, char ch);
#endif
- const struct file_operations *proc_fops;
+ int (*proc_show)(struct seq_file *, void *);
} __randomize_layout;
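A hedged sketch of what a converted driver looks like: instead of a full file_operations, it hands the tty core a single seq_file show routine (the my_* names are hypothetical):

    static int my_tty_proc_show(struct seq_file *m, void *v)
    {
            seq_printf(m, "mytty: %d ports\n", MY_NR_PORTS);
            return 0;
    }

    static const struct tty_operations my_tty_ops = {
            /* ... */
            .proc_show = my_tty_proc_show,
    };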
struct tty_driver {
diff --git a/include/linux/uio.h b/include/linux/uio.h
index e67e12adb136..f5766e853a77 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -154,6 +154,12 @@ size_t _copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i);
#define _copy_from_iter_flushcache _copy_from_iter_nocache
#endif
+#ifdef CONFIG_ARCH_HAS_UACCESS_MCSAFE
+size_t _copy_to_iter_mcsafe(void *addr, size_t bytes, struct iov_iter *i);
+#else
+#define _copy_to_iter_mcsafe _copy_to_iter
+#endif
+
static __always_inline __must_check
size_t copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i)
{
@@ -163,6 +169,15 @@ size_t copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i)
return _copy_from_iter_flushcache(addr, bytes, i);
}
+static __always_inline __must_check
+size_t copy_to_iter_mcsafe(void *addr, size_t bytes, struct iov_iter *i)
+{
+ if (unlikely(!check_copy_size(addr, bytes, false)))
+ return 0;
+ else
+ return _copy_to_iter_mcsafe(addr, bytes, i);
+}
+
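A hedged sketch of the intended pmem-style read path: like copy_to_iter(), the helper returns the number of bytes actually copied, so a short return signals a machine check raised by the source:

    size_t copied;

    copied = copy_to_iter_mcsafe(kaddr, len, iter);
    if (copied != len)
            return -EIO;    /* media error part-way through the copy */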
size_t iov_iter_zero(size_t bytes, struct iov_iter *);
unsigned long iov_iter_alignment(const struct iov_iter *i);
unsigned long iov_iter_gap_alignment(const struct iov_iter *i);
diff --git a/include/linux/usb/composite.h b/include/linux/usb/composite.h
index 4b6b9283fa7b..8675e145ea8b 100644
--- a/include/linux/usb/composite.h
+++ b/include/linux/usb/composite.h
@@ -52,7 +52,7 @@
#define USB_GADGET_DELAYED_STATUS 0x7fff /* Impossibly large value */
/* big enough to hold our biggest descriptor */
-#define USB_COMP_EP0_BUFSIZ 1024
+#define USB_COMP_EP0_BUFSIZ 4096
/* OS feature descriptor length <= 4kB */
#define USB_COMP_EP0_OS_DESC_BUFSIZ 4096
diff --git a/include/linux/wait_bit.h b/include/linux/wait_bit.h
index 9318b2166439..2b0072fa5e92 100644
--- a/include/linux/wait_bit.h
+++ b/include/linux/wait_bit.h
@@ -305,4 +305,21 @@ do { \
__ret; \
})
+/**
+ * clear_and_wake_up_bit - clear a bit and wake up anyone waiting on that bit
+ *
+ * @bit: the bit of the word being waited on
+ * @word: the word being waited on, a kernel virtual address
+ *
+ * You can use this helper if bitflags are manipulated atomically rather than
+ * non-atomically under a lock.
+ */
+static inline void clear_and_wake_up_bit(int bit, void *word)
+{
+ clear_bit_unlock(bit, word);
+ /* See wake_up_bit() for which memory barrier you need to use. */
+ smp_mb__after_atomic();
+ wake_up_bit(word, bit);
+}
+
#endif /* _LINUX_WAIT_BIT_H */
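A minimal pairing sketch for the new helper, assuming an atomically managed flags word (obj->flags and MY_FLAG_BUSY are hypothetical):

    /* sleeper */
    wait_on_bit(&obj->flags, MY_FLAG_BUSY, TASK_UNINTERRUPTIBLE);

    /* owner, once the work is done */
    clear_and_wake_up_bit(MY_FLAG_BUSY, &obj->flags);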
diff --git a/include/linux/xattr.h b/include/linux/xattr.h
index d70f77a4b62a..6dad031be3c2 100644
--- a/include/linux/xattr.h
+++ b/include/linux/xattr.h
@@ -46,7 +46,6 @@ struct xattr {
size_t value_len;
};
-ssize_t xattr_getsecurity(struct inode *, const char *, void *, size_t);
ssize_t __vfs_getxattr(struct dentry *, struct inode *, const char *, void *, size_t);
ssize_t vfs_getxattr(struct dentry *, const char *, void *, size_t);
ssize_t vfs_listxattr(struct dentry *d, char *list, size_t size);
diff --git a/include/media/i2c/tvp7002.h b/include/media/i2c/tvp7002.h
index 5ee007c1cead..cb213c136089 100644
--- a/include/media/i2c/tvp7002.h
+++ b/include/media/i2c/tvp7002.h
@@ -5,7 +5,7 @@
* Author: Santiago Nunez-Corrales <santiago.nunez@ridgerun.com>
*
* This code is partially based upon the TVP5150 driver
- * written by Mauro Carvalho Chehab (mchehab@infradead.org),
+ * written by Mauro Carvalho Chehab <mchehab@kernel.org>,
* the TVP514x driver written by Vaibhav Hiremath <hvaibhav@ti.com>
* and the TVP7002 driver in the TI LSP 2.10.00.14
*
diff --git a/include/media/videobuf-core.h b/include/media/videobuf-core.h
index 0bda0adc744f..60a664febba0 100644
--- a/include/media/videobuf-core.h
+++ b/include/media/videobuf-core.h
@@ -1,11 +1,11 @@
/*
* generic helper functions for handling video4linux capture buffers
*
- * (c) 2007 Mauro Carvalho Chehab, <mchehab@infradead.org>
+ * (c) 2007 Mauro Carvalho Chehab, <mchehab@kernel.org>
*
* Highly based on video-buf written originally by:
* (c) 2001,02 Gerd Knorr <kraxel@bytesex.org>
- * (c) 2006 Mauro Carvalho Chehab, <mchehab@infradead.org>
+ * (c) 2006 Mauro Carvalho Chehab, <mchehab@kernel.org>
* (c) 2006 Ted Walther and John Sokol
*
* This program is free software; you can redistribute it and/or modify
diff --git a/include/media/videobuf-dma-sg.h b/include/media/videobuf-dma-sg.h
index d8b27854e3bf..01bd142b979d 100644
--- a/include/media/videobuf-dma-sg.h
+++ b/include/media/videobuf-dma-sg.h
@@ -6,11 +6,11 @@
* into PAGE_SIZE chunks). They also assume the driver does not need
* to touch the video data.
*
- * (c) 2007 Mauro Carvalho Chehab, <mchehab@infradead.org>
+ * (c) 2007 Mauro Carvalho Chehab, <mchehab@kernel.org>
*
* Highly based on video-buf written originally by:
* (c) 2001,02 Gerd Knorr <kraxel@bytesex.org>
- * (c) 2006 Mauro Carvalho Chehab, <mchehab@infradead.org>
+ * (c) 2006 Mauro Carvalho Chehab, <mchehab@kernel.org>
* (c) 2006 Ted Walther and John Sokol
*
* This program is free software; you can redistribute it and/or modify
diff --git a/include/media/videobuf-vmalloc.h b/include/media/videobuf-vmalloc.h
index 486a97efdb56..36c6a4ad3504 100644
--- a/include/media/videobuf-vmalloc.h
+++ b/include/media/videobuf-vmalloc.h
@@ -6,7 +6,7 @@
* into PAGE_SIZE chunks). They also assume the driver does not need
* to touch the video data.
*
- * (c) 2007 Mauro Carvalho Chehab, <mchehab@infradead.org>
+ * (c) 2007 Mauro Carvalho Chehab, <mchehab@kernel.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
diff --git a/include/net/ax25.h b/include/net/ax25.h
index c91bc87931c7..3f9aea8087e3 100644
--- a/include/net/ax25.h
+++ b/include/net/ax25.h
@@ -15,6 +15,7 @@
#include <linux/refcount.h>
#include <net/neighbour.h>
#include <net/sock.h>
+#include <linux/seq_file.h>
#define AX25_T1CLAMPLO 1
#define AX25_T1CLAMPHI (30 * HZ)
@@ -399,7 +400,7 @@ int ax25_check_iframes_acked(ax25_cb *, unsigned short);
/* ax25_route.c */
void ax25_rt_device_down(struct net_device *);
int ax25_rt_ioctl(unsigned int, void __user *);
-extern const struct file_operations ax25_route_fops;
+extern const struct seq_operations ax25_rt_seqops;
ax25_route *ax25_get_route(ax25_address *addr, struct net_device *dev);
int ax25_rt_autobind(ax25_cb *, ax25_address *);
struct sk_buff *ax25_rt_build_path(struct sk_buff *, ax25_address *,
@@ -455,7 +456,7 @@ unsigned long ax25_display_timer(struct timer_list *);
extern int ax25_uid_policy;
ax25_uid_assoc *ax25_findbyuid(kuid_t);
int __must_check ax25_uid_ioctl(int, struct sockaddr_ax25 *);
-extern const struct file_operations ax25_uid_fops;
+extern const struct seq_operations ax25_uid_seqops;
void ax25_uid_free(void);
/* sysctl_net_ax25.c */
diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h
index ec9d6bc65855..53ce8176c313 100644
--- a/include/net/bluetooth/bluetooth.h
+++ b/include/net/bluetooth/bluetooth.h
@@ -271,7 +271,7 @@ int bt_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
int flags);
int bt_sock_stream_recvmsg(struct socket *sock, struct msghdr *msg,
size_t len, int flags);
-__poll_t bt_sock_poll(struct file *file, struct socket *sock, poll_table *wait);
+__poll_t bt_sock_poll_mask(struct socket *sock, __poll_t events);
int bt_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
int bt_sock_wait_state(struct sock *sk, int state, unsigned long timeo);
int bt_sock_wait_ready(struct sock *sk, unsigned long flags);
diff --git a/include/net/bonding.h b/include/net/bonding.h
index f801fc940b29..b52235158836 100644
--- a/include/net/bonding.h
+++ b/include/net/bonding.h
@@ -198,6 +198,7 @@ struct bonding {
struct slave __rcu *primary_slave;
struct bond_up_slave __rcu *slave_arr; /* Array of usable slaves */
bool force_primary;
+ u32 nest_level;
s32 slave_cnt; /* never change this value outside the attach/detach wrappers */
int (*recv_probe)(const struct sk_buff *, struct bonding *,
struct slave *);
diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h
index 71c72a939bf8..c5187438af38 100644
--- a/include/net/busy_poll.h
+++ b/include/net/busy_poll.h
@@ -121,6 +121,21 @@ static inline void sk_busy_loop(struct sock *sk, int nonblock)
#endif
}
+static inline void sock_poll_busy_loop(struct socket *sock, __poll_t events)
+{
+ if (sk_can_busy_loop(sock->sk) &&
+ events && (events & POLL_BUSY_LOOP)) {
+ /* once, only if requested by syscall */
+ sk_busy_loop(sock->sk, 1);
+ }
+}
+
+/* if this socket can poll_ll, tell the system call */
+static inline __poll_t sock_poll_busy_flag(struct socket *sock)
+{
+ return sk_can_busy_loop(sock->sk) ? POLL_BUSY_LOOP : 0;
+}
+
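A hedged sketch of where these helpers slot into a protocol's new-style poll path (my_proto_poll_mask is hypothetical):

    static __poll_t my_proto_poll_mask(struct socket *sock, __poll_t events)
    {
            __poll_t mask = 0;

            sock_poll_busy_loop(sock, events);      /* spin once if asked */
            if (!skb_queue_empty(&sock->sk->sk_receive_queue))
                    mask |= EPOLLIN | EPOLLRDNORM;
            return mask | sock_poll_busy_flag(sock);
    }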
/* used in the NIC receive handler to mark the skb */
static inline void skb_mark_napi_id(struct sk_buff *skb,
struct napi_struct *napi)
diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h
index 9a074776f70b..d1fcf2442a42 100644
--- a/include/net/flow_dissector.h
+++ b/include/net/flow_dissector.h
@@ -251,7 +251,7 @@ extern struct flow_dissector flow_keys_buf_dissector;
* This structure is used to hold a digest of the full flow keys. This is a
* larger "hash" of a flow to allow definitively matching specific flows where
* the 32 bit skb->hash is not large enough. The size is limited to 16 bytes so
- * that it can by used in CB of skb (see sch_choke for an example).
+ * that it can be used in CB of skb (see sch_choke for an example).
*/
#define FLOW_KEYS_DIGEST_LEN 16
struct flow_keys_digest {
diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index 5e86fd9dc857..0e79c3408569 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -394,7 +394,15 @@ void fib6_gc_cleanup(void);
int fib6_init(void);
-int ipv6_route_open(struct inode *inode, struct file *file);
+struct ipv6_route_iter {
+ struct seq_net_private p;
+ struct fib6_walker w;
+ loff_t skip;
+ struct fib6_table *tbl;
+ int sernum;
+};
+
+extern const struct seq_operations ipv6_route_seq_ops;
int call_fib6_notifier(struct notifier_block *nb, struct net *net,
enum fib_event_type event_type,
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index eb0bec043c96..aea7a124e66b 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -41,18 +41,6 @@ static inline struct netns_ipvs *net_ipvs(struct net* net)
return net->ipvs;
}
-/* This one needed for single_open_net since net is stored directly in
- * private not as a struct i.e. seq_file_net can't be used.
- */
-static inline struct net *seq_file_single_net(struct seq_file *seq)
-{
-#ifdef CONFIG_NET_NS
- return (struct net *)seq->private;
-#else
- return &init_net;
-#endif
-}
-
/* Connections' size value needed by ip_vs_ctl.c */
extern int ip_vs_conn_tab_size;
diff --git a/include/net/iucv/af_iucv.h b/include/net/iucv/af_iucv.h
index f4c21b5a1242..b0eaeb02d46d 100644
--- a/include/net/iucv/af_iucv.h
+++ b/include/net/iucv/af_iucv.h
@@ -153,8 +153,6 @@ struct iucv_sock_list {
atomic_t autobind_name;
};
-__poll_t iucv_sock_poll(struct file *file, struct socket *sock,
- poll_table *wait);
void iucv_sock_link(struct iucv_sock_list *l, struct sock *s);
void iucv_sock_unlink(struct iucv_sock_list *l, struct sock *s);
void iucv_accept_enqueue(struct sock *parent, struct sock *sk);
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index d2279b2d61aa..b2f3a0c018e7 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -2080,7 +2080,7 @@ struct ieee80211_txq {
* virtual interface might not be given air time for the transmission of
* the frame, as it is not synced with the AP/P2P GO yet, and thus the
* deauthentication frame might not be transmitted.
- >
+ *
* @IEEE80211_HW_DOESNT_SUPPORT_QOS_NDP: The driver (or firmware) doesn't
* support QoS NDP for AP probing - that's most likely a driver bug.
*
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index cd368d1b8cb8..a1e28dd5d0bf 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -170,6 +170,7 @@ struct nft_data_desc {
int nft_data_init(const struct nft_ctx *ctx,
struct nft_data *data, unsigned int size,
struct nft_data_desc *desc, const struct nlattr *nla);
+void nft_data_hold(const struct nft_data *data, enum nft_data_types type);
void nft_data_release(const struct nft_data *data, enum nft_data_types type);
int nft_data_dump(struct sk_buff *skb, int attr, const struct nft_data *data,
enum nft_data_types type, unsigned int len);
@@ -736,6 +737,10 @@ struct nft_expr_ops {
int (*init)(const struct nft_ctx *ctx,
const struct nft_expr *expr,
const struct nlattr * const tb[]);
+ void (*activate)(const struct nft_ctx *ctx,
+ const struct nft_expr *expr);
+ void (*deactivate)(const struct nft_ctx *ctx,
+ const struct nft_expr *expr);
void (*destroy)(const struct nft_ctx *ctx,
const struct nft_expr *expr);
int (*dump)(struct sk_buff *skb,
diff --git a/include/net/netrom.h b/include/net/netrom.h
index 0dad2dd5f9d7..5a0714ff500f 100644
--- a/include/net/netrom.h
+++ b/include/net/netrom.h
@@ -13,6 +13,7 @@
#include <linux/slab.h>
#include <net/sock.h>
#include <linux/refcount.h>
+#include <linux/seq_file.h>
#define NR_NETWORK_LEN 15
#define NR_TRANSPORT_LEN 5
@@ -216,8 +217,8 @@ struct net_device *nr_dev_get(ax25_address *);
int nr_rt_ioctl(unsigned int, void __user *);
void nr_link_failed(ax25_cb *, int);
int nr_route_frame(struct sk_buff *, ax25_cb *);
-extern const struct file_operations nr_nodes_fops;
-extern const struct file_operations nr_neigh_fops;
+extern const struct seq_operations nr_node_seqops;
+extern const struct seq_operations nr_neigh_seqops;
void nr_rt_free(void);
/* nr_subr.c */
diff --git a/include/net/phonet/pn_dev.h b/include/net/phonet/pn_dev.h
index 8639de5750f6..cbee32be1d9c 100644
--- a/include/net/phonet/pn_dev.h
+++ b/include/net/phonet/pn_dev.h
@@ -56,7 +56,7 @@ struct net_device *phonet_route_output(struct net *net, u8 daddr);
#define PN_NO_ADDR 0xff
-extern const struct file_operations pn_sock_seq_fops;
-extern const struct file_operations pn_res_seq_fops;
+extern const struct seq_operations pn_sock_seq_ops;
+extern const struct seq_operations pn_res_seq_ops;
#endif
diff --git a/include/net/ping.h b/include/net/ping.h
index 4cd90d6b5c25..fd080e043a6e 100644
--- a/include/net/ping.h
+++ b/include/net/ping.h
@@ -83,20 +83,9 @@ int ping_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);
bool ping_rcv(struct sk_buff *skb);
#ifdef CONFIG_PROC_FS
-struct ping_seq_afinfo {
- char *name;
- sa_family_t family;
- const struct file_operations *seq_fops;
- const struct seq_operations seq_ops;
-};
-
-extern const struct file_operations ping_seq_fops;
-
void *ping_seq_start(struct seq_file *seq, loff_t *pos, sa_family_t family);
void *ping_seq_next(struct seq_file *seq, void *v, loff_t *pos);
void ping_seq_stop(struct seq_file *seq, void *v);
-int ping_proc_register(struct net *net, struct ping_seq_afinfo *afinfo);
-void ping_proc_unregister(struct net *net, struct ping_seq_afinfo *afinfo);
int __init ping_proc_init(void);
void ping_proc_exit(void);
diff --git a/include/net/raw.h b/include/net/raw.h
index 99d26d0c4a19..9c9fa98a91a4 100644
--- a/include/net/raw.h
+++ b/include/net/raw.h
@@ -48,7 +48,6 @@ void raw_proc_exit(void);
struct raw_iter_state {
struct seq_net_private p;
int bucket;
- struct raw_hashinfo *h;
};
static inline struct raw_iter_state *raw_seq_private(struct seq_file *seq)
@@ -58,9 +57,6 @@ static inline struct raw_iter_state *raw_seq_private(struct seq_file *seq)
void *raw_seq_start(struct seq_file *seq, loff_t *pos);
void *raw_seq_next(struct seq_file *seq, void *v, loff_t *pos);
void raw_seq_stop(struct seq_file *seq, void *v);
-int raw_seq_open(struct inode *ino, struct file *file,
- struct raw_hashinfo *h, const struct seq_operations *ops);
-
#endif
int raw_hash_sk(struct sock *sk);
diff --git a/include/net/rose.h b/include/net/rose.h
index 04b72681f2ab..cf517d306a28 100644
--- a/include/net/rose.h
+++ b/include/net/rose.h
@@ -200,9 +200,9 @@ void rose_enquiry_response(struct sock *);
/* rose_route.c */
extern struct rose_neigh *rose_loopback_neigh;
-extern const struct file_operations rose_neigh_fops;
-extern const struct file_operations rose_nodes_fops;
-extern const struct file_operations rose_routes_fops;
+extern const struct seq_operations rose_neigh_seqops;
+extern const struct seq_operations rose_node_seqops;
+extern struct seq_operations rose_route_seqops;
void rose_add_loopback_neigh(void);
int __must_check rose_add_loopback_node(rose_address *);
diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index 28b996d63490..e6d349b2a791 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -103,12 +103,13 @@ void sctp_addr_wq_mgmt(struct net *, struct sctp_sockaddr_entry *, int);
/*
* sctp/socket.c
*/
+int sctp_inet_connect(struct socket *sock, struct sockaddr *uaddr,
+ int addr_len, int flags);
int sctp_backlog_rcv(struct sock *sk, struct sk_buff *skb);
int sctp_inet_listen(struct socket *sock, int backlog);
void sctp_write_space(struct sock *sk);
void sctp_data_ready(struct sock *sk);
-__poll_t sctp_poll(struct file *file, struct socket *sock,
- poll_table *wait);
+__poll_t sctp_poll_mask(struct socket *sock, __poll_t events);
void sctp_sock_rfree(struct sk_buff *skb);
void sctp_copy_sock(struct sock *newsk, struct sock *sk,
struct sctp_association *asoc);
diff --git a/include/net/sock.h b/include/net/sock.h
index 74d725fdbe0f..4d2e8ad98985 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1591,8 +1591,6 @@ int sock_no_connect(struct socket *, struct sockaddr *, int, int);
int sock_no_socketpair(struct socket *, struct socket *);
int sock_no_accept(struct socket *, struct socket *, int, bool);
int sock_no_getname(struct socket *, struct sockaddr *, int);
-__poll_t sock_no_poll(struct file *, struct socket *,
- struct poll_table_struct *);
int sock_no_ioctl(struct socket *, unsigned int, unsigned long);
int sock_no_listen(struct socket *, int);
int sock_no_shutdown(struct socket *, int);
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 9c9b3768b350..f88f8a2cab0d 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -388,8 +388,7 @@ bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst);
void tcp_close(struct sock *sk, long timeout);
void tcp_init_sock(struct sock *sk);
void tcp_init_transfer(struct sock *sk, int bpf_op);
-__poll_t tcp_poll(struct file *file, struct socket *sock,
- struct poll_table_struct *wait);
+__poll_t tcp_poll_mask(struct socket *sock, __poll_t events);
int tcp_getsockopt(struct sock *sk, int level, int optname,
char __user *optval, int __user *optlen);
int tcp_setsockopt(struct sock *sk, int level, int optname,
@@ -1747,27 +1746,22 @@ enum tcp_seq_states {
TCP_SEQ_STATE_ESTABLISHED,
};
-int tcp_seq_open(struct inode *inode, struct file *file);
+void *tcp_seq_start(struct seq_file *seq, loff_t *pos);
+void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos);
+void tcp_seq_stop(struct seq_file *seq, void *v);
struct tcp_seq_afinfo {
- char *name;
sa_family_t family;
- const struct file_operations *seq_fops;
- struct seq_operations seq_ops;
};
struct tcp_iter_state {
struct seq_net_private p;
- sa_family_t family;
enum tcp_seq_states state;
struct sock *syn_wait_sk;
int bucket, offset, sbucket, num;
loff_t last_pos;
};
-int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo);
-void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo);
-
extern struct request_sock_ops tcp_request_sock_ops;
extern struct request_sock_ops tcp6_request_sock_ops;
diff --git a/include/net/tls.h b/include/net/tls.h
index 3da8e13a6d96..f5fb16da3860 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -97,6 +97,9 @@ struct tls_sw_context {
u8 control;
bool decrypted;
+ char rx_aad_ciphertext[TLS_AAD_SPACE_SIZE];
+ char rx_aad_plaintext[TLS_AAD_SPACE_SIZE];
+
/* Sending context */
char aad_space[TLS_AAD_SPACE_SIZE];
@@ -148,6 +151,7 @@ struct tls_context {
struct scatterlist *partially_sent_record;
u16 partially_sent_offset;
unsigned long flags;
+ bool in_tcp_sendpages;
u16 pending_open_record_frags;
int (*push_pending_record)(struct sock *sk, int flags);
diff --git a/include/net/udp.h b/include/net/udp.h
index 0676b272f6ac..d8ca3b26964d 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -276,7 +276,7 @@ int udp_init_sock(struct sock *sk);
int udp_pre_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len);
int __udp_disconnect(struct sock *sk, int flags);
int udp_disconnect(struct sock *sk, int flags);
-__poll_t udp_poll(struct file *file, struct socket *sock, poll_table *wait);
+__poll_t udp_poll_mask(struct socket *sock, __poll_t events);
struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
netdev_features_t features,
bool is_ipv6);
@@ -408,31 +408,27 @@ do { \
#define __UDPX_INC_STATS(sk, field) __UDP_INC_STATS(sock_net(sk), field, 0)
#endif
-/* /proc */
-int udp_seq_open(struct inode *inode, struct file *file);
-
+#ifdef CONFIG_PROC_FS
struct udp_seq_afinfo {
- char *name;
sa_family_t family;
struct udp_table *udp_table;
- const struct file_operations *seq_fops;
- struct seq_operations seq_ops;
};
struct udp_iter_state {
struct seq_net_private p;
- sa_family_t family;
int bucket;
- struct udp_table *udp_table;
};
-#ifdef CONFIG_PROC_FS
-int udp_proc_register(struct net *net, struct udp_seq_afinfo *afinfo);
-void udp_proc_unregister(struct net *net, struct udp_seq_afinfo *afinfo);
+void *udp_seq_start(struct seq_file *seq, loff_t *pos);
+void *udp_seq_next(struct seq_file *seq, void *v, loff_t *pos);
+void udp_seq_stop(struct seq_file *seq, void *v);
+
+extern const struct seq_operations udp_seq_ops;
+extern const struct seq_operations udp6_seq_ops;
int udp4_proc_init(void);
void udp4_proc_exit(void);
-#endif
+#endif /* CONFIG_PROC_FS */
int udpv4_offload_init(void);
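With the private seq_fops and register/unregister pair gone, registration collapses to the generic proc helpers; a hedged sketch assuming the proc_create_net_data() helper this series converts callers to, with the per-family afinfo (family and udp_table, now removed from udp_iter_state) passed as the entry's data:

    if (!proc_create_net_data("udp", 0444, net->proc_net, &udp_seq_ops,
                              sizeof(struct udp_iter_state),
                              &udp4_seq_afinfo))
            return -ENOMEM;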
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index a872379b69da..45e75c36b738 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -375,6 +375,7 @@ struct xfrm_input_afinfo {
int xfrm_input_register_afinfo(const struct xfrm_input_afinfo *afinfo);
int xfrm_input_unregister_afinfo(const struct xfrm_input_afinfo *afinfo);
+void xfrm_flush_gc(void);
void xfrm_state_delete_tunnel(struct xfrm_state *x);
struct xfrm_type {
diff --git a/include/rdma/ib_umem.h b/include/rdma/ib_umem.h
index 23159dd5be18..a1fd63871d17 100644
--- a/include/rdma/ib_umem.h
+++ b/include/rdma/ib_umem.h
@@ -48,7 +48,6 @@ struct ib_umem {
int writable;
int hugetlb;
struct work_struct work;
- struct pid *pid;
struct mm_struct *mm;
unsigned long diff;
struct ib_umem_odp *odp_data;
diff --git a/include/rdma/uverbs_ioctl.h b/include/rdma/uverbs_ioctl.h
index 4a4201d997a7..095383a4bd1a 100644
--- a/include/rdma/uverbs_ioctl.h
+++ b/include/rdma/uverbs_ioctl.h
@@ -411,13 +411,13 @@ static inline int uverbs_attr_get_enum_id(const struct uverbs_attr_bundle *attrs
static inline void *uverbs_attr_get_obj(const struct uverbs_attr_bundle *attrs_bundle,
u16 idx)
{
- struct ib_uobject *uobj =
- uverbs_attr_get(attrs_bundle, idx)->obj_attr.uobject;
+ const struct uverbs_attr *attr;
- if (IS_ERR(uobj))
- return uobj;
+ attr = uverbs_attr_get(attrs_bundle, idx);
+ if (IS_ERR(attr))
+ return ERR_CAST(attr);
- return uobj->object;
+ return attr->obj_attr.uobject->object;
}
static inline int uverbs_copy_to(const struct uverbs_attr_bundle *attrs_bundle,
diff --git a/include/scsi/osd_initiator.h b/include/scsi/osd_initiator.h
index a29d3086eb56..86a569d008b2 100644
--- a/include/scsi/osd_initiator.h
+++ b/include/scsi/osd_initiator.h
@@ -148,7 +148,6 @@ struct osd_request {
u8 *pad_buff;
} out, in;
- gfp_t alloc_flags;
unsigned timeout;
unsigned retries;
unsigned sense_len;
@@ -202,14 +201,11 @@ static inline bool osd_req_is_ver1(struct osd_request *or)
*
* @osd_dev: OSD device that holds the scsi-device and default values
* that the request is associated with.
- * @gfp: The allocation flags to use for request allocation, and all
- * subsequent allocations. This will be stored at
- * osd_request->alloc_flags, can be changed by user later
*
* Allocate osd_request and initialize all members to the
* default/initial state.
*/
-struct osd_request *osd_start_request(struct osd_dev *od, gfp_t gfp);
+struct osd_request *osd_start_request(struct osd_dev *od);
enum osd_req_options {
OSD_REQ_FUA = 0x08, /* Force Unit Access */
diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h
index 12f454cb6f61..53b485fe9b67 100644
--- a/include/scsi/scsi_host.h
+++ b/include/scsi/scsi_host.h
@@ -307,7 +307,7 @@ struct scsi_host_template {
* EH_HANDLED: I fixed the error, please complete the command
* EH_RESET_TIMER: I need more time, reset the timer and
* begin counting again
- * EH_NOT_HANDLED Begin normal error recovery
+ * EH_DONE: Begin normal error recovery
*
* Status: OPTIONAL
*/
diff --git a/include/trace/events/afs.h b/include/trace/events/afs.h
index f0820554caa9..d0a341bc4540 100644
--- a/include/trace/events/afs.h
+++ b/include/trace/events/afs.h
@@ -575,6 +575,48 @@ TRACE_EVENT(afs_protocol_error,
__entry->call, __entry->error, __entry->where)
);
+TRACE_EVENT(afs_cm_no_server,
+ TP_PROTO(struct afs_call *call, struct sockaddr_rxrpc *srx),
+
+ TP_ARGS(call, srx),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, call )
+ __field(unsigned int, op_id )
+ __field_struct(struct sockaddr_rxrpc, srx )
+ ),
+
+ TP_fast_assign(
+ __entry->call = call->debug_id;
+ __entry->op_id = call->operation_ID;
+ memcpy(&__entry->srx, srx, sizeof(__entry->srx));
+ ),
+
+ TP_printk("c=%08x op=%u %pISpc",
+ __entry->call, __entry->op_id, &__entry->srx.transport)
+ );
+
+TRACE_EVENT(afs_cm_no_server_u,
+ TP_PROTO(struct afs_call *call, const uuid_t *uuid),
+
+ TP_ARGS(call, uuid),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, call )
+ __field(unsigned int, op_id )
+ __field_struct(uuid_t, uuid )
+ ),
+
+ TP_fast_assign(
+ __entry->call = call->debug_id;
+ __entry->op_id = call->operation_ID;
+ memcpy(&__entry->uuid, uuid, sizeof(__entry->uuid));
+ ),
+
+ TP_printk("c=%08x op=%u %pU",
+ __entry->call, __entry->op_id, &__entry->uuid)
+ );
+
#endif /* _TRACE_AFS_H */
/* This part must be outside protection */
diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h
index 965c650a5273..39b94ec965be 100644
--- a/include/trace/events/btrfs.h
+++ b/include/trace/events/btrfs.h
@@ -121,9 +121,9 @@ TRACE_EVENT(btrfs_transaction_commit,
__entry->root_objectid = root->root_key.objectid;
),
- TP_printk_btrfs("root = %llu(%s), gen = %llu",
+ TP_printk_btrfs("root=%llu(%s) gen=%llu",
show_root_type(__entry->root_objectid),
- (unsigned long long)__entry->generation)
+ __entry->generation)
);
DECLARE_EVENT_CLASS(btrfs__inode,
@@ -133,7 +133,7 @@ DECLARE_EVENT_CLASS(btrfs__inode,
TP_ARGS(inode),
TP_STRUCT__entry_btrfs(
- __field( ino_t, ino )
+ __field( u64, ino )
__field( blkcnt_t, blocks )
__field( u64, disk_i_size )
__field( u64, generation )
@@ -143,7 +143,7 @@ DECLARE_EVENT_CLASS(btrfs__inode,
),
TP_fast_assign_btrfs(btrfs_sb(inode->i_sb),
- __entry->ino = inode->i_ino;
+ __entry->ino = btrfs_ino(BTRFS_I(inode));
__entry->blocks = inode->i_blocks;
__entry->disk_i_size = BTRFS_I(inode)->disk_i_size;
__entry->generation = BTRFS_I(inode)->generation;
@@ -153,15 +153,15 @@ DECLARE_EVENT_CLASS(btrfs__inode,
BTRFS_I(inode)->root->root_key.objectid;
),
- TP_printk_btrfs("root=%llu(%s) gen=%llu ino=%lu blocks=%llu "
+ TP_printk_btrfs("root=%llu(%s) gen=%llu ino=%llu blocks=%llu "
"disk_i_size=%llu last_trans=%llu logged_trans=%llu",
show_root_type(__entry->root_objectid),
- (unsigned long long)__entry->generation,
- (unsigned long)__entry->ino,
+ __entry->generation,
+ __entry->ino,
(unsigned long long)__entry->blocks,
- (unsigned long long)__entry->disk_i_size,
- (unsigned long long)__entry->last_trans,
- (unsigned long long)__entry->logged_trans)
+ __entry->disk_i_size,
+ __entry->last_trans,
+ __entry->logged_trans)
);
DEFINE_EVENT(btrfs__inode, btrfs_inode_new,
@@ -244,23 +244,25 @@ TRACE_EVENT_CONDITION(btrfs_get_extent,
"block_len=%llu flags=%s refs=%u "
"compress_type=%u",
show_root_type(__entry->root_objectid),
- (unsigned long long)__entry->ino,
- (unsigned long long)__entry->start,
- (unsigned long long)__entry->len,
- (unsigned long long)__entry->orig_start,
+ __entry->ino,
+ __entry->start,
+ __entry->len,
+ __entry->orig_start,
show_map_type(__entry->block_start),
- (unsigned long long)__entry->block_len,
+ __entry->block_len,
show_map_flags(__entry->flags),
__entry->refs, __entry->compress_type)
);
TRACE_EVENT(btrfs_handle_em_exist,
- TP_PROTO(const struct extent_map *existing, const struct extent_map *map, u64 start, u64 len),
+ TP_PROTO(struct btrfs_fs_info *fs_info,
+ const struct extent_map *existing, const struct extent_map *map,
+ u64 start, u64 len),
- TP_ARGS(existing, map, start, len),
+ TP_ARGS(fs_info, existing, map, start, len),
- TP_STRUCT__entry(
+ TP_STRUCT__entry_btrfs(
__field( u64, e_start )
__field( u64, e_len )
__field( u64, map_start )
@@ -269,7 +271,7 @@ TRACE_EVENT(btrfs_handle_em_exist,
__field( u64, len )
),
- TP_fast_assign(
+ TP_fast_assign_btrfs(fs_info,
__entry->e_start = existing->start;
__entry->e_len = existing->len;
__entry->map_start = map->start;
@@ -278,15 +280,15 @@ TRACE_EVENT(btrfs_handle_em_exist,
__entry->len = len;
),
- TP_printk("start=%llu len=%llu "
+ TP_printk_btrfs("start=%llu len=%llu "
"existing(start=%llu len=%llu) "
"em(start=%llu len=%llu)",
- (unsigned long long)__entry->start,
- (unsigned long long)__entry->len,
- (unsigned long long)__entry->e_start,
- (unsigned long long)__entry->e_len,
- (unsigned long long)__entry->map_start,
- (unsigned long long)__entry->map_len)
+ __entry->start,
+ __entry->len,
+ __entry->e_start,
+ __entry->e_len,
+ __entry->map_start,
+ __entry->map_len)
);
/* file extent item */
@@ -443,7 +445,7 @@ DECLARE_EVENT_CLASS(btrfs__ordered_extent,
TP_ARGS(inode, ordered),
TP_STRUCT__entry_btrfs(
- __field( ino_t, ino )
+ __field( u64, ino )
__field( u64, file_offset )
__field( u64, start )
__field( u64, len )
@@ -457,7 +459,7 @@ DECLARE_EVENT_CLASS(btrfs__ordered_extent,
),
TP_fast_assign_btrfs(btrfs_sb(inode->i_sb),
- __entry->ino = inode->i_ino;
+ __entry->ino = btrfs_ino(BTRFS_I(inode));
__entry->file_offset = ordered->file_offset;
__entry->start = ordered->start;
__entry->len = ordered->len;
@@ -477,13 +479,13 @@ DECLARE_EVENT_CLASS(btrfs__ordered_extent,
"bytes_left=%llu flags=%s compress_type=%d "
"refs=%d",
show_root_type(__entry->root_objectid),
- (unsigned long long)__entry->ino,
- (unsigned long long)__entry->file_offset,
- (unsigned long long)__entry->start,
- (unsigned long long)__entry->len,
- (unsigned long long)__entry->disk_len,
- (unsigned long long)__entry->truncated_len,
- (unsigned long long)__entry->bytes_left,
+ __entry->ino,
+ __entry->file_offset,
+ __entry->start,
+ __entry->len,
+ __entry->disk_len,
+ __entry->truncated_len,
+ __entry->bytes_left,
show_ordered_flags(__entry->flags),
__entry->compress_type, __entry->refs)
);
@@ -528,7 +530,7 @@ DECLARE_EVENT_CLASS(btrfs__writepage,
TP_ARGS(page, inode, wbc),
TP_STRUCT__entry_btrfs(
- __field( ino_t, ino )
+ __field( u64, ino )
__field( pgoff_t, index )
__field( long, nr_to_write )
__field( long, pages_skipped )
@@ -542,7 +544,7 @@ DECLARE_EVENT_CLASS(btrfs__writepage,
),
TP_fast_assign_btrfs(btrfs_sb(inode->i_sb),
- __entry->ino = inode->i_ino;
+ __entry->ino = btrfs_ino(BTRFS_I(inode));
__entry->index = page->index;
__entry->nr_to_write = wbc->nr_to_write;
__entry->pages_skipped = wbc->pages_skipped;
@@ -556,12 +558,12 @@ DECLARE_EVENT_CLASS(btrfs__writepage,
BTRFS_I(inode)->root->root_key.objectid;
),
- TP_printk_btrfs("root=%llu(%s) ino=%lu page_index=%lu "
+ TP_printk_btrfs("root=%llu(%s) ino=%llu page_index=%lu "
"nr_to_write=%ld pages_skipped=%ld range_start=%llu "
"range_end=%llu for_kupdate=%d "
"for_reclaim=%d range_cyclic=%d writeback_index=%lu",
show_root_type(__entry->root_objectid),
- (unsigned long)__entry->ino, __entry->index,
+ __entry->ino, __entry->index,
__entry->nr_to_write, __entry->pages_skipped,
__entry->range_start, __entry->range_end,
__entry->for_kupdate,
@@ -584,7 +586,7 @@ TRACE_EVENT(btrfs_writepage_end_io_hook,
TP_ARGS(page, start, end, uptodate),
TP_STRUCT__entry_btrfs(
- __field( ino_t, ino )
+ __field( u64, ino )
__field( pgoff_t, index )
__field( u64, start )
__field( u64, end )
@@ -593,7 +595,7 @@ TRACE_EVENT(btrfs_writepage_end_io_hook,
),
TP_fast_assign_btrfs(btrfs_sb(page->mapping->host->i_sb),
- __entry->ino = page->mapping->host->i_ino;
+ __entry->ino = btrfs_ino(BTRFS_I(page->mapping->host));
__entry->index = page->index;
__entry->start = start;
__entry->end = end;
@@ -602,12 +604,12 @@ TRACE_EVENT(btrfs_writepage_end_io_hook,
BTRFS_I(page->mapping->host)->root->root_key.objectid;
),
- TP_printk_btrfs("root=%llu(%s) ino=%lu page_index=%lu start=%llu "
+ TP_printk_btrfs("root=%llu(%s) ino=%llu page_index=%lu start=%llu "
"end=%llu uptodate=%d",
show_root_type(__entry->root_objectid),
- (unsigned long)__entry->ino, (unsigned long)__entry->index,
- (unsigned long long)__entry->start,
- (unsigned long long)__entry->end, __entry->uptodate)
+ __entry->ino, (unsigned long)__entry->index,
+ __entry->start,
+ __entry->end, __entry->uptodate)
);
TRACE_EVENT(btrfs_sync_file,
@@ -617,8 +619,8 @@ TRACE_EVENT(btrfs_sync_file,
TP_ARGS(file, datasync),
TP_STRUCT__entry_btrfs(
- __field( ino_t, ino )
- __field( ino_t, parent )
+ __field( u64, ino )
+ __field( u64, parent )
__field( int, datasync )
__field( u64, root_objectid )
),
@@ -628,16 +630,17 @@ TRACE_EVENT(btrfs_sync_file,
const struct inode *inode = d_inode(dentry);
TP_fast_assign_fsid(btrfs_sb(file->f_path.dentry->d_sb));
- __entry->ino = inode->i_ino;
- __entry->parent = d_inode(dentry->d_parent)->i_ino;
+ __entry->ino = btrfs_ino(BTRFS_I(inode));
+ __entry->parent = btrfs_ino(BTRFS_I(d_inode(dentry->d_parent)));
__entry->datasync = datasync;
__entry->root_objectid =
BTRFS_I(inode)->root->root_key.objectid;
),
- TP_printk_btrfs("root=%llu(%s) ino=%ld parent=%ld datasync=%d",
+ TP_printk_btrfs("root=%llu(%s) ino=%llu parent=%llu datasync=%d",
show_root_type(__entry->root_objectid),
- (unsigned long)__entry->ino, (unsigned long)__entry->parent,
+ __entry->ino,
+ __entry->parent,
__entry->datasync)
);
@@ -655,7 +658,7 @@ TRACE_EVENT(btrfs_sync_fs,
__entry->wait = wait;
),
- TP_printk_btrfs("wait = %d", __entry->wait)
+ TP_printk_btrfs("wait=%d", __entry->wait)
);
TRACE_EVENT(btrfs_add_block_group,
@@ -665,8 +668,7 @@ TRACE_EVENT(btrfs_add_block_group,
TP_ARGS(fs_info, block_group, create),
- TP_STRUCT__entry(
- __array( u8, fsid, BTRFS_FSID_SIZE )
+ TP_STRUCT__entry_btrfs(
__field( u64, offset )
__field( u64, size )
__field( u64, flags )
@@ -675,8 +677,7 @@ TRACE_EVENT(btrfs_add_block_group,
__field( int, create )
),
- TP_fast_assign(
- memcpy(__entry->fsid, fs_info->fsid, BTRFS_FSID_SIZE);
+ TP_fast_assign_btrfs(fs_info,
__entry->offset = block_group->key.objectid;
__entry->size = block_group->key.offset;
__entry->flags = block_group->flags;
@@ -686,16 +687,16 @@ TRACE_EVENT(btrfs_add_block_group,
__entry->create = create;
),
- TP_printk("%pU: block_group offset=%llu size=%llu "
+ TP_printk_btrfs("block_group offset=%llu size=%llu "
"flags=%llu(%s) bytes_used=%llu bytes_super=%llu "
- "create=%d", __entry->fsid,
- (unsigned long long)__entry->offset,
- (unsigned long long)__entry->size,
- (unsigned long long)__entry->flags,
+ "create=%d",
+ __entry->offset,
+ __entry->size,
+ __entry->flags,
__print_flags((unsigned long)__entry->flags, "|",
BTRFS_GROUP_FLAGS),
- (unsigned long long)__entry->bytes_used,
- (unsigned long long)__entry->bytes_super, __entry->create)
+ __entry->bytes_used,
+ __entry->bytes_super, __entry->create)
);
#define show_ref_action(action) \
@@ -740,13 +741,13 @@ DECLARE_EVENT_CLASS(btrfs_delayed_tree_ref,
TP_printk_btrfs("bytenr=%llu num_bytes=%llu action=%s "
"parent=%llu(%s) ref_root=%llu(%s) level=%d "
"type=%s seq=%llu",
- (unsigned long long)__entry->bytenr,
- (unsigned long long)__entry->num_bytes,
+ __entry->bytenr,
+ __entry->num_bytes,
show_ref_action(__entry->action),
show_root_type(__entry->parent),
show_root_type(__entry->ref_root),
__entry->level, show_ref_type(__entry->type),
- (unsigned long long)__entry->seq)
+ __entry->seq)
);
DEFINE_EVENT(btrfs_delayed_tree_ref, add_delayed_tree_ref,
@@ -805,15 +806,15 @@ DECLARE_EVENT_CLASS(btrfs_delayed_data_ref,
TP_printk_btrfs("bytenr=%llu num_bytes=%llu action=%s "
"parent=%llu(%s) ref_root=%llu(%s) owner=%llu "
"offset=%llu type=%s seq=%llu",
- (unsigned long long)__entry->bytenr,
- (unsigned long long)__entry->num_bytes,
+ __entry->bytenr,
+ __entry->num_bytes,
show_ref_action(__entry->action),
show_root_type(__entry->parent),
show_root_type(__entry->ref_root),
- (unsigned long long)__entry->owner,
- (unsigned long long)__entry->offset,
+ __entry->owner,
+ __entry->offset,
show_ref_type(__entry->type),
- (unsigned long long)__entry->seq)
+ __entry->seq)
);
DEFINE_EVENT(btrfs_delayed_data_ref, add_delayed_data_ref,
@@ -859,8 +860,8 @@ DECLARE_EVENT_CLASS(btrfs_delayed_ref_head,
),
TP_printk_btrfs("bytenr=%llu num_bytes=%llu action=%s is_data=%d",
- (unsigned long long)__entry->bytenr,
- (unsigned long long)__entry->num_bytes,
+ __entry->bytenr,
+ __entry->num_bytes,
show_ref_action(__entry->action),
__entry->is_data)
);
@@ -923,8 +924,8 @@ DECLARE_EVENT_CLASS(btrfs__chunk,
TP_printk_btrfs("root=%llu(%s) offset=%llu size=%llu "
"num_stripes=%d sub_stripes=%d type=%s",
show_root_type(__entry->root_objectid),
- (unsigned long long)__entry->offset,
- (unsigned long long)__entry->size,
+ __entry->offset,
+ __entry->size,
__entry->num_stripes, __entry->sub_stripes,
show_chunk_type(__entry->type))
);
@@ -974,9 +975,9 @@ TRACE_EVENT(btrfs_cow_block,
"(orig_level=%d) cow_buf=%llu (cow_level=%d)",
show_root_type(__entry->root_objectid),
__entry->refs,
- (unsigned long long)__entry->buf_start,
+ __entry->buf_start,
__entry->buf_level,
- (unsigned long long)__entry->cow_start,
+ __entry->cow_start,
__entry->cow_level)
);
@@ -1001,7 +1002,7 @@ TRACE_EVENT(btrfs_space_reservation,
__entry->reserve = reserve;
),
- TP_printk_btrfs("%s: %Lu %s %Lu", __get_str(type), __entry->val,
+ TP_printk_btrfs("%s: %llu %s %llu", __get_str(type), __entry->val,
__entry->reserve ? "reserve" : "release",
__entry->bytes)
);
@@ -1019,29 +1020,27 @@ TRACE_EVENT(btrfs_trigger_flush,
TP_ARGS(fs_info, flags, bytes, flush, reason),
- TP_STRUCT__entry(
- __array( u8, fsid, BTRFS_FSID_SIZE )
+ TP_STRUCT__entry_btrfs(
__field( u64, flags )
__field( u64, bytes )
__field( int, flush )
__string( reason, reason )
),
- TP_fast_assign(
- memcpy(__entry->fsid, fs_info->fsid, BTRFS_FSID_SIZE);
+ TP_fast_assign_btrfs(fs_info,
__entry->flags = flags;
__entry->bytes = bytes;
__entry->flush = flush;
__assign_str(reason, reason)
),
- TP_printk("%pU: %s: flush=%d(%s) flags=%llu(%s) bytes=%llu",
- __entry->fsid, __get_str(reason), __entry->flush,
+ TP_printk_btrfs("%s: flush=%d(%s) flags=%llu(%s) bytes=%llu",
+ __get_str(reason), __entry->flush,
show_flush_action(__entry->flush),
- (unsigned long long)__entry->flags,
+ __entry->flags,
__print_flags((unsigned long)__entry->flags, "|",
BTRFS_GROUP_FLAGS),
- (unsigned long long)__entry->bytes)
+ __entry->bytes)
);
#define show_flush_state(state) \
@@ -1060,29 +1059,27 @@ TRACE_EVENT(btrfs_flush_space,
TP_ARGS(fs_info, flags, num_bytes, state, ret),
- TP_STRUCT__entry(
- __array( u8, fsid, BTRFS_FSID_SIZE )
+ TP_STRUCT__entry_btrfs(
__field( u64, flags )
__field( u64, num_bytes )
__field( int, state )
__field( int, ret )
),
- TP_fast_assign(
- memcpy(__entry->fsid, fs_info->fsid, BTRFS_FSID_SIZE);
+ TP_fast_assign_btrfs(fs_info,
__entry->flags = flags;
__entry->num_bytes = num_bytes;
__entry->state = state;
__entry->ret = ret;
),
- TP_printk("%pU: state=%d(%s) flags=%llu(%s) num_bytes=%llu ret=%d",
- __entry->fsid, __entry->state,
+ TP_printk_btrfs("state=%d(%s) flags=%llu(%s) num_bytes=%llu ret=%d",
+ __entry->state,
show_flush_state(__entry->state),
- (unsigned long long)__entry->flags,
+ __entry->flags,
__print_flags((unsigned long)__entry->flags, "|",
BTRFS_GROUP_FLAGS),
- (unsigned long long)__entry->num_bytes, __entry->ret)
+ __entry->num_bytes, __entry->ret)
);
DECLARE_EVENT_CLASS(btrfs__reserved_extent,
@@ -1103,8 +1100,8 @@ DECLARE_EVENT_CLASS(btrfs__reserved_extent,
TP_printk_btrfs("root=%llu(%s) start=%llu len=%llu",
show_root_type(BTRFS_EXTENT_TREE_OBJECTID),
- (unsigned long long)__entry->start,
- (unsigned long long)__entry->len)
+ __entry->start,
+ __entry->len)
);
DEFINE_EVENT(btrfs__reserved_extent, btrfs_reserved_extent_alloc,
@@ -1140,7 +1137,7 @@ TRACE_EVENT(find_free_extent,
__entry->data = data;
),
- TP_printk_btrfs("root=%Lu(%s) len=%Lu empty_size=%Lu flags=%Lu(%s)",
+ TP_printk_btrfs("root=%llu(%s) len=%llu empty_size=%llu flags=%llu(%s)",
show_root_type(BTRFS_EXTENT_TREE_OBJECTID),
__entry->num_bytes, __entry->empty_size, __entry->data,
__print_flags((unsigned long)__entry->data, "|",
@@ -1149,11 +1146,10 @@ TRACE_EVENT(find_free_extent,
DECLARE_EVENT_CLASS(btrfs__reserve_extent,
- TP_PROTO(const struct btrfs_fs_info *fs_info,
- const struct btrfs_block_group_cache *block_group, u64 start,
+ TP_PROTO(const struct btrfs_block_group_cache *block_group, u64 start,
u64 len),
- TP_ARGS(fs_info, block_group, start, len),
+ TP_ARGS(block_group, start, len),
TP_STRUCT__entry_btrfs(
__field( u64, bg_objectid )
@@ -1162,15 +1158,15 @@ DECLARE_EVENT_CLASS(btrfs__reserve_extent,
__field( u64, len )
),
- TP_fast_assign_btrfs(fs_info,
+ TP_fast_assign_btrfs(block_group->fs_info,
__entry->bg_objectid = block_group->key.objectid;
__entry->flags = block_group->flags;
__entry->start = start;
__entry->len = len;
),
- TP_printk_btrfs("root=%Lu(%s) block_group=%Lu flags=%Lu(%s) "
- "start=%Lu len=%Lu",
+ TP_printk_btrfs("root=%llu(%s) block_group=%llu flags=%llu(%s) "
+ "start=%llu len=%llu",
show_root_type(BTRFS_EXTENT_TREE_OBJECTID),
__entry->bg_objectid,
__entry->flags, __print_flags((unsigned long)__entry->flags,
@@ -1180,20 +1176,18 @@ DECLARE_EVENT_CLASS(btrfs__reserve_extent,
DEFINE_EVENT(btrfs__reserve_extent, btrfs_reserve_extent,
- TP_PROTO(const struct btrfs_fs_info *fs_info,
- const struct btrfs_block_group_cache *block_group, u64 start,
+ TP_PROTO(const struct btrfs_block_group_cache *block_group, u64 start,
u64 len),
- TP_ARGS(fs_info, block_group, start, len)
+ TP_ARGS(block_group, start, len)
);
DEFINE_EVENT(btrfs__reserve_extent, btrfs_reserve_extent_cluster,
- TP_PROTO(const struct btrfs_fs_info *fs_info,
- const struct btrfs_block_group_cache *block_group, u64 start,
+ TP_PROTO(const struct btrfs_block_group_cache *block_group, u64 start,
u64 len),
- TP_ARGS(fs_info, block_group, start, len)
+ TP_ARGS(block_group, start, len)
);
TRACE_EVENT(btrfs_find_cluster,
@@ -1221,8 +1215,8 @@ TRACE_EVENT(btrfs_find_cluster,
__entry->min_bytes = min_bytes;
),
- TP_printk_btrfs("block_group=%Lu flags=%Lu(%s) start=%Lu len=%Lu "
- "empty_size=%Lu min_bytes=%Lu", __entry->bg_objectid,
+ TP_printk_btrfs("block_group=%llu flags=%llu(%s) start=%llu len=%llu "
+ "empty_size=%llu min_bytes=%llu", __entry->bg_objectid,
__entry->flags,
__print_flags((unsigned long)__entry->flags, "|",
BTRFS_GROUP_FLAGS), __entry->start,
@@ -1243,7 +1237,7 @@ TRACE_EVENT(btrfs_failed_cluster_setup,
__entry->bg_objectid = block_group->key.objectid;
),
- TP_printk_btrfs("block_group=%Lu", __entry->bg_objectid)
+ TP_printk_btrfs("block_group=%llu", __entry->bg_objectid)
);
TRACE_EVENT(btrfs_setup_cluster,
@@ -1272,8 +1266,8 @@ TRACE_EVENT(btrfs_setup_cluster,
__entry->bitmap = bitmap;
),
- TP_printk_btrfs("block_group=%Lu flags=%Lu(%s) window_start=%Lu "
- "size=%Lu max_size=%Lu bitmap=%d",
+ TP_printk_btrfs("block_group=%llu flags=%llu(%s) window_start=%llu "
+ "size=%llu max_size=%llu bitmap=%d",
__entry->bg_objectid,
__entry->flags,
__print_flags((unsigned long)__entry->flags, "|",
@@ -1476,7 +1470,7 @@ DECLARE_EVENT_CLASS(btrfs__qgroup_rsv_data,
TP_STRUCT__entry_btrfs(
__field( u64, rootid )
- __field( unsigned long, ino )
+ __field( u64, ino )
__field( u64, start )
__field( u64, len )
__field( u64, reserved )
@@ -1485,14 +1479,14 @@ DECLARE_EVENT_CLASS(btrfs__qgroup_rsv_data,
TP_fast_assign_btrfs(btrfs_sb(inode->i_sb),
__entry->rootid = BTRFS_I(inode)->root->objectid;
- __entry->ino = inode->i_ino;
+ __entry->ino = btrfs_ino(BTRFS_I(inode));
__entry->start = start;
__entry->len = len;
__entry->reserved = reserved;
__entry->op = op;
),
- TP_printk_btrfs("root=%llu ino=%lu start=%llu len=%llu reserved=%llu op=%s",
+ TP_printk_btrfs("root=%llu ino=%llu start=%llu len=%llu reserved=%llu op=%s",
__entry->rootid, __entry->ino, __entry->start, __entry->len,
__entry->reserved,
__print_flags((unsigned long)__entry->op, "",
@@ -1584,12 +1578,14 @@ DEFINE_EVENT(btrfs_qgroup_extent, btrfs_qgroup_trace_extent,
TRACE_EVENT(btrfs_qgroup_account_extent,
- TP_PROTO(const struct btrfs_fs_info *fs_info, u64 bytenr,
+ TP_PROTO(const struct btrfs_fs_info *fs_info, u64 transid, u64 bytenr,
u64 num_bytes, u64 nr_old_roots, u64 nr_new_roots),
- TP_ARGS(fs_info, bytenr, num_bytes, nr_old_roots, nr_new_roots),
+ TP_ARGS(fs_info, transid, bytenr, num_bytes, nr_old_roots,
+ nr_new_roots),
TP_STRUCT__entry_btrfs(
+ __field( u64, transid )
__field( u64, bytenr )
__field( u64, num_bytes )
__field( u64, nr_old_roots )
@@ -1597,43 +1593,49 @@ TRACE_EVENT(btrfs_qgroup_account_extent,
),
TP_fast_assign_btrfs(fs_info,
+ __entry->transid = transid;
__entry->bytenr = bytenr;
__entry->num_bytes = num_bytes;
__entry->nr_old_roots = nr_old_roots;
__entry->nr_new_roots = nr_new_roots;
),
- TP_printk_btrfs("bytenr=%llu num_bytes=%llu nr_old_roots=%llu "
- "nr_new_roots=%llu",
- __entry->bytenr,
- __entry->num_bytes,
- __entry->nr_old_roots,
- __entry->nr_new_roots)
+ TP_printk_btrfs(
+"transid=%llu bytenr=%llu num_bytes=%llu nr_old_roots=%llu nr_new_roots=%llu",
+ __entry->transid,
+ __entry->bytenr,
+ __entry->num_bytes,
+ __entry->nr_old_roots,
+ __entry->nr_new_roots)
);
TRACE_EVENT(qgroup_update_counters,
- TP_PROTO(const struct btrfs_fs_info *fs_info, u64 qgid,
+ TP_PROTO(const struct btrfs_fs_info *fs_info,
+ struct btrfs_qgroup *qgroup,
u64 cur_old_count, u64 cur_new_count),
- TP_ARGS(fs_info, qgid, cur_old_count, cur_new_count),
+ TP_ARGS(fs_info, qgroup, cur_old_count, cur_new_count),
TP_STRUCT__entry_btrfs(
__field( u64, qgid )
+ __field( u64, old_rfer )
+ __field( u64, old_excl )
__field( u64, cur_old_count )
__field( u64, cur_new_count )
),
TP_fast_assign_btrfs(fs_info,
- __entry->qgid = qgid;
+ __entry->qgid = qgroup->qgroupid;
+ __entry->old_rfer = qgroup->rfer;
+ __entry->old_excl = qgroup->excl;
__entry->cur_old_count = cur_old_count;
__entry->cur_new_count = cur_new_count;
),
- TP_printk_btrfs("qgid=%llu cur_old_count=%llu cur_new_count=%llu",
- __entry->qgid,
- __entry->cur_old_count,
- __entry->cur_new_count)
+ TP_printk_btrfs("qgid=%llu old_rfer=%llu old_excl=%llu cur_old_count=%llu cur_new_count=%llu",
+ __entry->qgid, __entry->old_rfer, __entry->old_excl,
+ __entry->cur_old_count, __entry->cur_new_count)
);
TRACE_EVENT(qgroup_update_reserve,
@@ -1765,14 +1767,14 @@ DECLARE_EVENT_CLASS(btrfs__prelim_ref,
),
TP_printk_btrfs("root_id=%llu key=[%llu,%u,%llu] level=%d count=[%d+%d=%d] parent=%llu wanted_disk_byte=%llu nodes=%llu",
- (unsigned long long)__entry->root_id,
- (unsigned long long)__entry->objectid, __entry->type,
- (unsigned long long)__entry->offset, __entry->level,
+ __entry->root_id,
+ __entry->objectid, __entry->type,
+ __entry->offset, __entry->level,
__entry->old_count, __entry->mod_count,
__entry->old_count + __entry->mod_count,
- (unsigned long long)__entry->parent,
- (unsigned long long)__entry->bytenr,
- (unsigned long long)__entry->tree_size)
+ __entry->parent,
+ __entry->bytenr,
+ __entry->tree_size)
);
DEFINE_EVENT(btrfs__prelim_ref, btrfs_prelim_ref_merge,
@@ -1808,8 +1810,51 @@ TRACE_EVENT(btrfs_inode_mod_outstanding_extents,
TP_printk_btrfs("root=%llu(%s) ino=%llu mod=%d",
show_root_type(__entry->root_objectid),
- (unsigned long long)__entry->ino, __entry->mod)
+ __entry->ino, __entry->mod)
);
+
+DECLARE_EVENT_CLASS(btrfs__block_group,
+ TP_PROTO(const struct btrfs_block_group_cache *bg_cache),
+
+ TP_ARGS(bg_cache),
+
+ TP_STRUCT__entry_btrfs(
+ __field( u64, bytenr )
+ __field( u64, len )
+ __field( u64, used )
+ __field( u64, flags )
+ ),
+
+ TP_fast_assign_btrfs(bg_cache->fs_info,
+ __entry->bytenr = bg_cache->key.objectid;
+ __entry->len = bg_cache->key.offset;
+ __entry->used = btrfs_block_group_used(&bg_cache->item);
+ __entry->flags = bg_cache->flags;
+ ),
+
+ TP_printk_btrfs("bg bytenr=%llu len=%llu used=%llu flags=%llu(%s)",
+ __entry->bytenr, __entry->len, __entry->used, __entry->flags,
+ __print_flags(__entry->flags, "|", BTRFS_GROUP_FLAGS))
+);
+
+DEFINE_EVENT(btrfs__block_group, btrfs_remove_block_group,
+ TP_PROTO(const struct btrfs_block_group_cache *bg_cache),
+
+ TP_ARGS(bg_cache)
+);
+
+DEFINE_EVENT(btrfs__block_group, btrfs_add_unused_block_group,
+ TP_PROTO(const struct btrfs_block_group_cache *bg_cache),
+
+ TP_ARGS(bg_cache)
+);
+
+DEFINE_EVENT(btrfs__block_group, btrfs_skip_unused_block_group,
+ TP_PROTO(const struct btrfs_block_group_cache *bg_cache),
+
+ TP_ARGS(bg_cache)
+);
+
#endif /* _TRACE_BTRFS_H */
/* This part must be outside protection */
diff --git a/include/trace/events/initcall.h b/include/trace/events/initcall.h
index 8d6cf10d27c9..eb903c3f195f 100644
--- a/include/trace/events/initcall.h
+++ b/include/trace/events/initcall.h
@@ -31,7 +31,11 @@ TRACE_EVENT(initcall_start,
TP_ARGS(func),
TP_STRUCT__entry(
- __field(initcall_t, func)
+ /*
+ * Use field_struct to avoid is_signed_type()
+ * comparison of a function pointer
+ */
+ __field_struct(initcall_t, func)
),
TP_fast_assign(
@@ -48,8 +52,12 @@ TRACE_EVENT(initcall_finish,
TP_ARGS(func, ret),
TP_STRUCT__entry(
- __field(initcall_t, func)
- __field(int, ret)
+ /*
+ * Use field_struct to avoid is_signed_type()
+ * comparison of a function pointer
+ */
+ __field_struct(initcall_t, func)
+ __field(int, ret)
),
TP_fast_assign(
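For context on the comment in the hunk above: the tracing core probes every __field() type for signedness with a macro that, roughly, casts -1 to the type and compares. A sketch of why that breaks for initcall_t:

/* Approximation of the signedness probe used for __field(): */
#define is_signed_type(type)	(((type)(-1)) < (type)1)

typedef int (*initcall_t)(void);

/*
 * is_signed_type(initcall_t) would expand to an ordered '<' comparison
 * of two casted function pointers, which at least some compilers
 * reject outright ("ordered comparison of function pointers").
 * __field_struct() records the field without running the probe, so the
 * event still captures the pointer while the build stays clean.
 */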
diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h
index d8c33298c153..5936aac357ab 100644
--- a/include/trace/events/rcu.h
+++ b/include/trace/events/rcu.h
@@ -84,20 +84,21 @@ TRACE_EVENT(rcu_grace_period,
);
/*
- * Tracepoint for future grace-period events, including those for no-callbacks
- * CPUs. The caller should pull the data from the rcu_node structure,
- * other than rcuname, which comes from the rcu_state structure, and event,
- * which is one of the following:
+ * Tracepoint for future grace-period events. The caller should pull
+ * the data from the rcu_node structure, other than rcuname, which comes
+ * from the rcu_state structure, and event, which is one of the following:
*
- * "Startleaf": Request a nocb grace period based on leaf-node data.
+ * "Startleaf": Request a grace period based on leaf-node data.
+ * "Prestarted": Someone beat us to the request
* "Startedleaf": Leaf-node start proved sufficient.
* "Startedleafroot": Leaf-node start proved sufficient after checking root.
* "Startedroot": Requested a nocb grace period based on root-node data.
+ * "NoGPkthread": The RCU grace-period kthread has not yet started.
* "StartWait": Start waiting for the requested grace period.
* "ResumeWait": Resume waiting after signal.
* "EndWait": Complete wait.
* "Cleanup": Clean up rcu_node structure after previous GP.
- * "CleanupMore": Clean up, and another no-CB GP is needed.
+ * "CleanupMore": Clean up, and another GP is needed.
*/
TRACE_EVENT(rcu_future_grace_period,
diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h
index 9e96c2fe2793..077e664ac9a2 100644
--- a/include/trace/events/rxrpc.h
+++ b/include/trace/events/rxrpc.h
@@ -15,6 +15,7 @@
#define _TRACE_RXRPC_H
#include <linux/tracepoint.h>
+#include <linux/errqueue.h>
/*
* Define enums for tracing information.
@@ -210,6 +211,20 @@ enum rxrpc_congest_change {
rxrpc_cong_saw_nack,
};
+enum rxrpc_tx_fail_trace {
+ rxrpc_tx_fail_call_abort,
+ rxrpc_tx_fail_call_ack,
+ rxrpc_tx_fail_call_data_frag,
+ rxrpc_tx_fail_call_data_nofrag,
+ rxrpc_tx_fail_call_final_resend,
+ rxrpc_tx_fail_conn_abort,
+ rxrpc_tx_fail_conn_challenge,
+ rxrpc_tx_fail_conn_response,
+ rxrpc_tx_fail_reject,
+ rxrpc_tx_fail_version_keepalive,
+ rxrpc_tx_fail_version_reply,
+};
+
#endif /* end __RXRPC_DECLARE_TRACE_ENUMS_ONCE_ONLY */
/*
@@ -437,6 +452,19 @@ enum rxrpc_congest_change {
EM(RXRPC_CALL_LOCAL_ERROR, "LocalError") \
E_(RXRPC_CALL_NETWORK_ERROR, "NetError")
+#define rxrpc_tx_fail_traces \
+ EM(rxrpc_tx_fail_call_abort, "CallAbort") \
+ EM(rxrpc_tx_fail_call_ack, "CallAck") \
+ EM(rxrpc_tx_fail_call_data_frag, "CallDataFrag") \
+ EM(rxrpc_tx_fail_call_data_nofrag, "CallDataNofrag") \
+ EM(rxrpc_tx_fail_call_final_resend, "CallFinalResend") \
+ EM(rxrpc_tx_fail_conn_abort, "ConnAbort") \
+ EM(rxrpc_tx_fail_conn_challenge, "ConnChall") \
+ EM(rxrpc_tx_fail_conn_response, "ConnResp") \
+ EM(rxrpc_tx_fail_reject, "Reject") \
+ EM(rxrpc_tx_fail_version_keepalive, "VerKeepalive") \
+ E_(rxrpc_tx_fail_version_reply, "VerReply")
+
/*
* Export enum symbols via userspace.
*/
@@ -460,6 +488,7 @@ rxrpc_propose_ack_traces;
rxrpc_propose_ack_outcomes;
rxrpc_congest_modes;
rxrpc_congest_changes;
+rxrpc_tx_fail_traces;
/*
* Now redefine the EM() and E_() macros to map the enums to the strings that
@@ -1374,6 +1403,62 @@ TRACE_EVENT(rxrpc_resend,
__entry->anno)
);
+TRACE_EVENT(rxrpc_rx_icmp,
+ TP_PROTO(struct rxrpc_peer *peer, struct sock_extended_err *ee,
+ struct sockaddr_rxrpc *srx),
+
+ TP_ARGS(peer, ee, srx),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, peer )
+ __field_struct(struct sock_extended_err, ee )
+ __field_struct(struct sockaddr_rxrpc, srx )
+ ),
+
+ TP_fast_assign(
+ __entry->peer = peer->debug_id;
+ memcpy(&__entry->ee, ee, sizeof(__entry->ee));
+ memcpy(&__entry->srx, srx, sizeof(__entry->srx));
+ ),
+
+ TP_printk("P=%08x o=%u t=%u c=%u i=%u d=%u e=%d %pISp",
+ __entry->peer,
+ __entry->ee.ee_origin,
+ __entry->ee.ee_type,
+ __entry->ee.ee_code,
+ __entry->ee.ee_info,
+ __entry->ee.ee_data,
+ __entry->ee.ee_errno,
+ &__entry->srx.transport)
+ );
+
+TRACE_EVENT(rxrpc_tx_fail,
+ TP_PROTO(unsigned int debug_id, rxrpc_serial_t serial, int ret,
+ enum rxrpc_tx_fail_trace what),
+
+ TP_ARGS(debug_id, serial, ret, what),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, debug_id )
+ __field(rxrpc_serial_t, serial )
+ __field(int, ret )
+ __field(enum rxrpc_tx_fail_trace, what )
+ ),
+
+ TP_fast_assign(
+ __entry->debug_id = debug_id;
+ __entry->serial = serial;
+ __entry->ret = ret;
+ __entry->what = what;
+ ),
+
+ TP_printk("c=%08x r=%x ret=%d %s",
+ __entry->debug_id,
+ __entry->serial,
+ __entry->ret,
+ __print_symbolic(__entry->what, rxrpc_tx_fail_traces))
+ );
+
#endif /* _TRACE_RXRPC_H */
/* This part must be outside protection */
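The rxrpc_tx_fail_traces table added above relies on a trick used throughout this header: EM()/E_() are expanded twice, once to register the enum values with the tracing core and once to build the symbol table consumed by __print_symbolic(). A condensed sketch of the pattern:

#define rxrpc_tx_fail_traces \
	EM(rxrpc_tx_fail_call_abort,	"CallAbort") \
	E_(rxrpc_tx_fail_version_reply,	"VerReply")

/* First expansion: one TRACE_DEFINE_ENUM() per value. */
#define EM(a, b) TRACE_DEFINE_ENUM(a);
#define E_(a, b) TRACE_DEFINE_ENUM(a);
rxrpc_tx_fail_traces;

/* Second expansion: a { value, "name" } array for __print_symbolic(). */
#undef EM
#undef E_
#define EM(a, b)	{ a, b },
#define E_(a, b)	{ a, b }

This is why adding the table and then listing "rxrpc_tx_fail_traces;" once, as the hunk does, is enough for both the enum export and the string mapping.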
diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
index bc01e06bc716..0be866c91f62 100644
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -435,7 +435,9 @@ TRACE_EVENT(sched_pi_setprio,
memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
__entry->pid = tsk->pid;
__entry->oldprio = tsk->prio;
- __entry->newprio = pi_task ? pi_task->prio : tsk->prio;
+ __entry->newprio = pi_task ?
+ min(tsk->normal_prio, pi_task->prio) :
+ tsk->normal_prio;
/* XXX SCHED_DEADLINE bits missing */
),
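The new expression guards against reporting a boosted priority that is worse than the task's own. In kernel numbering lower means higher priority; a small worked example with hypothetical values:

int normal_prio = 120;	/* task's own (non-boosted) priority    */
int waiter_prio = 100;	/* priority of the top pi_task, if any  */

/* Boosting waiter: min(120, 100) == 100, the boosted value. */
int newprio = waiter_prio < normal_prio ? waiter_prio : normal_prio;

/*
 * Had waiter_prio been 130, min(120, 130) == 120: the traced newprio
 * never sinks below the task's normal priority, matching what the
 * scheduler actually does on deboost.
 */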
diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h
index 335d87242439..bbb08a3ef5cc 100644
--- a/include/trace/events/sunrpc.h
+++ b/include/trace/events/sunrpc.h
@@ -224,6 +224,8 @@ TRACE_EVENT(rpc_stats_latency,
TP_ARGS(task, backlog, rtt, execute),
TP_STRUCT__entry(
+ __field(unsigned int, task_id)
+ __field(unsigned int, client_id)
__field(u32, xid)
__field(int, version)
__string(progname, task->tk_client->cl_program->name)
@@ -231,13 +233,11 @@ TRACE_EVENT(rpc_stats_latency,
__field(unsigned long, backlog)
__field(unsigned long, rtt)
__field(unsigned long, execute)
- __string(addr,
- task->tk_xprt->address_strings[RPC_DISPLAY_ADDR])
- __string(port,
- task->tk_xprt->address_strings[RPC_DISPLAY_PORT])
),
TP_fast_assign(
+ __entry->client_id = task->tk_client->cl_clid;
+ __entry->task_id = task->tk_pid;
__entry->xid = be32_to_cpu(task->tk_rqstp->rq_xid);
__entry->version = task->tk_client->cl_vers;
__assign_str(progname, task->tk_client->cl_program->name)
@@ -245,14 +245,10 @@ TRACE_EVENT(rpc_stats_latency,
__entry->backlog = ktime_to_us(backlog);
__entry->rtt = ktime_to_us(rtt);
__entry->execute = ktime_to_us(execute);
- __assign_str(addr,
- task->tk_xprt->address_strings[RPC_DISPLAY_ADDR]);
- __assign_str(port,
- task->tk_xprt->address_strings[RPC_DISPLAY_PORT]);
),
- TP_printk("peer=[%s]:%s xid=0x%08x %sv%d %s backlog=%lu rtt=%lu execute=%lu",
- __get_str(addr), __get_str(port), __entry->xid,
+ TP_printk("task:%u@%d xid=0x%08x %sv%d %s backlog=%lu rtt=%lu execute=%lu",
+ __entry->task_id, __entry->client_id, __entry->xid,
__get_str(progname), __entry->version, __get_str(procname),
__entry->backlog, __entry->rtt, __entry->execute)
);
diff --git a/include/trace/events/xen.h b/include/trace/events/xen.h
index 7dd8f34c37df..fdcf88bcf0ea 100644
--- a/include/trace/events/xen.h
+++ b/include/trace/events/xen.h
@@ -352,22 +352,6 @@ DECLARE_EVENT_CLASS(xen_mmu_pgd,
DEFINE_XEN_MMU_PGD_EVENT(xen_mmu_pgd_pin);
DEFINE_XEN_MMU_PGD_EVENT(xen_mmu_pgd_unpin);
-TRACE_EVENT(xen_mmu_flush_tlb_all,
- TP_PROTO(int x),
- TP_ARGS(x),
- TP_STRUCT__entry(__array(char, x, 0)),
- TP_fast_assign((void)x),
- TP_printk("%s", "")
- );
-
-TRACE_EVENT(xen_mmu_flush_tlb,
- TP_PROTO(int x),
- TP_ARGS(x),
- TP_STRUCT__entry(__array(char, x, 0)),
- TP_fast_assign((void)x),
- TP_printk("%s", "")
- );
-
TRACE_EVENT(xen_mmu_flush_tlb_one_user,
TP_PROTO(unsigned long addr),
TP_ARGS(addr),
diff --git a/include/uapi/asm-generic/siginfo.h b/include/uapi/asm-generic/siginfo.h
index 558b902f18d4..80e2a7227205 100644
--- a/include/uapi/asm-generic/siginfo.h
+++ b/include/uapi/asm-generic/siginfo.h
@@ -249,7 +249,8 @@ typedef struct siginfo {
#define TRAP_TRACE 2 /* process trace trap */
#define TRAP_BRANCH 3 /* process taken branch trap */
#define TRAP_HWBKPT 4 /* hardware breakpoint/watchpoint */
-#define NSIGTRAP 4
+#define TRAP_UNK 5 /* undiagnosed trap */
+#define NSIGTRAP 5
/*
* There is an additional set of SIGTRAP si_codes used by ptrace
diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
index 8bcb186c6f67..42990676a55e 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -732,9 +732,11 @@ __SYSCALL(__NR_pkey_alloc, sys_pkey_alloc)
__SYSCALL(__NR_pkey_free, sys_pkey_free)
#define __NR_statx 291
__SYSCALL(__NR_statx, sys_statx)
+#define __NR_io_pgetevents 292
+__SC_COMP(__NR_io_pgetevents, sys_io_pgetevents, compat_sys_io_pgetevents)
#undef __NR_syscalls
-#define __NR_syscalls 292
+#define __NR_syscalls 293
/*
* 32 bit systems traditionally used different
diff --git a/include/uapi/linux/aio_abi.h b/include/uapi/linux/aio_abi.h
index a04adbc70ddf..ed0185945bb2 100644
--- a/include/uapi/linux/aio_abi.h
+++ b/include/uapi/linux/aio_abi.h
@@ -29,6 +29,7 @@
#include <linux/types.h>
#include <linux/fs.h>
+#include <linux/signal.h>
#include <asm/byteorder.h>
typedef __kernel_ulong_t aio_context_t;
@@ -38,10 +39,8 @@ enum {
IOCB_CMD_PWRITE = 1,
IOCB_CMD_FSYNC = 2,
IOCB_CMD_FDSYNC = 3,
- /* These two are experimental.
- * IOCB_CMD_PREADX = 4,
- * IOCB_CMD_POLL = 5,
- */
+ /* 4 was the experimental IOCB_CMD_PREADX */
+ IOCB_CMD_POLL = 5,
IOCB_CMD_NOOP = 6,
IOCB_CMD_PREADV = 7,
IOCB_CMD_PWRITEV = 8,
@@ -108,5 +107,10 @@ struct iocb {
#undef IFBIG
#undef IFLITTLE
+struct __aio_sigset {
+ sigset_t __user *sigmask;
+ size_t sigsetsize;
+};
+
#endif /* __LINUX__AIO_ABI_H */
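The new __aio_sigset pairs a signal mask with its size so io_pgetevents() can swap the mask atomically while waiting, the same idea as pselect() versus select(). A hedged usage sketch via the raw syscall, assuming headers new enough to define __NR_io_pgetevents; the struct is mirrored locally because mixing glibc's <signal.h> with <linux/aio_abi.h> can clash over sigset_t:

#include <signal.h>
#include <stddef.h>
#include <sys/syscall.h>
#include <unistd.h>

/* Local mirror of the new UAPI struct (see the hunk above). */
struct my_aio_sigset {
	const sigset_t *sigmask;
	size_t sigsetsize;
};

long wait_events(unsigned long ctx, void *events, long nr)
{
	sigset_t mask;

	sigemptyset(&mask);
	sigaddset(&mask, SIGUSR1);	/* applied only for the wait, as with pselect() */

	struct my_aio_sigset sig = {
		.sigmask    = &mask,
		.sigsetsize = _NSIG / 8,  /* kernel sigset size, not sizeof(sigset_t) */
	};

	/* timeout == NULL: block until at least one completion arrives */
	return syscall(__NR_io_pgetevents, ctx, 1L, nr, events, NULL, &sig);
}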
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index c5ec89732a8d..8c317737ba3f 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -1017,6 +1017,7 @@ struct bpf_prog_info {
__aligned_u64 map_ids;
char name[BPF_OBJ_NAME_LEN];
__u32 ifindex;
+ __u32 :32;
__u64 netns_dev;
__u64 netns_ino;
} __attribute__((aligned(8)));
@@ -1030,6 +1031,7 @@ struct bpf_map_info {
__u32 map_flags;
char name[BPF_OBJ_NAME_LEN];
__u32 ifindex;
+ __u32 :32;
__u64 netns_dev;
__u64 netns_ino;
} __attribute__((aligned(8)));
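The anonymous "__u32 :32;" bitfields make an alignment hole explicit. Without them, 32-bit x86 userspace (where __u64 has only 4-byte alignment) lays the struct out differently from a 64-bit kernel, breaking the ABI. A compile-time sketch of the idea, with hypothetical names:

#include <stdint.h>
#include <stddef.h>

struct info_padded {
	uint32_t ifindex;
	uint32_t :32;		/* explicit hole, identical on every ABI */
	uint64_t netns_dev;
} __attribute__((aligned(8)));

/*
 * On x86-64 the compiler would insert this 4-byte hole implicitly; on
 * i386 it would not. Making the padding explicit keeps the layout the
 * same for 32- and 64-bit userspace talking to the same kernel.
 */
_Static_assert(offsetof(struct info_padded, netns_dev) == 8,
	       "netns_dev must sit at offset 8 on all ABIs");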
diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h
index c8d99b9ca550..5ca1d21fc4a7 100644
--- a/include/uapi/linux/btrfs.h
+++ b/include/uapi/linux/btrfs.h
@@ -422,6 +422,21 @@ struct btrfs_ioctl_ino_lookup_args {
char name[BTRFS_INO_LOOKUP_PATH_MAX];
};
+#define BTRFS_INO_LOOKUP_USER_PATH_MAX (4080 - BTRFS_VOL_NAME_MAX - 1)
+struct btrfs_ioctl_ino_lookup_user_args {
+ /* in, inode number containing the subvolume of 'subvolid' */
+ __u64 dirid;
+ /* in */
+ __u64 treeid;
+ /* out, name of the subvolume of 'treeid' */
+ char name[BTRFS_VOL_NAME_MAX + 1];
+ /*
+ * out, constructed path from the directory with which the ioctl is
+ * called to dirid
+ */
+ char path[BTRFS_INO_LOOKUP_USER_PATH_MAX];
+};
+
/* Search criteria for the btrfs SEARCH ioctl family. */
struct btrfs_ioctl_search_key {
/*
@@ -725,6 +740,82 @@ struct btrfs_ioctl_send_args {
__u64 reserved[4]; /* in */
};
+/*
+ * Information about a fs tree root.
+ *
+ * All items are filled by the ioctl
+ */
+struct btrfs_ioctl_get_subvol_info_args {
+ /* Id of this subvolume */
+ __u64 treeid;
+
+ /* Name of this subvolume, used to get the real name at mount point */
+ char name[BTRFS_VOL_NAME_MAX + 1];
+
+ /*
+ * Id of the subvolume which contains this subvolume.
+ * Zero for top-level subvolume or a deleted subvolume.
+ */
+ __u64 parent_id;
+
+ /*
+ * Inode number of the directory which contains this subvolume.
+ * Zero for top-level subvolume or a deleted subvolume
+ */
+ __u64 dirid;
+
+ /* Latest transaction id of this subvolume */
+ __u64 generation;
+
+ /* Flags of this subvolume */
+ __u64 flags;
+
+ /* UUID of this subvolume */
+ __u8 uuid[BTRFS_UUID_SIZE];
+
+ /*
+ * UUID of the subvolume of which this subvolume is a snapshot.
+ * All zero for a non-snapshot subvolume.
+ */
+ __u8 parent_uuid[BTRFS_UUID_SIZE];
+
+ /*
+ * UUID of the subvolume from which this subvolume was received.
+ * All zero for non-received subvolume.
+ */
+ __u8 received_uuid[BTRFS_UUID_SIZE];
+
+ /* Transaction id indicating when change/create/send/receive happened */
+ __u64 ctransid;
+ __u64 otransid;
+ __u64 stransid;
+ __u64 rtransid;
+ /* Time corresponding to c/o/s/rtransid */
+ struct btrfs_ioctl_timespec ctime;
+ struct btrfs_ioctl_timespec otime;
+ struct btrfs_ioctl_timespec stime;
+ struct btrfs_ioctl_timespec rtime;
+
+ /* Must be zero */
+ __u64 reserved[8];
+};
+
+#define BTRFS_MAX_ROOTREF_BUFFER_NUM 255
+struct btrfs_ioctl_get_subvol_rootref_args {
+ /* in/out, minimum id of rootref's treeid to be searched */
+ __u64 min_treeid;
+
+ /* out */
+ struct {
+ __u64 treeid;
+ __u64 dirid;
+ } rootref[BTRFS_MAX_ROOTREF_BUFFER_NUM];
+
+ /* out, number of found items */
+ __u8 num_items;
+ __u8 align[7];
+};
+
/* Error codes as returned by the kernel */
enum btrfs_err_code {
BTRFS_ERROR_DEV_RAID1_MIN_NOT_MET = 1,
@@ -843,5 +934,11 @@ enum btrfs_err_code {
struct btrfs_ioctl_vol_args_v2)
#define BTRFS_IOC_LOGICAL_INO_V2 _IOWR(BTRFS_IOCTL_MAGIC, 59, \
struct btrfs_ioctl_logical_ino_args)
+#define BTRFS_IOC_GET_SUBVOL_INFO _IOR(BTRFS_IOCTL_MAGIC, 60, \
+ struct btrfs_ioctl_get_subvol_info_args)
+#define BTRFS_IOC_GET_SUBVOL_ROOTREF _IOWR(BTRFS_IOCTL_MAGIC, 61, \
+ struct btrfs_ioctl_get_subvol_rootref_args)
+#define BTRFS_IOC_INO_LOOKUP_USER _IOWR(BTRFS_IOCTL_MAGIC, 62, \
+ struct btrfs_ioctl_ino_lookup_user_args)
#endif /* _UAPI_LINUX_BTRFS_H */
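A hedged usage sketch for the new BTRFS_IOC_GET_SUBVOL_INFO ioctl, assuming a kernel and headers that carry these definitions and an fd that refers to the subvolume of interest; error handling abbreviated:

#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/btrfs.h>

int print_subvol_info(int fd)
{
	struct btrfs_ioctl_get_subvol_info_args args = { 0 };

	if (ioctl(fd, BTRFS_IOC_GET_SUBVOL_INFO, &args) < 0) {
		perror("BTRFS_IOC_GET_SUBVOL_INFO");
		return -1;
	}
	printf("subvol %llu name=%s parent=%llu gen=%llu\n",
	       (unsigned long long)args.treeid, args.name,
	       (unsigned long long)args.parent_id,
	       (unsigned long long)args.generation);
	return 0;
}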
diff --git a/include/uapi/linux/if_infiniband.h b/include/uapi/linux/if_infiniband.h
index 050b92dcf8cf..0fc33bf30e45 100644
--- a/include/uapi/linux/if_infiniband.h
+++ b/include/uapi/linux/if_infiniband.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */
+/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause) */
/*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
diff --git a/include/uapi/linux/netfilter/nf_conntrack_tcp.h b/include/uapi/linux/netfilter/nf_conntrack_tcp.h
index 74b91151d494..bcba72def817 100644
--- a/include/uapi/linux/netfilter/nf_conntrack_tcp.h
+++ b/include/uapi/linux/netfilter/nf_conntrack_tcp.h
@@ -46,6 +46,9 @@ enum tcp_conntrack {
/* Marks possibility for expected RFC5961 challenge ACK */
#define IP_CT_EXP_CHALLENGE_ACK 0x40
+/* Simultaneous open initialized */
+#define IP_CT_TCP_SIMULTANEOUS_OPEN 0x80
+
struct nf_ct_tcp_flags {
__u8 flags;
__u8 mask;
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 15daf5e2638d..271b93783d28 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -2698,6 +2698,8 @@ enum nl80211_attrs {
#define NL80211_ATTR_KEYS NL80211_ATTR_KEYS
#define NL80211_ATTR_FEATURE_FLAGS NL80211_ATTR_FEATURE_FLAGS
+#define NL80211_WIPHY_NAME_MAXLEN 64
+
#define NL80211_MAX_SUPP_RATES 32
#define NL80211_MAX_SUPP_HT_RATES 77
#define NL80211_MAX_SUPP_REG_RULES 64
diff --git a/include/uapi/linux/ppp-ioctl.h b/include/uapi/linux/ppp-ioctl.h
index b19a9c249b15..784c2e3e572e 100644
--- a/include/uapi/linux/ppp-ioctl.h
+++ b/include/uapi/linux/ppp-ioctl.h
@@ -106,7 +106,7 @@ struct pppol2tp_ioc_stats {
#define PPPIOCGIDLE _IOR('t', 63, struct ppp_idle) /* get idle time */
#define PPPIOCNEWUNIT _IOWR('t', 62, int) /* create new ppp unit */
#define PPPIOCATTACH _IOW('t', 61, int) /* attach to ppp unit */
-#define PPPIOCDETACH _IOW('t', 60, int) /* detach from ppp unit/chan */
+#define PPPIOCDETACH _IOW('t', 60, int) /* obsolete, do not use */
#define PPPIOCSMRRU _IOW('t', 59, int) /* set multilink MRU */
#define PPPIOCCONNECT _IOW('t', 58, int) /* connect channel to unit */
#define PPPIOCDISCONN _IO('t', 57) /* disconnect channel */
diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
index af5f8c2df87a..db9f15f5db04 100644
--- a/include/uapi/linux/prctl.h
+++ b/include/uapi/linux/prctl.h
@@ -207,4 +207,16 @@ struct prctl_mm_map {
# define PR_SVE_VL_LEN_MASK 0xffff
# define PR_SVE_VL_INHERIT (1 << 17) /* inherit across exec */
+/* Per task speculation control */
+#define PR_GET_SPECULATION_CTRL 52
+#define PR_SET_SPECULATION_CTRL 53
+/* Speculation control variants */
+# define PR_SPEC_STORE_BYPASS 0
+/* Return and control values for PR_SET/GET_SPECULATION_CTRL */
+# define PR_SPEC_NOT_AFFECTED 0
+# define PR_SPEC_PRCTL (1UL << 0)
+# define PR_SPEC_ENABLE (1UL << 1)
+# define PR_SPEC_DISABLE (1UL << 2)
+# define PR_SPEC_FORCE_DISABLE (1UL << 3)
+
#endif /* _LINUX_PRCTL_H */
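A hedged userspace sketch of the new interface: query whether per-task control of the Speculative Store Bypass mitigation is offered, then force-disable store bypass for this task. prctl() returns -1 with errno set on failure; constants are those defined above:

#include <sys/prctl.h>
#include <linux/prctl.h>

/* Returns 0 on success, -1 if unsupported or on error. */
int force_disable_ssb(void)
{
	long state = prctl(PR_GET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS,
			   0, 0, 0);

	if (state < 0)
		return -1;		/* kernel or CPU lacks support */
	if (!(state & PR_SPEC_PRCTL))
		return -1;		/* no per-task control offered */

	/* FORCE_DISABLE also forbids re-enabling via PR_SPEC_ENABLE later. */
	return prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS,
		     PR_SPEC_FORCE_DISABLE, 0, 0);
}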
diff --git a/include/uapi/linux/rds.h b/include/uapi/linux/rds.h
index a66b213de3d7..20c6bd0b0007 100644
--- a/include/uapi/linux/rds.h
+++ b/include/uapi/linux/rds.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause) */
+/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR Linux-OpenIB) */
/*
* Copyright (c) 2008 Oracle. All rights reserved.
*
diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h
index 2a0bd9dd104d..9efc0e73d50b 100644
--- a/include/uapi/linux/seccomp.h
+++ b/include/uapi/linux/seccomp.h
@@ -17,8 +17,9 @@
#define SECCOMP_GET_ACTION_AVAIL 2
/* Valid flags for SECCOMP_SET_MODE_FILTER */
-#define SECCOMP_FILTER_FLAG_TSYNC 1
-#define SECCOMP_FILTER_FLAG_LOG 2
+#define SECCOMP_FILTER_FLAG_TSYNC (1UL << 0)
+#define SECCOMP_FILTER_FLAG_LOG (1UL << 1)
+#define SECCOMP_FILTER_FLAG_SPEC_ALLOW (1UL << 2)
/*
* All BPF programs must return a 32-bit value.
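The flags are now written as explicit bit positions, which matters as the mask grows. A hedged sketch of installing a filter with the new SECCOMP_FILTER_FLAG_SPEC_ALLOW, which opts the task out of the store-bypass mitigation that entering seccomp otherwise applies implicitly; building the BPF program itself is elided:

#include <linux/seccomp.h>
#include <linux/filter.h>
#include <sys/syscall.h>
#include <unistd.h>

int install_filter(struct sock_fprog *prog)
{
	/* Sync the filter to all threads, but skip the implicit
	 * speculative-store-bypass mitigation for this task. */
	return syscall(__NR_seccomp, SECCOMP_SET_MODE_FILTER,
		       SECCOMP_FILTER_FLAG_TSYNC |
		       SECCOMP_FILTER_FLAG_SPEC_ALLOW,
		       prog);
}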
diff --git a/include/uapi/linux/signalfd.h b/include/uapi/linux/signalfd.h
index 6f0da42fc5ef..83429a05b698 100644
--- a/include/uapi/linux/signalfd.h
+++ b/include/uapi/linux/signalfd.h
@@ -35,6 +35,10 @@ struct signalfd_siginfo {
__u64 ssi_stime;
__u64 ssi_addr;
__u16 ssi_addr_lsb;
+ __u16 __pad2;
+ __s32 ssi_syscall;
+ __u64 ssi_call_addr;
+ __u32 ssi_arch;
/*
* Pad structure to 128 bytes. Remember to update the
@@ -45,7 +49,7 @@ struct signalfd_siginfo {
* comes out of a read(2) and we really don't want to have
* a compat on read(2).
*/
- __u8 __pad[46];
+ __u8 __pad[28];
};
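The pad shrinks by exactly the size of the new fields: 2 (__pad2) + 4 (ssi_syscall) + 8 (ssi_call_addr) + 4 (ssi_arch) = 18 bytes, and 46 - 18 = 28, so the structure stays at 128 bytes as the comment demands. A compile-time check one might keep next to such a struct (sketch):

_Static_assert(sizeof(struct signalfd_siginfo) == 128,
	       "signalfd_siginfo is ABI: it must remain 128 bytes");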
diff --git a/include/uapi/linux/tls.h b/include/uapi/linux/tls.h
index c6633e97eca4..ff02287495ac 100644
--- a/include/uapi/linux/tls.h
+++ b/include/uapi/linux/tls.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause) */
+/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR Linux-OpenIB) */
/*
* Copyright (c) 2016-2017, Mellanox Technologies. All rights reserved.
*
diff --git a/include/uapi/linux/types.h b/include/uapi/linux/types.h
index cd4f0b897a48..2fce8b6876e9 100644
--- a/include/uapi/linux/types.h
+++ b/include/uapi/linux/types.h
@@ -49,11 +49,7 @@ typedef __u32 __bitwise __wsum;
#define __aligned_be64 __be64 __attribute__((aligned(8)))
#define __aligned_le64 __le64 __attribute__((aligned(8)))
-#ifdef __CHECK_POLL
typedef unsigned __bitwise __poll_t;
-#else
-typedef unsigned __poll_t;
-#endif
#endif /* __ASSEMBLY__ */
#endif /* _UAPI_LINUX_TYPES_H */
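Making __poll_t unconditionally __bitwise means sparse ("make C=1") now always flags implicit mixing of poll masks with plain integers, rather than only when __CHECK_POLL was defined. A sketch of how the annotation is consumed, with EPOLLIN shown the way the epoll UAPI header defines it:

typedef unsigned __bitwise __poll_t;

#define EPOLLIN		(__force __poll_t)0x00000001

__poll_t mask = EPOLLIN;	/* fine: both sides are __poll_t       */
/* int leak = mask; */		/* sparse would warn: restricted type  */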
diff --git a/include/uapi/rdma/cxgb3-abi.h b/include/uapi/rdma/cxgb3-abi.h
index 9acb4b7a6246..85aed672f43e 100644
--- a/include/uapi/rdma/cxgb3-abi.h
+++ b/include/uapi/rdma/cxgb3-abi.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause) */
+/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR Linux-OpenIB) */
/*
* Copyright (c) 2006 Chelsio, Inc. All rights reserved.
*
diff --git a/include/uapi/rdma/cxgb4-abi.h b/include/uapi/rdma/cxgb4-abi.h
index 1fefd0140c26..a159ba8dcf8f 100644
--- a/include/uapi/rdma/cxgb4-abi.h
+++ b/include/uapi/rdma/cxgb4-abi.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause) */
+/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR Linux-OpenIB) */
/*
* Copyright (c) 2009-2010 Chelsio, Inc. All rights reserved.
*
diff --git a/include/uapi/rdma/hns-abi.h b/include/uapi/rdma/hns-abi.h
index 7092c8de4bd8..78613b609fa8 100644
--- a/include/uapi/rdma/hns-abi.h
+++ b/include/uapi/rdma/hns-abi.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause) */
+/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR Linux-OpenIB) */
/*
* Copyright (c) 2016 Hisilicon Limited.
*
diff --git a/include/uapi/rdma/ib_user_cm.h b/include/uapi/rdma/ib_user_cm.h
index 4a8f9562f7cd..e2709bb8cb18 100644
--- a/include/uapi/rdma/ib_user_cm.h
+++ b/include/uapi/rdma/ib_user_cm.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause) */
+/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR Linux-OpenIB) */
/*
* Copyright (c) 2005 Topspin Communications. All rights reserved.
* Copyright (c) 2005 Intel Corporation. All rights reserved.
diff --git a/include/uapi/rdma/ib_user_ioctl_verbs.h b/include/uapi/rdma/ib_user_ioctl_verbs.h
index 04e46ea517d3..625545d862d7 100644
--- a/include/uapi/rdma/ib_user_ioctl_verbs.h
+++ b/include/uapi/rdma/ib_user_ioctl_verbs.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause) */
+/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR Linux-OpenIB) */
/*
* Copyright (c) 2017-2018, Mellanox Technologies inc. All rights reserved.
*
diff --git a/include/uapi/rdma/ib_user_mad.h b/include/uapi/rdma/ib_user_mad.h
index ef92118dad97..90c0cf228020 100644
--- a/include/uapi/rdma/ib_user_mad.h
+++ b/include/uapi/rdma/ib_user_mad.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause) */
+/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR Linux-OpenIB) */
/*
* Copyright (c) 2004 Topspin Communications. All rights reserved.
* Copyright (c) 2005 Voltaire, Inc. All rights reserved.
diff --git a/include/uapi/rdma/ib_user_sa.h b/include/uapi/rdma/ib_user_sa.h
index 0d2607f0cd20..435155d6e1c6 100644
--- a/include/uapi/rdma/ib_user_sa.h
+++ b/include/uapi/rdma/ib_user_sa.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause) */
+/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR Linux-OpenIB) */
/*
* Copyright (c) 2005 Intel Corporation. All rights reserved.
*
diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h
index 9be07394fdbe..6aeb03315b0b 100644
--- a/include/uapi/rdma/ib_user_verbs.h
+++ b/include/uapi/rdma/ib_user_verbs.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause) */
+/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR Linux-OpenIB) */
/*
* Copyright (c) 2005 Topspin Communications. All rights reserved.
* Copyright (c) 2005, 2006 Cisco Systems. All rights reserved.
diff --git a/include/uapi/rdma/mlx4-abi.h b/include/uapi/rdma/mlx4-abi.h
index 04f64bc4045f..f74557528175 100644
--- a/include/uapi/rdma/mlx4-abi.h
+++ b/include/uapi/rdma/mlx4-abi.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause) */
+/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR Linux-OpenIB) */
/*
* Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved.
diff --git a/include/uapi/rdma/mlx5-abi.h b/include/uapi/rdma/mlx5-abi.h
index cb4a02c4a1ce..fdaf00e20649 100644
--- a/include/uapi/rdma/mlx5-abi.h
+++ b/include/uapi/rdma/mlx5-abi.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause) */
+/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR Linux-OpenIB) */
/*
* Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
*
diff --git a/include/uapi/rdma/mthca-abi.h b/include/uapi/rdma/mthca-abi.h
index ac756cd9e807..91b12e1a6f43 100644
--- a/include/uapi/rdma/mthca-abi.h
+++ b/include/uapi/rdma/mthca-abi.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause) */
+/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR Linux-OpenIB) */
/*
* Copyright (c) 2005 Topspin Communications. All rights reserved.
* Copyright (c) 2005, 2006 Cisco Systems. All rights reserved.
diff --git a/include/uapi/rdma/nes-abi.h b/include/uapi/rdma/nes-abi.h
index 35bfd4015d07..f80495baa969 100644
--- a/include/uapi/rdma/nes-abi.h
+++ b/include/uapi/rdma/nes-abi.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause) */
+/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR Linux-OpenIB) */
/*
* Copyright (c) 2006 - 2011 Intel Corporation. All rights reserved.
* Copyright (c) 2005 Topspin Communications. All rights reserved.
diff --git a/include/uapi/rdma/qedr-abi.h b/include/uapi/rdma/qedr-abi.h
index 8ba098900e9a..24c658b3c790 100644
--- a/include/uapi/rdma/qedr-abi.h
+++ b/include/uapi/rdma/qedr-abi.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause) */
+/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR Linux-OpenIB) */
/* QLogic qedr NIC Driver
* Copyright (c) 2015-2016 QLogic Corporation
*
diff --git a/include/uapi/rdma/rdma_user_cm.h b/include/uapi/rdma/rdma_user_cm.h
index e1269024af47..0d1e78ebad05 100644
--- a/include/uapi/rdma/rdma_user_cm.h
+++ b/include/uapi/rdma/rdma_user_cm.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause) */
+/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR Linux-OpenIB) */
/*
* Copyright (c) 2005-2006 Intel Corporation. All rights reserved.
*
diff --git a/include/uapi/rdma/rdma_user_ioctl.h b/include/uapi/rdma/rdma_user_ioctl.h
index d223f4164a0f..d92d2721b28c 100644
--- a/include/uapi/rdma/rdma_user_ioctl.h
+++ b/include/uapi/rdma/rdma_user_ioctl.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause) */
+/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR Linux-OpenIB) */
/*
* Copyright (c) 2016 Mellanox Technologies, LTD. All rights reserved.
*
diff --git a/include/uapi/rdma/rdma_user_rxe.h b/include/uapi/rdma/rdma_user_rxe.h
index 1f8a9e7daea4..44ef6a3b7afc 100644
--- a/include/uapi/rdma/rdma_user_rxe.h
+++ b/include/uapi/rdma/rdma_user_rxe.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause) */
+/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR Linux-OpenIB) */
/*
* Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
*
diff --git a/init/Kconfig b/init/Kconfig
index f013afc74b11..18b151f0ddc1 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -738,7 +738,7 @@ config CFS_BANDWIDTH
tasks running within the fair group scheduler. Groups with no limit
set are considered to be unconstrained and will run with no
restriction.
- See tip/Documentation/scheduler/sched-bwc.txt for more information.
+ See Documentation/scheduler/sched-bwc.txt for more information.
config RT_GROUP_SCHED
bool "Group scheduling for SCHED_RR/FIFO"
diff --git a/init/main.c b/init/main.c
index b795aa341a3a..3b4ada11ed52 100644
--- a/init/main.c
+++ b/init/main.c
@@ -91,6 +91,7 @@
#include <linux/cache.h>
#include <linux/rodata_test.h>
#include <linux/jump_label.h>
+#include <linux/mem_encrypt.h>
#include <asm/io.h>
#include <asm/bugs.h>
@@ -423,7 +424,7 @@ static noinline void __ref rest_init(void)
/*
* Enable might_sleep() and smp_processor_id() checks.
- * They cannot be enabled earlier because with CONFIG_PRREMPT=y
+ * They cannot be enabled earlier because with CONFIG_PREEMPT=y
* kernel_thread() would trigger might_sleep() splats. With
* CONFIG_PREEMPT_VOLUNTARY=y the init task might have scheduled
* already, but it's stuck on the kthreadd_done completion.
@@ -1034,6 +1035,13 @@ __setup("rodata=", set_debug_rodata);
static void mark_readonly(void)
{
if (rodata_enabled) {
+ /*
+ * load_module() results in W+X mappings, which are cleaned up
+ * with call_rcu_sched(). Let's make sure that queued work is
+ * flushed so that we don't hit false positives looking for
+ * insecure pages which are W+X.
+ */
+ rcu_barrier_sched();
mark_rodata_ro();
rodata_test();
} else
diff --git a/ipc/shm.c b/ipc/shm.c
index 3cf48988d68c..d73269381ec7 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -1363,14 +1363,17 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg,
if (addr) {
if (addr & (shmlba - 1)) {
- /*
- * Round down to the nearest multiple of shmlba.
- * For sane do_mmap_pgoff() parameters, avoid
- * round downs that trigger nil-page and MAP_FIXED.
- */
- if ((shmflg & SHM_RND) && addr >= shmlba)
- addr &= ~(shmlba - 1);
- else
+ if (shmflg & SHM_RND) {
+ addr &= ~(shmlba - 1); /* round down */
+
+ /*
+ * Ensure that the round-down is non-nil
+ * when remapping. This can happen for
+ * cases when addr < shmlba.
+ */
+ if (!addr && (shmflg & SHM_REMAP))
+ goto out;
+ } else
#ifndef __ARCH_FORCE_SHMLBA
if (addr & ~PAGE_MASK)
#endif
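A worked example of the case the new check catches, with hypothetical values: shmlba = 0x4000 and a caller passing SHM_RND | SHM_REMAP with addr = 0x1000. The round-down lands on the nil page, which SHM_REMAP would then happily unmap:

unsigned long shmlba = 0x4000;		/* 16K alignment requirement   */
unsigned long addr   = 0x1000;		/* caller-supplied, misaligned */

addr &= ~(shmlba - 1);			/* 0x1000 & ~0x3fff == 0x0     */

/* addr is now 0: with SHM_REMAP this would target the nil page, so
 * the kernel bails out with -EINVAL instead of proceeding. */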
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 14750e7c5ee4..027107f4be53 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -476,7 +476,7 @@ static u32 prog_fd_array_sys_lookup_elem(void *ptr)
}
/* decrement refcnt of all bpf_progs that are stored in this map */
-void bpf_fd_array_map_clear(struct bpf_map *map)
+static void bpf_fd_array_map_clear(struct bpf_map *map)
{
struct bpf_array *array = container_of(map, struct bpf_array, map);
int i;
@@ -495,6 +495,7 @@ const struct bpf_map_ops prog_array_map_ops = {
.map_fd_get_ptr = prog_fd_array_get_ptr,
.map_fd_put_ptr = prog_fd_array_put_ptr,
.map_fd_sys_lookup_elem = prog_fd_array_sys_lookup_elem,
+ .map_release_uref = bpf_fd_array_map_clear,
};
static struct bpf_event_entry *bpf_event_entry_gen(struct file *perf_file,
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index ba03ec39efb3..6ef6746a7871 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -218,47 +218,84 @@ int bpf_prog_calc_tag(struct bpf_prog *fp)
return 0;
}
-static void bpf_adj_branches(struct bpf_prog *prog, u32 pos, u32 delta)
+static int bpf_adj_delta_to_imm(struct bpf_insn *insn, u32 pos, u32 delta,
+ u32 curr, const bool probe_pass)
{
+ const s64 imm_min = S32_MIN, imm_max = S32_MAX;
+ s64 imm = insn->imm;
+
+ if (curr < pos && curr + imm + 1 > pos)
+ imm += delta;
+ else if (curr > pos + delta && curr + imm + 1 <= pos + delta)
+ imm -= delta;
+ if (imm < imm_min || imm > imm_max)
+ return -ERANGE;
+ if (!probe_pass)
+ insn->imm = imm;
+ return 0;
+}
+
+static int bpf_adj_delta_to_off(struct bpf_insn *insn, u32 pos, u32 delta,
+ u32 curr, const bool probe_pass)
+{
+ const s32 off_min = S16_MIN, off_max = S16_MAX;
+ s32 off = insn->off;
+
+ if (curr < pos && curr + off + 1 > pos)
+ off += delta;
+ else if (curr > pos + delta && curr + off + 1 <= pos + delta)
+ off -= delta;
+ if (off < off_min || off > off_max)
+ return -ERANGE;
+ if (!probe_pass)
+ insn->off = off;
+ return 0;
+}
+
+static int bpf_adj_branches(struct bpf_prog *prog, u32 pos, u32 delta,
+ const bool probe_pass)
+{
+ u32 i, insn_cnt = prog->len + (probe_pass ? delta : 0);
struct bpf_insn *insn = prog->insnsi;
- u32 i, insn_cnt = prog->len;
- bool pseudo_call;
- u8 code;
- int off;
+ int ret = 0;
for (i = 0; i < insn_cnt; i++, insn++) {
+ u8 code;
+
+ /* In the probing pass we still operate on the original,
+ * unpatched image in order to check overflows before we
+ * do any other adjustments. Therefore skip the patchlet.
+ */
+ if (probe_pass && i == pos) {
+ i += delta + 1;
+ insn++;
+ }
code = insn->code;
- if (BPF_CLASS(code) != BPF_JMP)
- continue;
- if (BPF_OP(code) == BPF_EXIT)
+ if (BPF_CLASS(code) != BPF_JMP ||
+ BPF_OP(code) == BPF_EXIT)
continue;
+ /* Adjust offset of jmps if we cross patch boundaries. */
if (BPF_OP(code) == BPF_CALL) {
- if (insn->src_reg == BPF_PSEUDO_CALL)
- pseudo_call = true;
- else
+ if (insn->src_reg != BPF_PSEUDO_CALL)
continue;
+ ret = bpf_adj_delta_to_imm(insn, pos, delta, i,
+ probe_pass);
} else {
- pseudo_call = false;
+ ret = bpf_adj_delta_to_off(insn, pos, delta, i,
+ probe_pass);
}
- off = pseudo_call ? insn->imm : insn->off;
-
- /* Adjust offset of jmps if we cross boundaries. */
- if (i < pos && i + off + 1 > pos)
- off += delta;
- else if (i > pos + delta && i + off + 1 <= pos + delta)
- off -= delta;
-
- if (pseudo_call)
- insn->imm = off;
- else
- insn->off = off;
+ if (ret)
+ break;
}
+
+ return ret;
}
struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off,
const struct bpf_insn *patch, u32 len)
{
u32 insn_adj_cnt, insn_rest, insn_delta = len - 1;
+ const u32 cnt_max = S16_MAX;
struct bpf_prog *prog_adj;
/* Since our patchlet doesn't expand the image, we're done. */
@@ -269,6 +306,15 @@ struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off,
insn_adj_cnt = prog->len + insn_delta;
+ /* Reject anything that would potentially let the insn->off
+ * target overflow when we have excessive program expansions.
+ * We need to probe here before we do any reallocation where
+ * we afterwards may not fail anymore.
+ */
+ if (insn_adj_cnt > cnt_max &&
+ bpf_adj_branches(prog, off, insn_delta, true))
+ return NULL;
+
/* Several new instructions need to be inserted. Make room
* for them. Likely, there's no need for a new allocation as
* last page could have large enough tailroom.
@@ -294,7 +340,11 @@ struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off,
sizeof(*patch) * insn_rest);
memcpy(prog_adj->insnsi + off, patch, sizeof(*patch) * len);
- bpf_adj_branches(prog_adj, off, insn_delta);
+ /* We are guaranteed to not fail at this point, otherwise
+ * the ship has sailed to reverse to the original state. An
+ * overflow cannot happen at this point.
+ */
+ BUG_ON(bpf_adj_branches(prog_adj, off, insn_delta, false));
return prog_adj;
}
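A worked example of the adjustment both helpers perform, numbers hypothetical: a jump at index 0 with off = +1 targets instruction 2, and a patch at pos = 1 grows the image by delta = 2:

/* Before patching:   insn 0 (jmp +1) -> insn 2
 * Patch inserts two extra instructions at pos 1.
 *
 * curr = 0, off = 1, pos = 1, delta = 2:
 *   curr < pos  &&  curr + off + 1 (== 2) > pos   ->  off += delta
 *
 * After patching:    insn 0 (jmp +3) -> insn 4, the same target,
 * shifted down by the two inserted instructions.
 *
 * The probe pass runs this arithmetic first without writing insn->off,
 * purely to reject any result outside [S16_MIN, S16_MAX] before the
 * image is reallocated and the operation can no longer fail.
 */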
diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c
index a3b21385e947..95a84b2f10ce 100644
--- a/kernel/bpf/sockmap.c
+++ b/kernel/bpf/sockmap.c
@@ -43,6 +43,7 @@
#include <net/tcp.h>
#include <linux/ptr_ring.h>
#include <net/inet_common.h>
+#include <linux/sched/signal.h>
#define SOCK_CREATE_FLAG_MASK \
(BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)
@@ -325,6 +326,9 @@ retry:
if (ret > 0) {
if (apply)
apply_bytes -= ret;
+
+ sg->offset += ret;
+ sg->length -= ret;
size -= ret;
offset += ret;
if (uncharge)
@@ -332,8 +336,6 @@ retry:
goto retry;
}
- sg->length = size;
- sg->offset = offset;
return ret;
}
@@ -391,7 +393,8 @@ static void return_mem_sg(struct sock *sk, int bytes, struct sk_msg_buff *md)
} while (i != md->sg_end);
}
-static void free_bytes_sg(struct sock *sk, int bytes, struct sk_msg_buff *md)
+static void free_bytes_sg(struct sock *sk, int bytes,
+ struct sk_msg_buff *md, bool charge)
{
struct scatterlist *sg = md->sg_data;
int i = md->sg_start, free;
@@ -401,11 +404,13 @@ static void free_bytes_sg(struct sock *sk, int bytes, struct sk_msg_buff *md)
if (bytes < free) {
sg[i].length -= bytes;
sg[i].offset += bytes;
- sk_mem_uncharge(sk, bytes);
+ if (charge)
+ sk_mem_uncharge(sk, bytes);
break;
}
- sk_mem_uncharge(sk, sg[i].length);
+ if (charge)
+ sk_mem_uncharge(sk, sg[i].length);
put_page(sg_page(&sg[i]));
bytes -= sg[i].length;
sg[i].length = 0;
@@ -416,6 +421,7 @@ static void free_bytes_sg(struct sock *sk, int bytes, struct sk_msg_buff *md)
if (i == MAX_SKB_FRAGS)
i = 0;
}
+ md->sg_start = i;
}
static int free_sg(struct sock *sk, int start, struct sk_msg_buff *md)
@@ -523,8 +529,6 @@ static int bpf_tcp_ingress(struct sock *sk, int apply_bytes,
i = md->sg_start;
do {
- r->sg_data[i] = md->sg_data[i];
-
size = (apply && apply_bytes < md->sg_data[i].length) ?
apply_bytes : md->sg_data[i].length;
@@ -535,6 +539,7 @@ static int bpf_tcp_ingress(struct sock *sk, int apply_bytes,
}
sk_mem_charge(sk, size);
+ r->sg_data[i] = md->sg_data[i];
r->sg_data[i].length = size;
md->sg_data[i].length -= size;
md->sg_data[i].offset += size;
@@ -575,10 +580,10 @@ static int bpf_tcp_sendmsg_do_redirect(struct sock *sk, int send,
struct sk_msg_buff *md,
int flags)
{
+ bool ingress = !!(md->flags & BPF_F_INGRESS);
struct smap_psock *psock;
struct scatterlist *sg;
- int i, err, free = 0;
- bool ingress = !!(md->flags & BPF_F_INGRESS);
+ int err = 0;
sg = md->sg_data;
@@ -606,16 +611,8 @@ static int bpf_tcp_sendmsg_do_redirect(struct sock *sk, int send,
out_rcu:
rcu_read_unlock();
out:
- i = md->sg_start;
- while (sg[i].length) {
- free += sg[i].length;
- put_page(sg_page(&sg[i]));
- sg[i].length = 0;
- i++;
- if (i == MAX_SKB_FRAGS)
- i = 0;
- }
- return free;
+ free_bytes_sg(NULL, send, md, false);
+ return err;
}
static inline void bpf_md_init(struct smap_psock *psock)
@@ -700,19 +697,26 @@ more_data:
err = bpf_tcp_sendmsg_do_redirect(redir, send, m, flags);
lock_sock(sk);
+ if (unlikely(err < 0)) {
+ free_start_sg(sk, m);
+ psock->sg_size = 0;
+ if (!cork)
+ *copied -= send;
+ } else {
+ psock->sg_size -= send;
+ }
+
if (cork) {
free_start_sg(sk, m);
+ psock->sg_size = 0;
kfree(m);
m = NULL;
+ err = 0;
}
- if (unlikely(err))
- *copied -= err;
- else
- psock->sg_size -= send;
break;
case __SK_DROP:
default:
- free_bytes_sg(sk, send, m);
+ free_bytes_sg(sk, send, m, true);
apply_bytes_dec(psock, send);
*copied -= send;
psock->sg_size -= send;
@@ -732,6 +736,26 @@ out_err:
return err;
}
+static int bpf_wait_data(struct sock *sk,
+ struct smap_psock *psk, int flags,
+ long timeo, int *err)
+{
+ int rc;
+
+ DEFINE_WAIT_FUNC(wait, woken_wake_function);
+
+ add_wait_queue(sk_sleep(sk), &wait);
+ sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
+ rc = sk_wait_event(sk, &timeo,
+ !list_empty(&psk->ingress) ||
+ !skb_queue_empty(&sk->sk_receive_queue),
+ &wait);
+ sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
+ remove_wait_queue(sk_sleep(sk), &wait);
+
+ return rc;
+}
+
static int bpf_tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
int nonblock, int flags, int *addr_len)
{
@@ -755,6 +779,7 @@ static int bpf_tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
lock_sock(sk);
+bytes_ready:
while (copied != len) {
struct scatterlist *sg;
struct sk_msg_buff *md;
@@ -809,6 +834,28 @@ static int bpf_tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
}
}
+ if (!copied) {
+ long timeo;
+ int data;
+ int err = 0;
+
+ timeo = sock_rcvtimeo(sk, nonblock);
+ data = bpf_wait_data(sk, psock, flags, timeo, &err);
+
+ if (data) {
+ if (!skb_queue_empty(&sk->sk_receive_queue)) {
+ release_sock(sk);
+ smap_release_sock(psock, sk);
+ copied = tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
+ return copied;
+ }
+ goto bytes_ready;
+ }
+
+ if (err)
+ copied = err;
+ }
+
release_sock(sk);
smap_release_sock(psock, sk);
return copied;
@@ -1656,11 +1703,11 @@ static int sock_map_ctx_update_elem(struct bpf_sock_ops_kern *skops,
* we increment the refcnt. If this is the case abort with an
* error.
*/
- verdict = bpf_prog_inc_not_zero(stab->bpf_verdict);
+ verdict = bpf_prog_inc_not_zero(verdict);
if (IS_ERR(verdict))
return PTR_ERR(verdict);
- parse = bpf_prog_inc_not_zero(stab->bpf_parse);
+ parse = bpf_prog_inc_not_zero(parse);
if (IS_ERR(parse)) {
bpf_prog_put(verdict);
return PTR_ERR(parse);
@@ -1668,12 +1715,12 @@ static int sock_map_ctx_update_elem(struct bpf_sock_ops_kern *skops,
}
if (tx_msg) {
- tx_msg = bpf_prog_inc_not_zero(stab->bpf_tx_msg);
+ tx_msg = bpf_prog_inc_not_zero(tx_msg);
if (IS_ERR(tx_msg)) {
- if (verdict)
- bpf_prog_put(verdict);
- if (parse)
+ if (parse && verdict) {
bpf_prog_put(parse);
+ bpf_prog_put(verdict);
+ }
return PTR_ERR(tx_msg);
}
}
@@ -1758,10 +1805,10 @@ static int sock_map_ctx_update_elem(struct bpf_sock_ops_kern *skops,
out_free:
smap_release_sock(psock, sock);
out_progs:
- if (verdict)
- bpf_prog_put(verdict);
- if (parse)
+ if (parse && verdict) {
bpf_prog_put(parse);
+ bpf_prog_put(verdict);
+ }
if (tx_msg)
bpf_prog_put(tx_msg);
write_unlock_bh(&sock->sk_callback_lock);
@@ -1831,7 +1878,7 @@ static int sock_map_update_elem(struct bpf_map *map,
return err;
}
-static void sock_map_release(struct bpf_map *map, struct file *map_file)
+static void sock_map_release(struct bpf_map *map)
{
struct bpf_stab *stab = container_of(map, struct bpf_stab, map);
struct bpf_prog *orig;
@@ -1855,7 +1902,7 @@ const struct bpf_map_ops sock_map_ops = {
.map_get_next_key = sock_map_get_next_key,
.map_update_elem = sock_map_update_elem,
.map_delete_elem = sock_map_delete_elem,
- .map_release = sock_map_release,
+ .map_release_uref = sock_map_release,
};
BPF_CALL_4(bpf_sock_map_update, struct bpf_sock_ops_kern *, bpf_sock,
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 4ca46df19c9a..016ef9025827 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -26,6 +26,7 @@
#include <linux/cred.h>
#include <linux/timekeeping.h>
#include <linux/ctype.h>
+#include <linux/nospec.h>
#define IS_FD_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PROG_ARRAY || \
(map)->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || \
@@ -102,12 +103,14 @@ const struct bpf_map_ops bpf_map_offload_ops = {
static struct bpf_map *find_and_alloc_map(union bpf_attr *attr)
{
const struct bpf_map_ops *ops;
+ u32 type = attr->map_type;
struct bpf_map *map;
int err;
- if (attr->map_type >= ARRAY_SIZE(bpf_map_types))
+ if (type >= ARRAY_SIZE(bpf_map_types))
return ERR_PTR(-EINVAL);
- ops = bpf_map_types[attr->map_type];
+ type = array_index_nospec(type, ARRAY_SIZE(bpf_map_types));
+ ops = bpf_map_types[type];
if (!ops)
return ERR_PTR(-EINVAL);
@@ -122,7 +125,7 @@ static struct bpf_map *find_and_alloc_map(union bpf_attr *attr)
if (IS_ERR(map))
return map;
map->ops = ops;
- map->map_type = attr->map_type;
+ map->map_type = type;
return map;
}
@@ -257,8 +260,8 @@ static void bpf_map_free_deferred(struct work_struct *work)
static void bpf_map_put_uref(struct bpf_map *map)
{
if (atomic_dec_and_test(&map->usercnt)) {
- if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY)
- bpf_fd_array_map_clear(map);
+ if (map->ops->map_release_uref)
+ map->ops->map_release_uref(map);
}
}
@@ -871,11 +874,17 @@ static const struct bpf_prog_ops * const bpf_prog_types[] = {
static int find_prog_type(enum bpf_prog_type type, struct bpf_prog *prog)
{
- if (type >= ARRAY_SIZE(bpf_prog_types) || !bpf_prog_types[type])
+ const struct bpf_prog_ops *ops;
+
+ if (type >= ARRAY_SIZE(bpf_prog_types))
+ return -EINVAL;
+ type = array_index_nospec(type, ARRAY_SIZE(bpf_prog_types));
+ ops = bpf_prog_types[type];
+ if (!ops)
return -EINVAL;
if (!bpf_prog_is_dev_bound(prog->aux))
- prog->aux->ops = bpf_prog_types[type];
+ prog->aux->ops = ops;
else
prog->aux->ops = &bpf_offload_prog_ops;
prog->type = type;
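Both lookups above now mask the attacker-controllable index. The generic fallback behind array_index_nospec() computes, branch-free, a mask that is all-ones for an in-bounds index and zero otherwise, so even a mispredicted bounds check cannot form an out-of-bounds address. Roughly, per include/linux/nospec.h:

static inline unsigned long array_index_mask_nospec(unsigned long index,
						    unsigned long size)
{
	/*
	 * For index < size the OR below leaves the sign bit clear and
	 * the arithmetic shift yields ~0UL; for index >= size the
	 * subtraction wraps, the sign bit is set, and the shift yields
	 * 0. There is no branch for the CPU to mispredict.
	 */
	return ~(long)(index | (size - 1UL - index)) >> (BITS_PER_LONG - 1);
}

/* array_index_nospec(i, sz) is then essentially: i & mask. */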
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 5dd1dcb902bf..1904e814f282 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -156,7 +156,29 @@ struct bpf_verifier_stack_elem {
#define BPF_COMPLEXITY_LIMIT_INSNS 131072
#define BPF_COMPLEXITY_LIMIT_STACK 1024
-#define BPF_MAP_PTR_POISON ((void *)0xeB9F + POISON_POINTER_DELTA)
+#define BPF_MAP_PTR_UNPRIV 1UL
+#define BPF_MAP_PTR_POISON ((void *)((0xeB9FUL << 1) + \
+ POISON_POINTER_DELTA))
+#define BPF_MAP_PTR(X) ((struct bpf_map *)((X) & ~BPF_MAP_PTR_UNPRIV))
+
+static bool bpf_map_ptr_poisoned(const struct bpf_insn_aux_data *aux)
+{
+ return BPF_MAP_PTR(aux->map_state) == BPF_MAP_PTR_POISON;
+}
+
+static bool bpf_map_ptr_unpriv(const struct bpf_insn_aux_data *aux)
+{
+ return aux->map_state & BPF_MAP_PTR_UNPRIV;
+}
+
+static void bpf_map_ptr_store(struct bpf_insn_aux_data *aux,
+ const struct bpf_map *map, bool unpriv)
+{
+ BUILD_BUG_ON((unsigned long)BPF_MAP_PTR_POISON & BPF_MAP_PTR_UNPRIV);
+ unpriv |= bpf_map_ptr_unpriv(aux);
+ aux->map_state = (unsigned long)map |
+ (unpriv ? BPF_MAP_PTR_UNPRIV : 0UL);
+}
struct bpf_call_arg_meta {
struct bpf_map *map_ptr;
@@ -978,7 +1000,7 @@ static bool register_is_null(struct bpf_reg_state *reg)
*/
static int check_stack_write(struct bpf_verifier_env *env,
struct bpf_func_state *state, /* func where register points to */
- int off, int size, int value_regno)
+ int off, int size, int value_regno, int insn_idx)
{
struct bpf_func_state *cur; /* state of the current function */
int i, slot = -off - 1, spi = slot / BPF_REG_SIZE, err;
@@ -1017,8 +1039,33 @@ static int check_stack_write(struct bpf_verifier_env *env,
state->stack[spi].spilled_ptr = cur->regs[value_regno];
state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
- for (i = 0; i < BPF_REG_SIZE; i++)
+ for (i = 0; i < BPF_REG_SIZE; i++) {
+ if (state->stack[spi].slot_type[i] == STACK_MISC &&
+ !env->allow_ptr_leaks) {
+ int *poff = &env->insn_aux_data[insn_idx].sanitize_stack_off;
+ int soff = (-spi - 1) * BPF_REG_SIZE;
+
+ /* detected reuse of integer stack slot with a pointer
+ * which means either llvm is reusing stack slot or
+ * an attacker is trying to exploit CVE-2018-3639
+ * (speculative store bypass)
+ * Have to sanitize that slot with preemptive
+ * store of zero.
+ */
+ if (*poff && *poff != soff) {
+ /* disallow programs where single insn stores
+ * into two different stack slots, since verifier
+ * cannot sanitize them
+ */
+ verbose(env,
+ "insn %d cannot access two stack slots fp%d and fp%d",
+ insn_idx, *poff, soff);
+ return -EINVAL;
+ }
+ *poff = soff;
+ }
state->stack[spi].slot_type[i] = STACK_SPILL;
+ }
} else {
u8 type = STACK_MISC;
@@ -1694,7 +1741,7 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
if (t == BPF_WRITE)
err = check_stack_write(env, state, off, size,
- value_regno);
+ value_regno, insn_idx);
else
err = check_stack_read(env, state, off, size,
value_regno);
@@ -2333,6 +2380,29 @@ static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
return 0;
}
+static int
+record_func_map(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
+ int func_id, int insn_idx)
+{
+ struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
+
+ if (func_id != BPF_FUNC_tail_call &&
+ func_id != BPF_FUNC_map_lookup_elem)
+ return 0;
+ if (meta->map_ptr == NULL) {
+ verbose(env, "kernel subsystem misconfigured verifier\n");
+ return -EINVAL;
+ }
+
+ if (!BPF_MAP_PTR(aux->map_state))
+ bpf_map_ptr_store(aux, meta->map_ptr,
+ meta->map_ptr->unpriv_array);
+ else if (BPF_MAP_PTR(aux->map_state) != meta->map_ptr)
+ bpf_map_ptr_store(aux, BPF_MAP_PTR_POISON,
+ meta->map_ptr->unpriv_array);
+ return 0;
+}
+
static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn_idx)
{
const struct bpf_func_proto *fn = NULL;
@@ -2387,13 +2457,6 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
err = check_func_arg(env, BPF_REG_2, fn->arg2_type, &meta);
if (err)
return err;
- if (func_id == BPF_FUNC_tail_call) {
- if (meta.map_ptr == NULL) {
- verbose(env, "verifier bug\n");
- return -EINVAL;
- }
- env->insn_aux_data[insn_idx].map_ptr = meta.map_ptr;
- }
err = check_func_arg(env, BPF_REG_3, fn->arg3_type, &meta);
if (err)
return err;
@@ -2404,6 +2467,10 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
if (err)
return err;
+ err = record_func_map(env, &meta, func_id, insn_idx);
+ if (err)
+ return err;
+
/* Mark slots with STACK_MISC in case of raw mode, stack offset
* is inferred from register state.
*/
@@ -2428,8 +2495,6 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
} else if (fn->ret_type == RET_VOID) {
regs[BPF_REG_0].type = NOT_INIT;
} else if (fn->ret_type == RET_PTR_TO_MAP_VALUE_OR_NULL) {
- struct bpf_insn_aux_data *insn_aux;
-
regs[BPF_REG_0].type = PTR_TO_MAP_VALUE_OR_NULL;
/* There is no offset yet applied, variable or fixed */
mark_reg_known_zero(env, regs, BPF_REG_0);
@@ -2445,11 +2510,6 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
}
regs[BPF_REG_0].map_ptr = meta.map_ptr;
regs[BPF_REG_0].id = ++env->id_gen;
- insn_aux = &env->insn_aux_data[insn_idx];
- if (!insn_aux->map_ptr)
- insn_aux->map_ptr = meta.map_ptr;
- else if (insn_aux->map_ptr != meta.map_ptr)
- insn_aux->map_ptr = BPF_MAP_PTR_POISON;
} else {
verbose(env, "unknown return type %d of func %s#%d\n",
fn->ret_type, func_id_name(func_id), func_id);
@@ -5169,6 +5229,34 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
else
continue;
+ if (type == BPF_WRITE &&
+ env->insn_aux_data[i + delta].sanitize_stack_off) {
+ struct bpf_insn patch[] = {
+ /* Sanitize suspicious stack slot with zero.
+ * There are no memory dependencies for this store,
+ * since it's only using frame pointer and immediate
+ * constant of zero
+ */
+ BPF_ST_MEM(BPF_DW, BPF_REG_FP,
+ env->insn_aux_data[i + delta].sanitize_stack_off,
+ 0),
+ /* the original STX instruction will immediately
+ * overwrite the same stack slot with appropriate value
+ */
+ *insn,
+ };
+
+ cnt = ARRAY_SIZE(patch);
+ new_prog = bpf_patch_insn_data(env, i + delta, patch, cnt);
+ if (!new_prog)
+ return -ENOMEM;
+
+ delta += cnt - 1;
+ env->prog = new_prog;
+ insn = new_prog->insnsi + i + delta;
+ continue;
+ }
+
if (env->insn_aux_data[i + delta].ptr_type != PTR_TO_CTX)
continue;
@@ -5417,6 +5505,7 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
struct bpf_insn *insn = prog->insnsi;
const struct bpf_func_proto *fn;
const int insn_cnt = prog->len;
+ struct bpf_insn_aux_data *aux;
struct bpf_insn insn_buf[16];
struct bpf_prog *new_prog;
struct bpf_map *map_ptr;
@@ -5491,19 +5580,22 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
insn->imm = 0;
insn->code = BPF_JMP | BPF_TAIL_CALL;
+ aux = &env->insn_aux_data[i + delta];
+ if (!bpf_map_ptr_unpriv(aux))
+ continue;
+
/* instead of changing every JIT dealing with tail_call
* emit two extra insns:
* if (index >= max_entries) goto out;
* index &= array->index_mask;
* to avoid out-of-bounds cpu speculation
*/
- map_ptr = env->insn_aux_data[i + delta].map_ptr;
- if (map_ptr == BPF_MAP_PTR_POISON) {
+ if (bpf_map_ptr_poisoned(aux)) {
verbose(env, "tail_call abusing map_ptr\n");
return -EINVAL;
}
- if (!map_ptr->unpriv_array)
- continue;
+
+ map_ptr = BPF_MAP_PTR(aux->map_state);
insn_buf[0] = BPF_JMP_IMM(BPF_JGE, BPF_REG_3,
map_ptr->max_entries, 2);
insn_buf[1] = BPF_ALU32_IMM(BPF_AND, BPF_REG_3,
@@ -5527,9 +5619,12 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
*/
if (prog->jit_requested && BITS_PER_LONG == 64 &&
insn->imm == BPF_FUNC_map_lookup_elem) {
- map_ptr = env->insn_aux_data[i + delta].map_ptr;
- if (map_ptr == BPF_MAP_PTR_POISON ||
- !map_ptr->ops->map_gen_lookup)
+ aux = &env->insn_aux_data[i + delta];
+ if (bpf_map_ptr_poisoned(aux))
+ goto patch_call_imm;
+
+ map_ptr = BPF_MAP_PTR(aux->map_state);
+ if (!map_ptr->ops->map_gen_lookup)
goto patch_call_imm;
cnt = map_ptr->ops->map_gen_lookup(map_ptr, insn_buf);
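The map_state word used throughout the verifier changes above packs a flag into an otherwise-aligned pointer, a common kernel trick: struct bpf_map pointers are at least word aligned, so bit 0 is free to remember whether the map came from an unprivileged user. A reduced sketch of the encode/decode:

struct bpf_map;

#define MAP_PTR_UNPRIV	1UL

static unsigned long encode(const struct bpf_map *map, int unpriv)
{
	/* alignment guarantees bit 0 of the pointer is already zero */
	return (unsigned long)map | (unpriv ? MAP_PTR_UNPRIV : 0UL);
}

static struct bpf_map *decode(unsigned long state, int *unpriv)
{
	*unpriv = state & MAP_PTR_UNPRIV;
	return (struct bpf_map *)(state & ~MAP_PTR_UNPRIV);
}

The BUILD_BUG_ON in bpf_map_ptr_store() double-checks that the poison value itself does not collide with the flag bit.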
diff --git a/kernel/cgroup/cgroup-internal.h b/kernel/cgroup/cgroup-internal.h
index b928b27050c6..0808a33d16d3 100644
--- a/kernel/cgroup/cgroup-internal.h
+++ b/kernel/cgroup/cgroup-internal.h
@@ -218,9 +218,9 @@ extern const struct proc_ns_operations cgroupns_operations;
* cgroup-v1.c
*/
extern struct cftype cgroup1_base_files[];
-extern const struct file_operations proc_cgroupstats_operations;
extern struct kernfs_syscall_ops cgroup1_kf_syscall_ops;
+int proc_cgroupstats_show(struct seq_file *m, void *v);
bool cgroup1_ssid_disabled(int ssid);
void cgroup1_pidlist_destroy_all(struct cgroup *cgrp);
void cgroup1_release_agent(struct work_struct *work);
diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c
index a2c05d2476ac..e06c97f3ed1a 100644
--- a/kernel/cgroup/cgroup-v1.c
+++ b/kernel/cgroup/cgroup-v1.c
@@ -682,7 +682,7 @@ struct cftype cgroup1_base_files[] = {
};
/* Display information about each subsystem and each hierarchy */
-static int proc_cgroupstats_show(struct seq_file *m, void *v)
+int proc_cgroupstats_show(struct seq_file *m, void *v)
{
struct cgroup_subsys *ss;
int i;
@@ -705,18 +705,6 @@ static int proc_cgroupstats_show(struct seq_file *m, void *v)
return 0;
}
-static int cgroupstats_open(struct inode *inode, struct file *file)
-{
- return single_open(file, proc_cgroupstats_show, NULL);
-}
-
-const struct file_operations proc_cgroupstats_operations = {
- .open = cgroupstats_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
/**
* cgroupstats_build - build and fill cgroupstats
* @stats: cgroupstats to fill information into
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index a662bfcbea0e..12883656e63e 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -5335,7 +5335,7 @@ int __init cgroup_init(void)
WARN_ON(sysfs_create_mount_point(fs_kobj, "cgroup"));
WARN_ON(register_filesystem(&cgroup_fs_type));
WARN_ON(register_filesystem(&cgroup2_fs_type));
- WARN_ON(!proc_create("cgroups", 0, NULL, &proc_cgroupstats_operations));
+ WARN_ON(!proc_create_single("cgroups", 0, NULL, proc_cgroupstats_show));
return 0;
}
diff --git a/kernel/compat.c b/kernel/compat.c
index 51a081b46832..702aa846ddac 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -34,6 +34,7 @@ int compat_get_timex(struct timex *txc, const struct compat_timex __user *utp)
{
struct compat_timex tx32;
+ memset(txc, 0, sizeof(struct timex));
if (copy_from_user(&tx32, utp, sizeof(struct compat_timex)))
return -EFAULT;
diff --git a/kernel/delayacct.c b/kernel/delayacct.c
index e2764d767f18..ca8ac2824f0b 100644
--- a/kernel/delayacct.c
+++ b/kernel/delayacct.c
@@ -44,23 +44,24 @@ void __delayacct_tsk_init(struct task_struct *tsk)
{
tsk->delays = kmem_cache_zalloc(delayacct_cache, GFP_KERNEL);
if (tsk->delays)
- spin_lock_init(&tsk->delays->lock);
+ raw_spin_lock_init(&tsk->delays->lock);
}
/*
* Finish delay accounting for a statistic using its timestamps (@start),
* accumalator (@total) and @count
*/
-static void delayacct_end(spinlock_t *lock, u64 *start, u64 *total, u32 *count)
+static void delayacct_end(raw_spinlock_t *lock, u64 *start, u64 *total,
+ u32 *count)
{
s64 ns = ktime_get_ns() - *start;
unsigned long flags;
if (ns > 0) {
- spin_lock_irqsave(lock, flags);
+ raw_spin_lock_irqsave(lock, flags);
*total += ns;
(*count)++;
- spin_unlock_irqrestore(lock, flags);
+ raw_spin_unlock_irqrestore(lock, flags);
}
}
@@ -127,7 +128,7 @@ int __delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk)
/* zero XXX_total, non-zero XXX_count implies XXX stat overflowed */
- spin_lock_irqsave(&tsk->delays->lock, flags);
+ raw_spin_lock_irqsave(&tsk->delays->lock, flags);
tmp = d->blkio_delay_total + tsk->delays->blkio_delay;
d->blkio_delay_total = (tmp < d->blkio_delay_total) ? 0 : tmp;
tmp = d->swapin_delay_total + tsk->delays->swapin_delay;
@@ -137,7 +138,7 @@ int __delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk)
d->blkio_count += tsk->delays->blkio_count;
d->swapin_count += tsk->delays->swapin_count;
d->freepages_count += tsk->delays->freepages_count;
- spin_unlock_irqrestore(&tsk->delays->lock, flags);
+ raw_spin_unlock_irqrestore(&tsk->delays->lock, flags);
return 0;
}
@@ -147,10 +148,10 @@ __u64 __delayacct_blkio_ticks(struct task_struct *tsk)
__u64 ret;
unsigned long flags;
- spin_lock_irqsave(&tsk->delays->lock, flags);
+ raw_spin_lock_irqsave(&tsk->delays->lock, flags);
ret = nsec_to_clock_t(tsk->delays->blkio_delay +
tsk->delays->swapin_delay);
- spin_unlock_irqrestore(&tsk->delays->lock, flags);
+ raw_spin_unlock_irqrestore(&tsk->delays->lock, flags);
return ret;
}
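The switch matters on PREEMPT_RT, where spinlock_t becomes a sleeping lock; delay accounting is updated from scheduler paths that must not sleep, so the lock has to be raw_spinlock_t, which always spins. The calling pattern is otherwise unchanged (sketch):

raw_spinlock_t lock;
unsigned long flags;

raw_spin_lock_init(&lock);

raw_spin_lock_irqsave(&lock, flags);
/* ... accumulate delay counters: a short, non-sleeping section ... */
raw_spin_unlock_irqrestore(&lock, flags);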
diff --git a/kernel/dma.c b/kernel/dma.c
index 3506fc34a712..40f152936316 100644
--- a/kernel/dma.c
+++ b/kernel/dma.c
@@ -135,21 +135,9 @@ static int proc_dma_show(struct seq_file *m, void *v)
}
#endif /* MAX_DMA_CHANNELS */
-static int proc_dma_open(struct inode *inode, struct file *file)
-{
- return single_open(file, proc_dma_show, NULL);
-}
-
-static const struct file_operations proc_dma_operations = {
- .open = proc_dma_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
static int __init proc_dma_init(void)
{
- proc_create("dma", 0, NULL, &proc_dma_operations);
+ proc_create_single("dma", 0, NULL, proc_dma_show);
return 0;
}
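proc_create_single() collapses the open/read/llseek/release boilerplate seen in the removed lines into a single call that takes the seq_file show() callback directly. A minimal sketch of a converted user, names hypothetical:

static int foo_show(struct seq_file *m, void *v)
{
	seq_puts(m, "example output\n");
	return 0;
}

static int __init foo_init(void)
{
	/* single_open()/seq_read()/single_release() wiring is implied */
	proc_create_single("foo", 0, NULL, foo_show);
	return 0;
}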
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 67612ce359ad..08f5e1b42b43 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -5120,6 +5120,8 @@ static long perf_compat_ioctl(struct file *file, unsigned int cmd,
switch (_IOC_NR(cmd)) {
case _IOC_NR(PERF_EVENT_IOC_SET_FILTER):
case _IOC_NR(PERF_EVENT_IOC_ID):
+ case _IOC_NR(PERF_EVENT_IOC_QUERY_BPF):
+ case _IOC_NR(PERF_EVENT_IOC_MODIFY_ATTRIBUTES):
/* Fix up pointer size (usually 4 -> 8 in 32-on-64-bit case */
if (_IOC_SIZE(cmd) == sizeof(compat_uptr_t)) {
cmd &= ~IOCSIZE_MASK;
@@ -6668,7 +6670,7 @@ static void perf_event_addr_filters_exec(struct perf_event *event, void *data)
raw_spin_lock_irqsave(&ifh->lock, flags);
list_for_each_entry(filter, &ifh->list, entry) {
- if (filter->inode) {
+ if (filter->path.dentry) {
event->addr_filters_offs[count] = 0;
restart++;
}
@@ -7333,7 +7335,7 @@ static bool perf_addr_filter_match(struct perf_addr_filter *filter,
struct file *file, unsigned long offset,
unsigned long size)
{
- if (filter->inode != file_inode(file))
+ if (d_inode(filter->path.dentry) != file_inode(file))
return false;
if (filter->offset > offset + size)
@@ -8686,8 +8688,7 @@ static void free_filters_list(struct list_head *filters)
struct perf_addr_filter *filter, *iter;
list_for_each_entry_safe(filter, iter, filters, entry) {
- if (filter->inode)
- iput(filter->inode);
+ path_put(&filter->path);
list_del(&filter->entry);
kfree(filter);
}
@@ -8784,7 +8785,7 @@ static void perf_event_addr_filters_apply(struct perf_event *event)
* Adjust base offset if the filter is associated to a binary
* that needs to be mapped:
*/
- if (filter->inode)
+ if (filter->path.dentry)
event->addr_filters_offs[count] =
perf_addr_filter_apply(filter, mm);
@@ -8858,7 +8859,6 @@ perf_event_parse_addr_filter(struct perf_event *event, char *fstr,
{
struct perf_addr_filter *filter = NULL;
char *start, *orig, *filename = NULL;
- struct path path;
substring_t args[MAX_OPT_ARGS];
int state = IF_STATE_ACTION, token;
unsigned int kernel = 0;
@@ -8971,19 +8971,18 @@ perf_event_parse_addr_filter(struct perf_event *event, char *fstr,
goto fail_free_name;
/* look up the path and grab its inode */
- ret = kern_path(filename, LOOKUP_FOLLOW, &path);
+ ret = kern_path(filename, LOOKUP_FOLLOW,
+ &filter->path);
if (ret)
goto fail_free_name;
- filter->inode = igrab(d_inode(path.dentry));
- path_put(&path);
kfree(filename);
filename = NULL;
ret = -EINVAL;
- if (!filter->inode ||
- !S_ISREG(filter->inode->i_mode))
- /* free_filters_list() will iput() */
+ if (!filter->path.dentry ||
+ !S_ISREG(d_inode(filter->path.dentry)
+ ->i_mode))
goto fail;
event->addr_filters.nr_file_filters++;
@@ -10521,19 +10520,20 @@ SYSCALL_DEFINE5(perf_event_open,
if (pmu->task_ctx_nr == perf_sw_context)
event->event_caps |= PERF_EV_CAP_SOFTWARE;
- if (group_leader &&
- (is_software_event(event) != is_software_event(group_leader))) {
- if (is_software_event(event)) {
+ if (group_leader) {
+ if (is_software_event(event) &&
+ !in_software_context(group_leader)) {
/*
- * If event and group_leader are not both a software
- * event, and event is, then group leader is not.
+ * The event is a software event, but the group_leader
+ * is on a hardware context.
*
- * Allow the addition of software events to !software
- * groups, this is safe because software events never
- * fail to schedule.
+ * Allow the addition of software events to hw
+ * groups, this is safe because software events
+ * never fail to schedule.
*/
- pmu = group_leader->pmu;
- } else if (is_software_event(group_leader) &&
+ pmu = group_leader->ctx->pmu;
+ } else if (!is_software_event(event) &&
+ is_software_event(group_leader) &&
(group_leader->group_caps & PERF_EV_CAP_SOFTWARE)) {
/*
* In case the group is a pure software group, and we
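
The filter->inode to filter->path conversion in the hunks above changes what is pinned: instead of grabbing the inode with igrab() and dropping the path, the filter now holds the whole path (and with it the mount) and derives the inode on demand via d_inode(). A toy userspace sketch of that ownership shape follows; the types and helpers are hypothetical, not the kernel API.

#include <assert.h>
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct inode { int ino; };

struct path {                   /* stands in for struct path */
        atomic_int refs;
        struct inode inode;     /* owned by the path in this sketch */
};

static struct path *path_get_new(int ino)
{
        struct path *p = malloc(sizeof(*p));

        atomic_init(&p->refs, 1);
        p->inode.ino = ino;
        return p;
}

static void path_put(struct path *p)
{
        if (atomic_fetch_sub(&p->refs, 1) == 1)
                free(p);        /* last reference: inode dies with it */
}

/* d_inode() analogue: borrow the inode through the held path. */
static struct inode *path_inode(struct path *p)
{
        assert(atomic_load(&p->refs) > 0);
        return &p->inode;
}

struct filter { struct path *path; };

int main(void)
{
        struct filter f = { .path = path_get_new(42) };

        /* As long as f.path is held, the inode may be used freely. */
        printf("filter inode %d\n", path_inode(f.path)->ino);
        path_put(f.path);       /* free_filters_list() analogue */
        return 0;
}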
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index 6c6b3c48db71..1d8ca9ea9979 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -14,6 +14,7 @@
#include <linux/slab.h>
#include <linux/circ_buf.h>
#include <linux/poll.h>
+#include <linux/nospec.h>
#include "internal.h"
@@ -867,8 +868,10 @@ perf_mmap_to_page(struct ring_buffer *rb, unsigned long pgoff)
return NULL;
/* AUX space */
- if (pgoff >= rb->aux_pgoff)
- return virt_to_page(rb->aux_pages[pgoff - rb->aux_pgoff]);
+ if (pgoff >= rb->aux_pgoff) {
+ int aux_pgoff = array_index_nospec(pgoff - rb->aux_pgoff, rb->aux_nr_pages);
+ return virt_to_page(rb->aux_pages[aux_pgoff]);
+ }
}
return __perf_mmap_to_page(rb, pgoff);
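
The array_index_nospec() clamp above bounds pgoff before it indexes aux_pages, closing a Spectre-v1 window. A small userspace sketch of the same branch-free mask follows; the mask mirrors the kernel's generic fallback (arithmetic right shift assumed, as the kernel does), but the demo itself is illustrative, not kernel code.

#include <stdio.h>

#define BITS (sizeof(unsigned long) * 8)

/* All ones when index < size, all zeroes otherwise - no branch. */
static unsigned long index_mask_nospec(unsigned long index,
                                       unsigned long size)
{
        return ~(unsigned long)((long)(index | (size - 1UL - index)) >>
                                (BITS - 1));
}

static int table[8] = { 1, 2, 3, 4, 5, 6, 7, 8 };

static int load_nospec(unsigned long i)
{
        /*
         * A real caller still bounds-checks architecturally (as
         * perf_mmap_to_page() checks pgoff first); the mask only
         * defangs the speculative path by forcing the index to 0.
         */
        unsigned long safe = i & index_mask_nospec(i, 8);

        return table[safe];
}

int main(void)
{
        printf("%d %d\n", load_nospec(3), load_nospec(99)); /* 4, then 1 */
        return 0;
}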
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index ce6848e46e94..1725b902983f 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -491,7 +491,7 @@ static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset)
if (!uprobe)
return NULL;
- uprobe->inode = igrab(inode);
+ uprobe->inode = inode;
uprobe->offset = offset;
init_rwsem(&uprobe->register_rwsem);
init_rwsem(&uprobe->consumer_rwsem);
@@ -502,7 +502,6 @@ static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset)
if (cur_uprobe) {
kfree(uprobe);
uprobe = cur_uprobe;
- iput(inode);
}
return uprobe;
@@ -701,7 +700,6 @@ static void delete_uprobe(struct uprobe *uprobe)
rb_erase(&uprobe->rb_node, &uprobes_tree);
spin_unlock(&uprobes_treelock);
RB_CLEAR_NODE(&uprobe->rb_node); /* for uprobe_is_active() */
- iput(uprobe->inode);
put_uprobe(uprobe);
}
@@ -873,7 +871,8 @@ static void __uprobe_unregister(struct uprobe *uprobe, struct uprobe_consumer *u
* tuple). Creation refcount stops uprobe_unregister from freeing the
* @uprobe even before the register operation is complete. Creation
* refcount is released when the last @uc for the @uprobe
- * unregisters.
+ * unregisters. Caller of uprobe_register() is required to keep @inode
+ * (and the containing mount) referenced.
*
* Return errno if it cannot successfully install probes
* else return 0 (success)
diff --git a/kernel/exec_domain.c b/kernel/exec_domain.c
index a5697119290e..33f07c5f2515 100644
--- a/kernel/exec_domain.c
+++ b/kernel/exec_domain.c
@@ -27,21 +27,9 @@ static int execdomains_proc_show(struct seq_file *m, void *v)
return 0;
}
-static int execdomains_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, execdomains_proc_show, NULL);
-}
-
-static const struct file_operations execdomains_proc_fops = {
- .open = execdomains_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
static int __init proc_execdomains_init(void)
{
- proc_create("execdomains", 0, NULL, &execdomains_proc_fops);
+ proc_create_single("execdomains", 0, NULL, execdomains_proc_show);
return 0;
}
module_init(proc_execdomains_init);
diff --git a/kernel/irq/irq_sim.c b/kernel/irq/irq_sim.c
index fc4f361a86bb..dd20d0d528d4 100644
--- a/kernel/irq/irq_sim.c
+++ b/kernel/irq/irq_sim.c
@@ -1,11 +1,6 @@
// SPDX-License-Identifier: GPL-2.0+
/*
- * Copyright (C) 2017 Bartosz Golaszewski <brgl@bgdev.pl>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
+ * Copyright (C) 2017-2018 Bartosz Golaszewski <brgl@bgdev.pl>
*/
#include <linux/slab.h>
diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c
index 2a8571f72b17..4ca2fd46645d 100644
--- a/kernel/irq/msi.c
+++ b/kernel/irq/msi.c
@@ -76,6 +76,19 @@ static inline void irq_chip_write_msi_msg(struct irq_data *data,
data->chip->irq_write_msi_msg(data, msg);
}
+static void msi_check_level(struct irq_domain *domain, struct msi_msg *msg)
+{
+ struct msi_domain_info *info = domain->host_data;
+
+ /*
+ * If the MSI provider has messed with the second message and
+ * not advertised that it is level-capable, signal the breakage.
+ */
+ WARN_ON(!((info->flags & MSI_FLAG_LEVEL_CAPABLE) &&
+ (info->chip->flags & IRQCHIP_SUPPORTS_LEVEL_MSI)) &&
+ (msg[1].address_lo || msg[1].address_hi || msg[1].data));
+}
+
/**
* msi_domain_set_affinity - Generic affinity setter function for MSI domains
* @irq_data: The irq data associated to the interrupt
@@ -89,13 +102,14 @@ int msi_domain_set_affinity(struct irq_data *irq_data,
const struct cpumask *mask, bool force)
{
struct irq_data *parent = irq_data->parent_data;
- struct msi_msg msg;
+ struct msi_msg msg[2] = { [1] = { }, };
int ret;
ret = parent->chip->irq_set_affinity(parent, mask, force);
if (ret >= 0 && ret != IRQ_SET_MASK_OK_DONE) {
- BUG_ON(irq_chip_compose_msi_msg(irq_data, &msg));
- irq_chip_write_msi_msg(irq_data, &msg);
+ BUG_ON(irq_chip_compose_msi_msg(irq_data, msg));
+ msi_check_level(irq_data->domain, msg);
+ irq_chip_write_msi_msg(irq_data, msg);
}
return ret;
@@ -104,20 +118,21 @@ int msi_domain_set_affinity(struct irq_data *irq_data,
static int msi_domain_activate(struct irq_domain *domain,
struct irq_data *irq_data, bool early)
{
- struct msi_msg msg;
+ struct msi_msg msg[2] = { [1] = { }, };
- BUG_ON(irq_chip_compose_msi_msg(irq_data, &msg));
- irq_chip_write_msi_msg(irq_data, &msg);
+ BUG_ON(irq_chip_compose_msi_msg(irq_data, msg));
+ msi_check_level(irq_data->domain, msg);
+ irq_chip_write_msi_msg(irq_data, msg);
return 0;
}
static void msi_domain_deactivate(struct irq_domain *domain,
struct irq_data *irq_data)
{
- struct msi_msg msg;
+ struct msi_msg msg[2];
- memset(&msg, 0, sizeof(msg));
- irq_chip_write_msi_msg(irq_data, &msg);
+ memset(msg, 0, sizeof(msg));
+ irq_chip_write_msi_msg(irq_data, msg);
}
static int msi_domain_alloc(struct irq_domain *domain, unsigned int virq,
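
msi_check_level() above relies on irq_chip_compose_msi_msg() now filling a two-entry message array, where the second entry (used for level-triggered deassert) must stay all-zero unless both the domain and the irqchip advertise level-MSI support. A userspace sketch of that invariant check follows; the flag values and types are illustrative stand-ins.

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct msi_msg { uint32_t address_lo, address_hi, data; };

#define MSI_FLAG_LEVEL_CAPABLE          (1u << 0)
#define IRQCHIP_SUPPORTS_LEVEL_MSI      (1u << 1)

static bool msi_level_ok(unsigned info_flags, unsigned chip_flags,
                         const struct msi_msg msg[2])
{
        bool level_advertised = (info_flags & MSI_FLAG_LEVEL_CAPABLE) &&
                                (chip_flags & IRQCHIP_SUPPORTS_LEVEL_MSI);
        bool second_used = msg[1].address_lo || msg[1].address_hi ||
                           msg[1].data;

        /* The second message may only be used when advertised. */
        return level_advertised || !second_used;
}

int main(void)
{
        struct msi_msg msg[2];

        memset(msg, 0, sizeof(msg));
        msg[0].data = 0x1234;           /* normal edge-style message */
        assert(msi_level_ok(0, 0, msg));

        msg[1].data = 0x5678;           /* provider filled the 2nd entry */
        assert(!msi_level_ok(0, 0, msg));       /* ...without advertising */
        assert(msi_level_ok(MSI_FLAG_LEVEL_CAPABLE,
                            IRQCHIP_SUPPORTS_LEVEL_MSI, msg));
        puts("level-MSI checks behave as expected");
        return 0;
}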
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index 7cb091d81d91..37eda10f5c36 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -185,11 +185,6 @@ static int irq_affinity_list_proc_open(struct inode *inode, struct file *file)
return single_open(file, irq_affinity_list_proc_show, PDE_DATA(inode));
}
-static int irq_affinity_hint_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, irq_affinity_hint_proc_show, PDE_DATA(inode));
-}
-
static const struct file_operations irq_affinity_proc_fops = {
.open = irq_affinity_proc_open,
.read = seq_read,
@@ -198,13 +193,6 @@ static const struct file_operations irq_affinity_proc_fops = {
.write = irq_affinity_proc_write,
};
-static const struct file_operations irq_affinity_hint_proc_fops = {
- .open = irq_affinity_hint_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
static const struct file_operations irq_affinity_list_proc_fops = {
.open = irq_affinity_list_proc_open,
.read = seq_read,
@@ -223,32 +211,6 @@ static int irq_effective_aff_list_proc_show(struct seq_file *m, void *v)
{
return show_irq_affinity(EFFECTIVE_LIST, m);
}
-
-static int irq_effective_aff_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, irq_effective_aff_proc_show, PDE_DATA(inode));
-}
-
-static int irq_effective_aff_list_proc_open(struct inode *inode,
- struct file *file)
-{
- return single_open(file, irq_effective_aff_list_proc_show,
- PDE_DATA(inode));
-}
-
-static const struct file_operations irq_effective_aff_proc_fops = {
- .open = irq_effective_aff_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
-static const struct file_operations irq_effective_aff_list_proc_fops = {
- .open = irq_effective_aff_list_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
#endif
static int default_affinity_show(struct seq_file *m, void *v)
@@ -313,18 +275,6 @@ static int irq_node_proc_show(struct seq_file *m, void *v)
seq_printf(m, "%d\n", irq_desc_get_node(desc));
return 0;
}
-
-static int irq_node_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, irq_node_proc_show, PDE_DATA(inode));
-}
-
-static const struct file_operations irq_node_proc_fops = {
- .open = irq_node_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
#endif
static int irq_spurious_proc_show(struct seq_file *m, void *v)
@@ -337,18 +287,6 @@ static int irq_spurious_proc_show(struct seq_file *m, void *v)
return 0;
}
-static int irq_spurious_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, irq_spurious_proc_show, PDE_DATA(inode));
-}
-
-static const struct file_operations irq_spurious_proc_fops = {
- .open = irq_spurious_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
#define MAX_NAMELEN 128
static int name_unique(unsigned int irq, struct irqaction *new_action)
@@ -421,24 +359,24 @@ void register_irq_proc(unsigned int irq, struct irq_desc *desc)
&irq_affinity_proc_fops, irqp);
/* create /proc/irq/<irq>/affinity_hint */
- proc_create_data("affinity_hint", 0444, desc->dir,
- &irq_affinity_hint_proc_fops, irqp);
+ proc_create_single_data("affinity_hint", 0444, desc->dir,
+ irq_affinity_hint_proc_show, irqp);
/* create /proc/irq/<irq>/smp_affinity_list */
proc_create_data("smp_affinity_list", 0644, desc->dir,
&irq_affinity_list_proc_fops, irqp);
- proc_create_data("node", 0444, desc->dir,
- &irq_node_proc_fops, irqp);
+ proc_create_single_data("node", 0444, desc->dir, irq_node_proc_show,
+ irqp);
# ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK
- proc_create_data("effective_affinity", 0444, desc->dir,
- &irq_effective_aff_proc_fops, irqp);
- proc_create_data("effective_affinity_list", 0444, desc->dir,
- &irq_effective_aff_list_proc_fops, irqp);
+ proc_create_single_data("effective_affinity", 0444, desc->dir,
+ irq_effective_aff_proc_show, irqp);
+ proc_create_single_data("effective_affinity_list", 0444, desc->dir,
+ irq_effective_aff_list_proc_show, irqp);
# endif
#endif
- proc_create_data("spurious", 0444, desc->dir,
- &irq_spurious_proc_fops, (void *)(long)irq);
+ proc_create_single_data("spurious", 0444, desc->dir,
+ irq_spurious_proc_show, (void *)(long)irq);
out_unlock:
mutex_unlock(&register_lock);
diff --git a/kernel/kthread.c b/kernel/kthread.c
index cd50e99202b0..481951bf091d 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -55,7 +55,6 @@ enum KTHREAD_BITS {
KTHREAD_IS_PER_CPU = 0,
KTHREAD_SHOULD_STOP,
KTHREAD_SHOULD_PARK,
- KTHREAD_IS_PARKED,
};
static inline void set_kthread_struct(void *kthread)
@@ -177,14 +176,12 @@ void *kthread_probe_data(struct task_struct *task)
static void __kthread_parkme(struct kthread *self)
{
- __set_current_state(TASK_PARKED);
- while (test_bit(KTHREAD_SHOULD_PARK, &self->flags)) {
- if (!test_and_set_bit(KTHREAD_IS_PARKED, &self->flags))
- complete(&self->parked);
+ for (;;) {
+ set_current_state(TASK_PARKED);
+ if (!test_bit(KTHREAD_SHOULD_PARK, &self->flags))
+ break;
schedule();
- __set_current_state(TASK_PARKED);
}
- clear_bit(KTHREAD_IS_PARKED, &self->flags);
__set_current_state(TASK_RUNNING);
}
@@ -194,6 +191,11 @@ void kthread_parkme(void)
}
EXPORT_SYMBOL_GPL(kthread_parkme);
+void kthread_park_complete(struct task_struct *k)
+{
+ complete_all(&to_kthread(k)->parked);
+}
+
static int kthread(void *_create)
{
/* Copy data: it's on kthread's stack */
@@ -450,22 +452,16 @@ void kthread_unpark(struct task_struct *k)
{
struct kthread *kthread = to_kthread(k);
- clear_bit(KTHREAD_SHOULD_PARK, &kthread->flags);
/*
- * We clear the IS_PARKED bit here as we don't wait
- * until the task has left the park code. So if we'd
- * park before that happens we'd see the IS_PARKED bit
- * which might be about to be cleared.
+ * Newly created kthread was parked when the CPU was offline.
+ * The binding was lost and we need to set it again.
*/
- if (test_and_clear_bit(KTHREAD_IS_PARKED, &kthread->flags)) {
- /*
- * Newly created kthread was parked when the CPU was offline.
- * The binding was lost and we need to set it again.
- */
- if (test_bit(KTHREAD_IS_PER_CPU, &kthread->flags))
- __kthread_bind(k, kthread->cpu, TASK_PARKED);
- wake_up_state(k, TASK_PARKED);
- }
+ if (test_bit(KTHREAD_IS_PER_CPU, &kthread->flags))
+ __kthread_bind(k, kthread->cpu, TASK_PARKED);
+
+ reinit_completion(&kthread->parked);
+ clear_bit(KTHREAD_SHOULD_PARK, &kthread->flags);
+ wake_up_state(k, TASK_PARKED);
}
EXPORT_SYMBOL_GPL(kthread_unpark);
@@ -488,12 +484,10 @@ int kthread_park(struct task_struct *k)
if (WARN_ON(k->flags & PF_EXITING))
return -ENOSYS;
- if (!test_bit(KTHREAD_IS_PARKED, &kthread->flags)) {
- set_bit(KTHREAD_SHOULD_PARK, &kthread->flags);
- if (k != current) {
- wake_up_process(k);
- wait_for_completion(&kthread->parked);
- }
+ set_bit(KTHREAD_SHOULD_PARK, &kthread->flags);
+ if (k != current) {
+ wake_up_process(k);
+ wait_for_completion(&kthread->parked);
}
return 0;
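
The rework above removes KTHREAD_IS_PARKED entirely: the parkee publishes TASK_PARKED before re-checking KTHREAD_SHOULD_PARK, the completion fires (via kthread_park_complete()) only once the task is truly parked, and unpark reinitializes the completion before clearing the flag. A pthread sketch of the same handshake follows, with condvars standing in for the scheduler; the names are illustrative and the sketch simplifies the real wake/sleep machinery.

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct kthread {
        pthread_mutex_t lock;
        pthread_cond_t cond;
        bool should_park;
        bool parked;            /* "completion" analogue */
        bool should_stop;
};

static void *worker(void *arg)
{
        struct kthread *k = arg;

        pthread_mutex_lock(&k->lock);
        while (!k->should_stop) {
                if (k->should_park) {
                        k->parked = true;       /* kthread_park_complete() */
                        pthread_cond_broadcast(&k->cond);
                        while (k->should_park)  /* the TASK_PARKED loop */
                                pthread_cond_wait(&k->cond, &k->lock);
                        k->parked = false;
                        continue;               /* re-check should_stop */
                }
                pthread_cond_wait(&k->cond, &k->lock);  /* idle "work" */
        }
        pthread_mutex_unlock(&k->lock);
        return NULL;
}

static void kthread_park(struct kthread *k)
{
        pthread_mutex_lock(&k->lock);
        k->should_park = true;
        pthread_cond_broadcast(&k->cond);
        while (!k->parked)                      /* wait_for_completion() */
                pthread_cond_wait(&k->cond, &k->lock);
        pthread_mutex_unlock(&k->lock);
}

static void kthread_unpark(struct kthread *k)
{
        pthread_mutex_lock(&k->lock);
        k->should_park = false;                 /* clear SHOULD_PARK... */
        pthread_cond_broadcast(&k->cond);       /* ...then wake TASK_PARKED */
        pthread_mutex_unlock(&k->lock);
}

int main(void)
{
        struct kthread k = {
                .lock = PTHREAD_MUTEX_INITIALIZER,
                .cond = PTHREAD_COND_INITIALIZER,
        };
        pthread_t t;

        pthread_create(&t, NULL, worker, &k);
        kthread_park(&k);
        puts("worker parked");
        kthread_unpark(&k);

        pthread_mutex_lock(&k.lock);
        k.should_stop = true;
        pthread_cond_broadcast(&k.cond);
        pthread_mutex_unlock(&k.lock);
        pthread_join(t, NULL);
        return 0;
}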
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 023386338269..edcac5de7ebc 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -561,20 +561,24 @@ static void print_lock(struct held_lock *hlock)
printk(KERN_CONT ", at: %pS\n", (void *)hlock->acquire_ip);
}
-static void lockdep_print_held_locks(struct task_struct *curr)
+static void lockdep_print_held_locks(struct task_struct *p)
{
- int i, depth = curr->lockdep_depth;
+ int i, depth = READ_ONCE(p->lockdep_depth);
- if (!depth) {
- printk("no locks held by %s/%d.\n", curr->comm, task_pid_nr(curr));
+ if (!depth)
+ printk("no locks held by %s/%d.\n", p->comm, task_pid_nr(p));
+ else
+ printk("%d lock%s held by %s/%d:\n", depth,
+ depth > 1 ? "s" : "", p->comm, task_pid_nr(p));
+ /*
+ * It's not reliable to print a task's held locks if it's not sleeping
+ * and it's not the current task.
+ */
+ if (p->state == TASK_RUNNING && p != current)
return;
- }
- printk("%d lock%s held by %s/%d:\n",
- depth, depth > 1 ? "s" : "", curr->comm, task_pid_nr(curr));
-
for (i = 0; i < depth; i++) {
printk(" #%d: ", i);
- print_lock(curr->held_locks + i);
+ print_lock(p->held_locks + i);
}
}
@@ -4451,8 +4455,6 @@ EXPORT_SYMBOL_GPL(debug_check_no_locks_held);
void debug_show_all_locks(void)
{
struct task_struct *g, *p;
- int count = 10;
- int unlock = 1;
if (unlikely(!debug_locks)) {
pr_warn("INFO: lockdep is turned off.\n");
@@ -4460,50 +4462,18 @@ void debug_show_all_locks(void)
}
pr_warn("\nShowing all locks held in the system:\n");
- /*
- * Here we try to get the tasklist_lock as hard as possible,
- * if not successful after 2 seconds we ignore it (but keep
- * trying). This is to enable a debug printout even if a
- * tasklist_lock-holding task deadlocks or crashes.
- */
-retry:
- if (!read_trylock(&tasklist_lock)) {
- if (count == 10)
- pr_warn("hm, tasklist_lock locked, retrying... ");
- if (count) {
- count--;
- pr_cont(" #%d", 10-count);
- mdelay(200);
- goto retry;
- }
- pr_cont(" ignoring it.\n");
- unlock = 0;
- } else {
- if (count != 10)
- pr_cont(" locked it.\n");
- }
-
- do_each_thread(g, p) {
- /*
- * It's not reliable to print a task's held locks
- * if it's not sleeping (or if it's not the current
- * task):
- */
- if (p->state == TASK_RUNNING && p != current)
+ rcu_read_lock();
+ for_each_process_thread(g, p) {
+ if (!p->lockdep_depth)
continue;
- if (p->lockdep_depth)
- lockdep_print_held_locks(p);
- if (!unlock)
- if (read_trylock(&tasklist_lock))
- unlock = 1;
+ lockdep_print_held_locks(p);
touch_nmi_watchdog();
- } while_each_thread(g, p);
+ touch_all_softlockup_watchdogs();
+ }
+ rcu_read_unlock();
pr_warn("\n");
pr_warn("=============================================\n\n");
-
- if (unlock)
- read_unlock(&tasklist_lock);
}
EXPORT_SYMBOL_GPL(debug_show_all_locks);
#endif
diff --git a/kernel/locking/lockdep_proc.c b/kernel/locking/lockdep_proc.c
index ad69bbc9bd28..3dd980dfba2d 100644
--- a/kernel/locking/lockdep_proc.c
+++ b/kernel/locking/lockdep_proc.c
@@ -101,18 +101,6 @@ static const struct seq_operations lockdep_ops = {
.show = l_show,
};
-static int lockdep_open(struct inode *inode, struct file *file)
-{
- return seq_open(file, &lockdep_ops);
-}
-
-static const struct file_operations proc_lockdep_operations = {
- .open = lockdep_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
#ifdef CONFIG_PROVE_LOCKING
static void *lc_start(struct seq_file *m, loff_t *pos)
{
@@ -170,18 +158,6 @@ static const struct seq_operations lockdep_chains_ops = {
.stop = lc_stop,
.show = lc_show,
};
-
-static int lockdep_chains_open(struct inode *inode, struct file *file)
-{
- return seq_open(file, &lockdep_chains_ops);
-}
-
-static const struct file_operations proc_lockdep_chains_operations = {
- .open = lockdep_chains_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
#endif /* CONFIG_PROVE_LOCKING */
static void lockdep_stats_debug_show(struct seq_file *m)
@@ -355,18 +331,6 @@ static int lockdep_stats_show(struct seq_file *m, void *v)
return 0;
}
-static int lockdep_stats_open(struct inode *inode, struct file *file)
-{
- return single_open(file, lockdep_stats_show, NULL);
-}
-
-static const struct file_operations proc_lockdep_stats_operations = {
- .open = lockdep_stats_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
#ifdef CONFIG_LOCK_STAT
struct lock_stat_data {
@@ -682,14 +646,11 @@ static const struct file_operations proc_lock_stat_operations = {
static int __init lockdep_proc_init(void)
{
- proc_create("lockdep", S_IRUSR, NULL, &proc_lockdep_operations);
+ proc_create_seq("lockdep", S_IRUSR, NULL, &lockdep_ops);
#ifdef CONFIG_PROVE_LOCKING
- proc_create("lockdep_chains", S_IRUSR, NULL,
- &proc_lockdep_chains_operations);
+ proc_create_seq("lockdep_chains", S_IRUSR, NULL, &lockdep_chains_ops);
#endif
- proc_create("lockdep_stats", S_IRUSR, NULL,
- &proc_lockdep_stats_operations);
-
+ proc_create_single("lockdep_stats", S_IRUSR, NULL, lockdep_stats_show);
#ifdef CONFIG_LOCK_STAT
proc_create("lock_stat", S_IRUSR | S_IWUSR, NULL,
&proc_lock_stat_operations);
diff --git a/kernel/locking/mcs_spinlock.h b/kernel/locking/mcs_spinlock.h
index f046b7ce9dd6..5e10153b4d3c 100644
--- a/kernel/locking/mcs_spinlock.h
+++ b/kernel/locking/mcs_spinlock.h
@@ -23,13 +23,15 @@ struct mcs_spinlock {
#ifndef arch_mcs_spin_lock_contended
/*
- * Using smp_load_acquire() provides a memory barrier that ensures
- * subsequent operations happen after the lock is acquired.
+ * Using smp_cond_load_acquire() provides the acquire semantics
+ * required so that subsequent operations happen after the
+ * lock is acquired. Additionally, some architectures such as
+ * ARM64 would like to do spin-waiting instead of purely
+ * spinning, and smp_cond_load_acquire() provides that behavior.
*/
#define arch_mcs_spin_lock_contended(l) \
do { \
- while (!(smp_load_acquire(l))) \
- cpu_relax(); \
+ smp_cond_load_acquire(l, VAL); \
} while (0)
#endif
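
The comment above is the whole MCS story: each waiter spins on a flag in its own queue node, and the handover is a release store paired with the waiter's acquire load (which smp_cond_load_acquire() can turn into a wait-for-event on arm64 rather than pure spinning). Below is a self-contained userspace MCS lock in C11 atomics, for illustration only; it is not the kernel header.

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

struct mcs_node {
        _Atomic(struct mcs_node *) next;
        atomic_int locked;
};

static _Atomic(struct mcs_node *) tail;

static void mcs_lock(struct mcs_node *node)
{
        struct mcs_node *prev;

        atomic_store_explicit(&node->next, NULL, memory_order_relaxed);
        atomic_store_explicit(&node->locked, 0, memory_order_relaxed);

        prev = atomic_exchange(&tail, node);    /* xchg_tail() analogue */
        if (prev) {
                atomic_store_explicit(&prev->next, node,
                                      memory_order_release);
                /* arch_mcs_spin_lock_contended(): spin with acquire */
                while (!atomic_load_explicit(&node->locked,
                                             memory_order_acquire))
                        ;
        }
}

static void mcs_unlock(struct mcs_node *node)
{
        struct mcs_node *next =
                atomic_load_explicit(&node->next, memory_order_acquire);

        if (!next) {
                struct mcs_node *n = node;

                /* No known successor: try to swing tail back to empty. */
                if (atomic_compare_exchange_strong(&tail, &n, NULL))
                        return;
                while (!(next = atomic_load_explicit(&node->next,
                                                     memory_order_acquire)))
                        ;
        }
        /* arch_mcs_spin_unlock_contended(): release store hands over */
        atomic_store_explicit(&next->locked, 1, memory_order_release);
}

static long counter;

static void *worker(void *unused)
{
        struct mcs_node node;

        (void)unused;
        for (int i = 0; i < 100000; i++) {
                mcs_lock(&node);
                counter++;              /* protected by the MCS lock */
                mcs_unlock(&node);
        }
        return NULL;
}

int main(void)
{
        pthread_t a, b;

        pthread_create(&a, NULL, worker, NULL);
        pthread_create(&b, NULL, worker, NULL);
        pthread_join(a, NULL);
        pthread_join(b, NULL);
        printf("counter = %ld (expect 200000)\n", counter);
        return 0;
}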
diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
index 2048359f33d2..f44f658ae629 100644
--- a/kernel/locking/mutex.c
+++ b/kernel/locking/mutex.c
@@ -139,8 +139,9 @@ static inline bool __mutex_trylock(struct mutex *lock)
static __always_inline bool __mutex_trylock_fast(struct mutex *lock)
{
unsigned long curr = (unsigned long)current;
+ unsigned long zero = 0UL;
- if (!atomic_long_cmpxchg_acquire(&lock->owner, 0UL, curr))
+ if (atomic_long_try_cmpxchg_acquire(&lock->owner, &zero, curr))
return true;
return false;
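
The switch above to atomic_long_try_cmpxchg_acquire() exploits the fact that the "try" form updates the expected value in place on failure, saving a comparison on architectures where compare-exchange returns the old value anyway. C11's compare_exchange has the same shape; a minimal sketch follows, in which the "owner" encoding is hypothetical.

#include <assert.h>
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

static atomic_uintptr_t owner;  /* 0 == unlocked, else owner "task" */

static int trylock_fast(uintptr_t curr)
{
        uintptr_t zero = 0;

        /*
         * Succeeds iff owner was 0; on failure, zero now holds the
         * current owner, which a slowpath could inspect directly.
         */
        return atomic_compare_exchange_strong_explicit(&owner, &zero, curr,
                                                       memory_order_acquire,
                                                       memory_order_relaxed);
}

int main(void)
{
        assert(trylock_fast(0x1000));   /* first locker wins */
        assert(!trylock_fast(0x2000));  /* second one must slowpath */
        atomic_store(&owner, 0);        /* unlock */
        assert(trylock_fast(0x2000));
        puts("fast-path trylock behaves as expected");
        return 0;
}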
diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c
index d880296245c5..bfaeb05123ff 100644
--- a/kernel/locking/qspinlock.c
+++ b/kernel/locking/qspinlock.c
@@ -12,11 +12,11 @@
* GNU General Public License for more details.
*
* (C) Copyright 2013-2015 Hewlett-Packard Development Company, L.P.
- * (C) Copyright 2013-2014 Red Hat, Inc.
+ * (C) Copyright 2013-2014,2018 Red Hat, Inc.
* (C) Copyright 2015 Intel Corp.
* (C) Copyright 2015 Hewlett-Packard Enterprise Development LP
*
- * Authors: Waiman Long <waiman.long@hpe.com>
+ * Authors: Waiman Long <longman@redhat.com>
* Peter Zijlstra <peterz@infradead.org>
*/
@@ -33,6 +33,11 @@
#include <asm/qspinlock.h>
/*
+ * Include queued spinlock statistics code
+ */
+#include "qspinlock_stat.h"
+
+/*
* The basic principle of a queue-based spinlock can best be understood
* by studying a classic queue-based spinlock implementation called the
* MCS lock. The paper below provides a good description for this kind
@@ -77,6 +82,18 @@
#endif
/*
+ * The pending bit spinning loop count.
+ * This heuristic is used to limit the number of lockword accesses
+ * made by atomic_cond_read_relaxed when waiting for the lock to
+ * transition out of the "== _Q_PENDING_VAL" state. We don't spin
+ * indefinitely because there's no guarantee that we'll make forward
+ * progress.
+ */
+#ifndef _Q_PENDING_LOOPS
+#define _Q_PENDING_LOOPS 1
+#endif
+
+/*
* Per-CPU queue node structures; we can never have more than 4 nested
* contexts: task, softirq, hardirq, nmi.
*
@@ -114,41 +131,18 @@ static inline __pure struct mcs_spinlock *decode_tail(u32 tail)
#define _Q_LOCKED_PENDING_MASK (_Q_LOCKED_MASK | _Q_PENDING_MASK)
-/*
- * By using the whole 2nd least significant byte for the pending bit, we
- * can allow better optimization of the lock acquisition for the pending
- * bit holder.
+#if _Q_PENDING_BITS == 8
+/**
+ * clear_pending - clear the pending bit.
+ * @lock: Pointer to queued spinlock structure
*
- * This internal structure is also used by the set_locked function which
- * is not restricted to _Q_PENDING_BITS == 8.
+ * *,1,* -> *,0,*
*/
-struct __qspinlock {
- union {
- atomic_t val;
-#ifdef __LITTLE_ENDIAN
- struct {
- u8 locked;
- u8 pending;
- };
- struct {
- u16 locked_pending;
- u16 tail;
- };
-#else
- struct {
- u16 tail;
- u16 locked_pending;
- };
- struct {
- u8 reserved[2];
- u8 pending;
- u8 locked;
- };
-#endif
- };
-};
+static __always_inline void clear_pending(struct qspinlock *lock)
+{
+ WRITE_ONCE(lock->pending, 0);
+}
-#if _Q_PENDING_BITS == 8
/**
* clear_pending_set_locked - take ownership and clear the pending bit.
* @lock: Pointer to queued spinlock structure
@@ -159,9 +153,7 @@ struct __qspinlock {
*/
static __always_inline void clear_pending_set_locked(struct qspinlock *lock)
{
- struct __qspinlock *l = (void *)lock;
-
- WRITE_ONCE(l->locked_pending, _Q_LOCKED_VAL);
+ WRITE_ONCE(lock->locked_pending, _Q_LOCKED_VAL);
}
/*
@@ -176,19 +168,28 @@ static __always_inline void clear_pending_set_locked(struct qspinlock *lock)
*/
static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail)
{
- struct __qspinlock *l = (void *)lock;
-
/*
- * Use release semantics to make sure that the MCS node is properly
- * initialized before changing the tail code.
+ * We can use relaxed semantics since the caller ensures that the
+ * MCS node is properly initialized before updating the tail.
*/
- return (u32)xchg_release(&l->tail,
+ return (u32)xchg_relaxed(&lock->tail,
tail >> _Q_TAIL_OFFSET) << _Q_TAIL_OFFSET;
}
#else /* _Q_PENDING_BITS == 8 */
/**
+ * clear_pending - clear the pending bit.
+ * @lock: Pointer to queued spinlock structure
+ *
+ * *,1,* -> *,0,*
+ */
+static __always_inline void clear_pending(struct qspinlock *lock)
+{
+ atomic_andnot(_Q_PENDING_VAL, &lock->val);
+}
+
+/**
* clear_pending_set_locked - take ownership and clear the pending bit.
* @lock: Pointer to queued spinlock structure
*
@@ -216,10 +217,11 @@ static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail)
for (;;) {
new = (val & _Q_LOCKED_PENDING_MASK) | tail;
/*
- * Use release semantics to make sure that the MCS node is
- * properly initialized before changing the tail code.
+ * We can use relaxed semantics since the caller ensures that
+ * the MCS node is properly initialized before updating the
+ * tail.
*/
- old = atomic_cmpxchg_release(&lock->val, val, new);
+ old = atomic_cmpxchg_relaxed(&lock->val, val, new);
if (old == val)
break;
@@ -237,9 +239,7 @@ static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail)
*/
static __always_inline void set_locked(struct qspinlock *lock)
{
- struct __qspinlock *l = (void *)lock;
-
- WRITE_ONCE(l->locked, _Q_LOCKED_VAL);
+ WRITE_ONCE(lock->locked, _Q_LOCKED_VAL);
}
@@ -294,86 +294,83 @@ static __always_inline u32 __pv_wait_head_or_lock(struct qspinlock *lock,
void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
{
struct mcs_spinlock *prev, *next, *node;
- u32 new, old, tail;
+ u32 old, tail;
int idx;
BUILD_BUG_ON(CONFIG_NR_CPUS >= (1U << _Q_TAIL_CPU_BITS));
if (pv_enabled())
- goto queue;
+ goto pv_queue;
if (virt_spin_lock(lock))
return;
/*
- * wait for in-progress pending->locked hand-overs
+ * Wait for in-progress pending->locked hand-overs with a bounded
+ * number of spins so that we guarantee forward progress.
*
* 0,1,0 -> 0,0,1
*/
if (val == _Q_PENDING_VAL) {
- while ((val = atomic_read(&lock->val)) == _Q_PENDING_VAL)
- cpu_relax();
+ int cnt = _Q_PENDING_LOOPS;
+ val = atomic_cond_read_relaxed(&lock->val,
+ (VAL != _Q_PENDING_VAL) || !cnt--);
}
/*
+ * If we observe any contention, queue.
+ */
+ if (val & ~_Q_LOCKED_MASK)
+ goto queue;
+
+ /*
* trylock || pending
*
* 0,0,0 -> 0,0,1 ; trylock
* 0,0,1 -> 0,1,1 ; pending
*/
- for (;;) {
+ val = atomic_fetch_or_acquire(_Q_PENDING_VAL, &lock->val);
+ if (!(val & ~_Q_LOCKED_MASK)) {
/*
- * If we observe any contention; queue.
+ * We're pending, wait for the owner to go away.
+ *
+ * *,1,1 -> *,1,0
+ *
+ * this wait loop must be a load-acquire such that we match the
+ * store-release that clears the locked bit and create lock
+ * sequentiality; this is because not all
+ * clear_pending_set_locked() implementations imply full
+ * barriers.
*/
- if (val & ~_Q_LOCKED_MASK)
- goto queue;
-
- new = _Q_LOCKED_VAL;
- if (val == new)
- new |= _Q_PENDING_VAL;
+ if (val & _Q_LOCKED_MASK) {
+ atomic_cond_read_acquire(&lock->val,
+ !(VAL & _Q_LOCKED_MASK));
+ }
/*
- * Acquire semantic is required here as the function may
- * return immediately if the lock was free.
+ * take ownership and clear the pending bit.
+ *
+ * *,1,0 -> *,0,1
*/
- old = atomic_cmpxchg_acquire(&lock->val, val, new);
- if (old == val)
- break;
-
- val = old;
- }
-
- /*
- * we won the trylock
- */
- if (new == _Q_LOCKED_VAL)
+ clear_pending_set_locked(lock);
+ qstat_inc(qstat_lock_pending, true);
return;
+ }
/*
- * we're pending, wait for the owner to go away.
- *
- * *,1,1 -> *,1,0
- *
- * this wait loop must be a load-acquire such that we match the
- * store-release that clears the locked bit and create lock
- * sequentiality; this is because not all clear_pending_set_locked()
- * implementations imply full barriers.
- */
- smp_cond_load_acquire(&lock->val.counter, !(VAL & _Q_LOCKED_MASK));
-
- /*
- * take ownership and clear the pending bit.
- *
- * *,1,0 -> *,0,1
+ * If pending was clear but there are waiters in the queue, then
+ * we need to undo our setting of pending before we queue ourselves.
*/
- clear_pending_set_locked(lock);
- return;
+ if (!(val & _Q_PENDING_MASK))
+ clear_pending(lock);
/*
* End of pending bit optimistic spinning and beginning of MCS
* queuing.
*/
queue:
+ qstat_inc(qstat_lock_slowpath, true);
+pv_queue:
node = this_cpu_ptr(&mcs_nodes[0]);
idx = node->count++;
tail = encode_tail(smp_processor_id(), idx);
@@ -400,12 +397,18 @@ queue:
goto release;
/*
+ * Ensure that the initialisation of @node is complete before we
+ * publish the updated tail via xchg_tail() and potentially link
+ * @node into the waitqueue via WRITE_ONCE(prev->next, node) below.
+ */
+ smp_wmb();
+
+ /*
+ * Publish the updated tail.
* We have already touched the queueing cacheline; don't bother with
* pending stuff.
*
* p,*,* -> n,*,*
- *
- * RELEASE, such that the stores to @node must be complete.
*/
old = xchg_tail(lock, tail);
next = NULL;
@@ -417,14 +420,8 @@ queue:
if (old & _Q_TAIL_MASK) {
prev = decode_tail(old);
- /*
- * We must ensure that the stores to @node are observed before
- * the write to prev->next. The address dependency from
- * xchg_tail is not sufficient to ensure this because the read
- * component of xchg_tail is unordered with respect to the
- * initialisation of @node.
- */
- smp_store_release(&prev->next, node);
+ /* Link @node into the waitqueue. */
+ WRITE_ONCE(prev->next, node);
pv_wait_node(node, prev);
arch_mcs_spin_lock_contended(&node->locked);
@@ -453,8 +450,8 @@ queue:
*
* The PV pv_wait_head_or_lock function, if active, will acquire
* the lock and return a non-zero value. So we have to skip the
- * smp_cond_load_acquire() call. As the next PV queue head hasn't been
- * designated yet, there is no way for the locked value to become
+ * atomic_cond_read_acquire() call. As the next PV queue head hasn't
+ * been designated yet, there is no way for the locked value to become
* _Q_SLOW_VAL. So both the set_locked() and the
* atomic_cmpxchg_relaxed() calls will be safe.
*
@@ -464,44 +461,38 @@ queue:
if ((val = pv_wait_head_or_lock(lock, node)))
goto locked;
- val = smp_cond_load_acquire(&lock->val.counter, !(VAL & _Q_LOCKED_PENDING_MASK));
+ val = atomic_cond_read_acquire(&lock->val, !(VAL & _Q_LOCKED_PENDING_MASK));
locked:
/*
* claim the lock:
*
* n,0,0 -> 0,0,1 : lock, uncontended
- * *,0,0 -> *,0,1 : lock, contended
+ * *,*,0 -> *,*,1 : lock, contended
*
- * If the queue head is the only one in the queue (lock value == tail),
- * clear the tail code and grab the lock. Otherwise, we only need
- * to grab the lock.
+ * If the queue head is the only one in the queue (lock value == tail)
+ * and nobody is pending, clear the tail code and grab the lock.
+ * Otherwise, we only need to grab the lock.
*/
- for (;;) {
- /* In the PV case we might already have _Q_LOCKED_VAL set */
- if ((val & _Q_TAIL_MASK) != tail) {
- set_locked(lock);
- break;
- }
- /*
- * The smp_cond_load_acquire() call above has provided the
- * necessary acquire semantics required for locking. At most
- * two iterations of this loop may be ran.
- */
- old = atomic_cmpxchg_relaxed(&lock->val, val, _Q_LOCKED_VAL);
- if (old == val)
- goto release; /* No contention */
- val = old;
- }
+ /*
+ * In the PV case we might already have _Q_LOCKED_VAL set.
+ *
+ * The atomic_cond_read_acquire() call above has provided the
+ * necessary acquire semantics required for locking.
+ */
+ if (((val & _Q_TAIL_MASK) == tail) &&
+ atomic_try_cmpxchg_relaxed(&lock->val, &val, _Q_LOCKED_VAL))
+ goto release; /* No contention */
+
+ /* Either somebody is queued behind us or _Q_PENDING_VAL is set */
+ set_locked(lock);
/*
* contended path; wait for next if not observed yet, release.
*/
- if (!next) {
- while (!(next = READ_ONCE(node->next)))
- cpu_relax();
- }
+ if (!next)
+ next = smp_cond_load_relaxed(&node->next, (VAL));
arch_mcs_spin_unlock_contended(&next->locked);
pv_kick_node(lock, next);
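
The _Q_PENDING_LOOPS bound introduced above keeps the pending->locked wait from spinning forever on a value that only another CPU can change. A userspace sketch of a budgeted spin of that shape follows; the constants and names are illustrative, not the kernel's.

#include <stdatomic.h>
#include <stdio.h>

#define PENDING_ONLY    0x100   /* stands in for _Q_PENDING_VAL */
#define PENDING_LOOPS   512     /* stands in for _Q_PENDING_LOOPS */

static int wait_pending_handover(atomic_int *lock)
{
        int cnt = PENDING_LOOPS;
        int val = atomic_load_explicit(lock, memory_order_relaxed);

        /* atomic_cond_read_relaxed() analogue with a spin budget */
        while (val == PENDING_ONLY && cnt--)
                val = atomic_load_explicit(lock, memory_order_relaxed);
        return val;     /* caller queues if contention is still visible */
}

int main(void)
{
        atomic_int lock = PENDING_ONLY;

        /*
         * Nobody completes the hand-over here, so the budget runs out
         * and the stale PENDING-only value is returned; the slowpath
         * would then fall back to queueing instead of spinning on.
         */
        printf("observed %#x after bounded spin\n",
               wait_pending_handover(&lock));
        return 0;
}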
diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h
index 6ee477765e6c..5a0cf5f9008c 100644
--- a/kernel/locking/qspinlock_paravirt.h
+++ b/kernel/locking/qspinlock_paravirt.h
@@ -56,11 +56,6 @@ struct pv_node {
};
/*
- * Include queued spinlock statistics code
- */
-#include "qspinlock_stat.h"
-
-/*
* Hybrid PV queued/unfair lock
*
* By replacing the regular queued_spin_trylock() with the function below,
@@ -87,8 +82,6 @@ struct pv_node {
#define queued_spin_trylock(l) pv_hybrid_queued_unfair_trylock(l)
static inline bool pv_hybrid_queued_unfair_trylock(struct qspinlock *lock)
{
- struct __qspinlock *l = (void *)lock;
-
/*
* Stay in unfair lock mode as long as queued mode waiters are
* present in the MCS wait queue but the pending bit isn't set.
@@ -97,7 +90,7 @@ static inline bool pv_hybrid_queued_unfair_trylock(struct qspinlock *lock)
int val = atomic_read(&lock->val);
if (!(val & _Q_LOCKED_PENDING_MASK) &&
- (cmpxchg_acquire(&l->locked, 0, _Q_LOCKED_VAL) == 0)) {
+ (cmpxchg_acquire(&lock->locked, 0, _Q_LOCKED_VAL) == 0)) {
qstat_inc(qstat_pv_lock_stealing, true);
return true;
}
@@ -117,16 +110,7 @@ static inline bool pv_hybrid_queued_unfair_trylock(struct qspinlock *lock)
#if _Q_PENDING_BITS == 8
static __always_inline void set_pending(struct qspinlock *lock)
{
- struct __qspinlock *l = (void *)lock;
-
- WRITE_ONCE(l->pending, 1);
-}
-
-static __always_inline void clear_pending(struct qspinlock *lock)
-{
- struct __qspinlock *l = (void *)lock;
-
- WRITE_ONCE(l->pending, 0);
+ WRITE_ONCE(lock->pending, 1);
}
/*
@@ -136,10 +120,8 @@ static __always_inline void clear_pending(struct qspinlock *lock)
*/
static __always_inline int trylock_clear_pending(struct qspinlock *lock)
{
- struct __qspinlock *l = (void *)lock;
-
- return !READ_ONCE(l->locked) &&
- (cmpxchg_acquire(&l->locked_pending, _Q_PENDING_VAL,
+ return !READ_ONCE(lock->locked) &&
+ (cmpxchg_acquire(&lock->locked_pending, _Q_PENDING_VAL,
_Q_LOCKED_VAL) == _Q_PENDING_VAL);
}
#else /* _Q_PENDING_BITS == 8 */
@@ -148,11 +130,6 @@ static __always_inline void set_pending(struct qspinlock *lock)
atomic_or(_Q_PENDING_VAL, &lock->val);
}
-static __always_inline void clear_pending(struct qspinlock *lock)
-{
- atomic_andnot(_Q_PENDING_VAL, &lock->val);
-}
-
static __always_inline int trylock_clear_pending(struct qspinlock *lock)
{
int val = atomic_read(&lock->val);
@@ -384,7 +361,6 @@ static void pv_wait_node(struct mcs_spinlock *node, struct mcs_spinlock *prev)
static void pv_kick_node(struct qspinlock *lock, struct mcs_spinlock *node)
{
struct pv_node *pn = (struct pv_node *)node;
- struct __qspinlock *l = (void *)lock;
/*
* If the vCPU is indeed halted, advance its state to match that of
@@ -413,7 +389,7 @@ static void pv_kick_node(struct qspinlock *lock, struct mcs_spinlock *node)
* the hash table later on at unlock time, no atomic instruction is
* needed.
*/
- WRITE_ONCE(l->locked, _Q_SLOW_VAL);
+ WRITE_ONCE(lock->locked, _Q_SLOW_VAL);
(void)pv_hash(lock, pn);
}
@@ -428,7 +404,6 @@ static u32
pv_wait_head_or_lock(struct qspinlock *lock, struct mcs_spinlock *node)
{
struct pv_node *pn = (struct pv_node *)node;
- struct __qspinlock *l = (void *)lock;
struct qspinlock **lp = NULL;
int waitcnt = 0;
int loop;
@@ -443,7 +418,7 @@ pv_wait_head_or_lock(struct qspinlock *lock, struct mcs_spinlock *node)
/*
* Tracking # of slowpath locking operations
*/
- qstat_inc(qstat_pv_lock_slowpath, true);
+ qstat_inc(qstat_lock_slowpath, true);
for (;; waitcnt++) {
/*
@@ -479,13 +454,13 @@ pv_wait_head_or_lock(struct qspinlock *lock, struct mcs_spinlock *node)
*
* Matches the smp_rmb() in __pv_queued_spin_unlock().
*/
- if (xchg(&l->locked, _Q_SLOW_VAL) == 0) {
+ if (xchg(&lock->locked, _Q_SLOW_VAL) == 0) {
/*
* The lock was free and now we own the lock.
* Change the lock value back to _Q_LOCKED_VAL
* and unhash the table.
*/
- WRITE_ONCE(l->locked, _Q_LOCKED_VAL);
+ WRITE_ONCE(lock->locked, _Q_LOCKED_VAL);
WRITE_ONCE(*lp, NULL);
goto gotlock;
}
@@ -493,7 +468,7 @@ pv_wait_head_or_lock(struct qspinlock *lock, struct mcs_spinlock *node)
WRITE_ONCE(pn->state, vcpu_hashed);
qstat_inc(qstat_pv_wait_head, true);
qstat_inc(qstat_pv_wait_again, waitcnt);
- pv_wait(&l->locked, _Q_SLOW_VAL);
+ pv_wait(&lock->locked, _Q_SLOW_VAL);
/*
* Because of lock stealing, the queue head vCPU may not be
@@ -518,7 +493,6 @@ gotlock:
__visible void
__pv_queued_spin_unlock_slowpath(struct qspinlock *lock, u8 locked)
{
- struct __qspinlock *l = (void *)lock;
struct pv_node *node;
if (unlikely(locked != _Q_SLOW_VAL)) {
@@ -547,7 +521,7 @@ __pv_queued_spin_unlock_slowpath(struct qspinlock *lock, u8 locked)
* Now that we have a reference to the (likely) blocked pv_node,
* release the lock.
*/
- smp_store_release(&l->locked, 0);
+ smp_store_release(&lock->locked, 0);
/*
* At this point the memory pointed at by lock can be freed/reused,
@@ -573,7 +547,6 @@ __pv_queued_spin_unlock_slowpath(struct qspinlock *lock, u8 locked)
#ifndef __pv_queued_spin_unlock
__visible void __pv_queued_spin_unlock(struct qspinlock *lock)
{
- struct __qspinlock *l = (void *)lock;
u8 locked;
/*
@@ -581,7 +554,7 @@ __visible void __pv_queued_spin_unlock(struct qspinlock *lock)
* unhash. Otherwise it would be possible to have multiple @lock
* entries, which would be BAD.
*/
- locked = cmpxchg_release(&l->locked, _Q_LOCKED_VAL, 0);
+ locked = cmpxchg_release(&lock->locked, _Q_LOCKED_VAL, 0);
if (likely(locked == _Q_LOCKED_VAL))
return;
diff --git a/kernel/locking/qspinlock_stat.h b/kernel/locking/qspinlock_stat.h
index 4a30ef63c607..6bd78c0740fc 100644
--- a/kernel/locking/qspinlock_stat.h
+++ b/kernel/locking/qspinlock_stat.h
@@ -22,13 +22,14 @@
* pv_kick_wake - # of vCPU kicks used for computing pv_latency_wake
* pv_latency_kick - average latency (ns) of vCPU kick operation
* pv_latency_wake - average latency (ns) from vCPU kick to wakeup
- * pv_lock_slowpath - # of locking operations via the slowpath
* pv_lock_stealing - # of lock stealing operations
* pv_spurious_wakeup - # of spurious wakeups in non-head vCPUs
* pv_wait_again - # of wait's after a queue head vCPU kick
* pv_wait_early - # of early vCPU wait's
* pv_wait_head - # of vCPU wait's at the queue head
* pv_wait_node - # of vCPU wait's at a non-head queue node
+ * lock_pending - # of locking operations via pending code
+ * lock_slowpath - # of locking operations via MCS lock queue
*
* Writing to the "reset_counters" file will reset all the above counter
* values.
@@ -46,13 +47,14 @@ enum qlock_stats {
qstat_pv_kick_wake,
qstat_pv_latency_kick,
qstat_pv_latency_wake,
- qstat_pv_lock_slowpath,
qstat_pv_lock_stealing,
qstat_pv_spurious_wakeup,
qstat_pv_wait_again,
qstat_pv_wait_early,
qstat_pv_wait_head,
qstat_pv_wait_node,
+ qstat_lock_pending,
+ qstat_lock_slowpath,
qstat_num, /* Total number of statistical counters */
qstat_reset_cnts = qstat_num,
};
@@ -73,12 +75,13 @@ static const char * const qstat_names[qstat_num + 1] = {
[qstat_pv_spurious_wakeup] = "pv_spurious_wakeup",
[qstat_pv_latency_kick] = "pv_latency_kick",
[qstat_pv_latency_wake] = "pv_latency_wake",
- [qstat_pv_lock_slowpath] = "pv_lock_slowpath",
[qstat_pv_lock_stealing] = "pv_lock_stealing",
[qstat_pv_wait_again] = "pv_wait_again",
[qstat_pv_wait_early] = "pv_wait_early",
[qstat_pv_wait_head] = "pv_wait_head",
[qstat_pv_wait_node] = "pv_wait_node",
+ [qstat_lock_pending] = "lock_pending",
+ [qstat_lock_slowpath] = "lock_slowpath",
[qstat_reset_cnts] = "reset_counters",
};
diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c
index e795908f3607..3064c50e181e 100644
--- a/kernel/locking/rwsem-xadd.c
+++ b/kernel/locking/rwsem-xadd.c
@@ -347,30 +347,31 @@ static inline bool rwsem_try_write_lock_unqueued(struct rw_semaphore *sem)
}
}
+static inline bool owner_on_cpu(struct task_struct *owner)
+{
+ /*
+ * Due to lock holder preemption, skip spinning if the
+ * task is not on a CPU or its CPU is preempted
+ */
+ return owner->on_cpu && !vcpu_is_preempted(task_cpu(owner));
+}
+
static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem)
{
struct task_struct *owner;
bool ret = true;
+ BUILD_BUG_ON(!rwsem_has_anonymous_owner(RWSEM_OWNER_UNKNOWN));
+
if (need_resched())
return false;
rcu_read_lock();
owner = READ_ONCE(sem->owner);
- if (!rwsem_owner_is_writer(owner)) {
- /*
- * Don't spin if the rwsem is readers owned.
- */
- ret = !rwsem_owner_is_reader(owner);
- goto done;
+ if (owner) {
+ ret = is_rwsem_owner_spinnable(owner) &&
+ owner_on_cpu(owner);
}
-
- /*
- * As lock holder preemption issue, we both skip spinning if task is not
- * on cpu or its cpu is preempted
- */
- ret = owner->on_cpu && !vcpu_is_preempted(task_cpu(owner));
-done:
rcu_read_unlock();
return ret;
}
@@ -382,11 +383,11 @@ static noinline bool rwsem_spin_on_owner(struct rw_semaphore *sem)
{
struct task_struct *owner = READ_ONCE(sem->owner);
- if (!rwsem_owner_is_writer(owner))
- goto out;
+ if (!is_rwsem_owner_spinnable(owner))
+ return false;
rcu_read_lock();
- while (sem->owner == owner) {
+ while (owner && (READ_ONCE(sem->owner) == owner)) {
/*
* Ensure we emit the owner->on_cpu, dereference _after_
* checking sem->owner still matches owner, if that fails,
@@ -399,8 +400,7 @@ static noinline bool rwsem_spin_on_owner(struct rw_semaphore *sem)
* abort spinning when need_resched or owner is not running or
* owner's cpu is preempted.
*/
- if (!owner->on_cpu || need_resched() ||
- vcpu_is_preempted(task_cpu(owner))) {
+ if (need_resched() || !owner_on_cpu(owner)) {
rcu_read_unlock();
return false;
}
@@ -408,12 +408,12 @@ static noinline bool rwsem_spin_on_owner(struct rw_semaphore *sem)
cpu_relax();
}
rcu_read_unlock();
-out:
+
/*
* If there is a new owner or the owner is not set, we continue
* spinning.
*/
- return !rwsem_owner_is_reader(READ_ONCE(sem->owner));
+ return is_rwsem_owner_spinnable(READ_ONCE(sem->owner));
}
static bool rwsem_optimistic_spin(struct rw_semaphore *sem)
diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c
index 30465a2f2b6c..bc1e507be9ff 100644
--- a/kernel/locking/rwsem.c
+++ b/kernel/locking/rwsem.c
@@ -221,5 +221,3 @@ void up_read_non_owner(struct rw_semaphore *sem)
EXPORT_SYMBOL(up_read_non_owner);
#endif
-
-
diff --git a/kernel/locking/rwsem.h b/kernel/locking/rwsem.h
index a17cba8d94bb..b9d0e72aa80f 100644
--- a/kernel/locking/rwsem.h
+++ b/kernel/locking/rwsem.h
@@ -1,20 +1,24 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* The owner field of the rw_semaphore structure will be set to
- * RWSEM_READ_OWNED when a reader grabs the lock. A writer will clear
+ * RWSEM_READER_OWNED when a reader grabs the lock. A writer will clear
* the owner field when it unlocks. A reader, on the other hand, will
* not touch the owner field when it unlocks.
*
- * In essence, the owner field now has the following 3 states:
+ * In essence, the owner field now has the following 4 states:
* 1) 0
* - lock is free or the owner hasn't set the field yet
* 2) RWSEM_READER_OWNED
* - lock is currently or previously owned by readers (lock is free
* or not set by owner yet)
- * 3) Other non-zero value
- * - a writer owns the lock
+ * 3) RWSEM_ANONYMOUSLY_OWNED bit set with some other bits set as well
+ * - lock is owned by an anonymous writer, so spinning on the lock
+ * owner should be disabled.
+ * 4) Other non-zero value
+ * - a writer owns the lock and other writers can spin on the lock owner.
*/
-#define RWSEM_READER_OWNED ((struct task_struct *)1UL)
+#define RWSEM_ANONYMOUSLY_OWNED (1UL << 0)
+#define RWSEM_READER_OWNED ((struct task_struct *)RWSEM_ANONYMOUSLY_OWNED)
#ifdef CONFIG_DEBUG_RWSEMS
# define DEBUG_RWSEMS_WARN_ON(c) DEBUG_LOCKS_WARN_ON(c)
@@ -51,14 +55,22 @@ static inline void rwsem_set_reader_owned(struct rw_semaphore *sem)
WRITE_ONCE(sem->owner, RWSEM_READER_OWNED);
}
-static inline bool rwsem_owner_is_writer(struct task_struct *owner)
+/*
+ * Return true if a rwsem waiter can spin on the rwsem's owner
+ * and steal the lock, i.e. the lock is not anonymously owned.
+ * N.B. !owner is considered spinnable.
+ */
+static inline bool is_rwsem_owner_spinnable(struct task_struct *owner)
{
- return owner && owner != RWSEM_READER_OWNED;
+ return !((unsigned long)owner & RWSEM_ANONYMOUSLY_OWNED);
}
-static inline bool rwsem_owner_is_reader(struct task_struct *owner)
+/*
+ * Return true if rwsem is owned by an anonymous writer or readers.
+ */
+static inline bool rwsem_has_anonymous_owner(struct task_struct *owner)
{
- return owner == RWSEM_READER_OWNED;
+ return (unsigned long)owner & RWSEM_ANONYMOUSLY_OWNED;
}
#else
static inline void rwsem_set_owner(struct rw_semaphore *sem)
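
The new owner encoding above folds "reader-owned" and "anonymously owned" into the low bit of the owner pointer, so spinnability becomes a single bit test. A userspace sketch of that pointer-tagging trick follows; all names are hypothetical and the demo omits the rwsem itself.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

struct task { char name[16]; };

#define ANONYMOUSLY_OWNED       (1UL << 0)
#define READER_OWNED            ((struct task *)ANONYMOUSLY_OWNED)

static int owner_spinnable(struct task *owner)
{
        /* NULL is spinnable; any tagged pointer is not. */
        return !((uintptr_t)owner & ANONYMOUSLY_OWNED);
}

int main(void)
{
        static struct task writer = { "writer" };   /* alignment > 1 */

        assert(owner_spinnable(NULL));          /* free, or not set yet */
        assert(owner_spinnable(&writer));       /* writer-owned: spin OK */
        assert(!owner_spinnable(READER_OWNED)); /* reader-owned: don't */
        puts("owner tagging behaves as expected");
        return 0;
}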
diff --git a/kernel/module.c b/kernel/module.c
index ce8066b88178..c9bea7f2b43e 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -3517,6 +3517,11 @@ static noinline int do_init_module(struct module *mod)
* walking this with preempt disabled. In all the failure paths, we
* call synchronize_sched(), but we don't want to slow down the success
* path, so use actual RCU here.
+ * Note that module_alloc() on most architectures creates W+X page
+ * mappings which won't be cleaned up until do_free_init() runs. Any
+ * code such as mark_rodata_ro() which depends on those mappings to
+ * be cleaned up needs to sync with the queued work, i.e.,
+ * rcu_barrier_sched().
*/
call_rcu_sched(&freeinit->rcu, do_free_init);
mutex_unlock(&module_mutex);
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index 11b4282c2d20..1efcb5b0c3ed 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -269,7 +269,7 @@ static int hib_submit_io(int op, int op_flags, pgoff_t page_off, void *addr,
struct bio *bio;
int error = 0;
- bio = bio_alloc(__GFP_RECLAIM | __GFP_HIGH, 1);
+ bio = bio_alloc(GFP_NOIO | __GFP_HIGH, 1);
bio->bi_iter.bi_sector = page_off * (PAGE_SIZE >> 9);
bio_set_dev(bio, hib_resume_bdev);
bio_set_op_attrs(bio, op, op_flags);
@@ -376,7 +376,7 @@ static int write_page(void *buf, sector_t offset, struct hib_bio_batch *hb)
return -ENOSPC;
if (hb) {
- src = (void *)__get_free_page(__GFP_RECLAIM | __GFP_NOWARN |
+ src = (void *)__get_free_page(GFP_NOIO | __GFP_NOWARN |
__GFP_NORETRY);
if (src) {
copy_page(src, buf);
@@ -384,7 +384,7 @@ static int write_page(void *buf, sector_t offset, struct hib_bio_batch *hb)
ret = hib_wait_io(hb); /* Free pages */
if (ret)
return ret;
- src = (void *)__get_free_page(__GFP_RECLAIM |
+ src = (void *)__get_free_page(GFP_NOIO |
__GFP_NOWARN |
__GFP_NORETRY);
if (src) {
@@ -691,7 +691,7 @@ static int save_image_lzo(struct swap_map_handle *handle,
nr_threads = num_online_cpus() - 1;
nr_threads = clamp_val(nr_threads, 1, LZO_THREADS);
- page = (void *)__get_free_page(__GFP_RECLAIM | __GFP_HIGH);
+ page = (void *)__get_free_page(GFP_NOIO | __GFP_HIGH);
if (!page) {
pr_err("Failed to allocate LZO page\n");
ret = -ENOMEM;
@@ -989,7 +989,7 @@ static int get_swap_reader(struct swap_map_handle *handle,
last = tmp;
tmp->map = (struct swap_map_page *)
- __get_free_page(__GFP_RECLAIM | __GFP_HIGH);
+ __get_free_page(GFP_NOIO | __GFP_HIGH);
if (!tmp->map) {
release_swap_reader(handle);
return -ENOMEM;
@@ -1261,8 +1261,8 @@ static int load_image_lzo(struct swap_map_handle *handle,
for (i = 0; i < read_pages; i++) {
page[i] = (void *)__get_free_page(i < LZO_CMP_PAGES ?
- __GFP_RECLAIM | __GFP_HIGH :
- __GFP_RECLAIM | __GFP_NOWARN |
+ GFP_NOIO | __GFP_HIGH :
+ GFP_NOIO | __GFP_NOWARN |
__GFP_NORETRY);
if (!page[i]) {
diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
index 7a693e31184a..40cea6735c2d 100644
--- a/kernel/rcu/rcu.h
+++ b/kernel/rcu/rcu.h
@@ -270,6 +270,12 @@ static inline void rcu_init_levelspread(int *levelspread, const int *levelcnt)
}
}
+/* Returns first leaf rcu_node of the specified RCU flavor. */
+#define rcu_first_leaf_node(rsp) ((rsp)->level[rcu_num_lvls - 1])
+
+/* Is this rcu_node a leaf? */
+#define rcu_is_leaf_node(rnp) ((rnp)->level == rcu_num_lvls - 1)
+
/*
* Do a full breadth-first scan of the rcu_node structures for the
* specified rcu_state structure.
@@ -284,8 +290,7 @@ static inline void rcu_init_levelspread(int *levelspread, const int *levelcnt)
* rcu_node tree with but one rcu_node structure, this loop is a no-op.
*/
#define rcu_for_each_nonleaf_node_breadth_first(rsp, rnp) \
- for ((rnp) = &(rsp)->node[0]; \
- (rnp) < (rsp)->level[rcu_num_lvls - 1]; (rnp)++)
+ for ((rnp) = &(rsp)->node[0]; !rcu_is_leaf_node(rnp); (rnp)++)
/*
* Scan the leaves of the rcu_node hierarchy for the specified rcu_state
@@ -294,7 +299,7 @@ static inline void rcu_init_levelspread(int *levelspread, const int *levelcnt)
* It is still a leaf node, even if it is also the root node.
*/
#define rcu_for_each_leaf_node(rsp, rnp) \
- for ((rnp) = (rsp)->level[rcu_num_lvls - 1]; \
+ for ((rnp) = rcu_first_leaf_node(rsp); \
(rnp) < &(rsp)->node[rcu_num_nodes]; (rnp)++)
/*
@@ -486,6 +491,7 @@ void rcu_force_quiescent_state(void);
void rcu_bh_force_quiescent_state(void);
void rcu_sched_force_quiescent_state(void);
extern struct workqueue_struct *rcu_gp_wq;
+extern struct workqueue_struct *rcu_par_gp_wq;
#endif /* #else #ifdef CONFIG_TINY_RCU */
#ifdef CONFIG_RCU_NOCB_CPU
diff --git a/kernel/rcu/rcu_segcblist.c b/kernel/rcu/rcu_segcblist.c
index 88cba7c2956c..5aff271adf1e 100644
--- a/kernel/rcu/rcu_segcblist.c
+++ b/kernel/rcu/rcu_segcblist.c
@@ -404,24 +404,6 @@ bool rcu_segcblist_accelerate(struct rcu_segcblist *rsclp, unsigned long seq)
}
/*
- * Scan the specified rcu_segcblist structure for callbacks that need
- * a grace period later than the one specified by "seq". We don't look
- * at the RCU_DONE_TAIL or RCU_NEXT_TAIL segments because they don't
- * have a grace-period sequence number.
- */
-bool rcu_segcblist_future_gp_needed(struct rcu_segcblist *rsclp,
- unsigned long seq)
-{
- int i;
-
- for (i = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++)
- if (rsclp->tails[i - 1] != rsclp->tails[i] &&
- ULONG_CMP_LT(seq, rsclp->gp_seq[i]))
- return true;
- return false;
-}
-
-/*
* Merge the source rcu_segcblist structure into the destination
* rcu_segcblist structure, then initialize the source. Any pending
* callbacks from the source get to start over. It is best to
diff --git a/kernel/rcu/rcu_segcblist.h b/kernel/rcu/rcu_segcblist.h
index 581c12b63544..948470cef385 100644
--- a/kernel/rcu/rcu_segcblist.h
+++ b/kernel/rcu/rcu_segcblist.h
@@ -134,7 +134,5 @@ void rcu_segcblist_insert_pend_cbs(struct rcu_segcblist *rsclp,
struct rcu_cblist *rclp);
void rcu_segcblist_advance(struct rcu_segcblist *rsclp, unsigned long seq);
bool rcu_segcblist_accelerate(struct rcu_segcblist *rsclp, unsigned long seq);
-bool rcu_segcblist_future_gp_needed(struct rcu_segcblist *rsclp,
- unsigned long seq);
void rcu_segcblist_merge(struct rcu_segcblist *dst_rsclp,
struct rcu_segcblist *src_rsclp);
diff --git a/kernel/rcu/rcuperf.c b/kernel/rcu/rcuperf.c
index 777e7a6a0292..e232846516b3 100644
--- a/kernel/rcu/rcuperf.c
+++ b/kernel/rcu/rcuperf.c
@@ -369,7 +369,7 @@ static bool __maybe_unused torturing_tasks(void)
*/
static void rcu_perf_wait_shutdown(void)
{
- cond_resched_rcu_qs();
+ cond_resched_tasks_rcu_qs();
if (atomic_read(&n_rcu_perf_writer_finished) < nrealwriters)
return;
while (!torture_must_stop())
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index 680c96d8c00f..e628fcfd1bde 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -593,7 +593,12 @@ static void srcu_torture_init(void)
static void srcu_torture_cleanup(void)
{
- cleanup_srcu_struct(&srcu_ctld);
+ static DEFINE_TORTURE_RANDOM(rand);
+
+ if (torture_random(&rand) & 0x800)
+ cleanup_srcu_struct(&srcu_ctld);
+ else
+ cleanup_srcu_struct_quiesced(&srcu_ctld);
srcu_ctlp = &srcu_ctl; /* In case of a later rcutorture run. */
}
@@ -1609,6 +1614,9 @@ static enum cpuhp_state rcutor_hp;
static void
rcu_torture_cleanup(void)
{
+ int flags = 0;
+ unsigned long gpnum = 0;
+ unsigned long completed = 0;
int i;
rcutorture_record_test_transition();
@@ -1639,6 +1647,11 @@ rcu_torture_cleanup(void)
fakewriter_tasks = NULL;
}
+ rcutorture_get_gp_data(cur_ops->ttype, &flags, &gpnum, &completed);
+ srcutorture_get_gp_data(cur_ops->ttype, srcu_ctlp,
+ &flags, &gpnum, &completed);
+ pr_alert("%s: End-test grace-period state: g%lu c%lu f%#x\n",
+ cur_ops->name, gpnum, completed, flags);
torture_stop_kthread(rcu_torture_stats, stats_task);
torture_stop_kthread(rcu_torture_fqs, fqs_task);
for (i = 0; i < ncbflooders; i++)
diff --git a/kernel/rcu/srcutiny.c b/kernel/rcu/srcutiny.c
index 76ac5f50b2c7..622792abe41a 100644
--- a/kernel/rcu/srcutiny.c
+++ b/kernel/rcu/srcutiny.c
@@ -86,16 +86,19 @@ EXPORT_SYMBOL_GPL(init_srcu_struct);
* Must invoke this after you are finished using a given srcu_struct that
* was initialized via init_srcu_struct(), else you leak memory.
*/
-void cleanup_srcu_struct(struct srcu_struct *sp)
+void _cleanup_srcu_struct(struct srcu_struct *sp, bool quiesced)
{
WARN_ON(sp->srcu_lock_nesting[0] || sp->srcu_lock_nesting[1]);
- flush_work(&sp->srcu_work);
+ if (quiesced)
+ WARN_ON(work_pending(&sp->srcu_work));
+ else
+ flush_work(&sp->srcu_work);
WARN_ON(sp->srcu_gp_running);
WARN_ON(sp->srcu_gp_waiting);
WARN_ON(sp->srcu_cb_head);
WARN_ON(&sp->srcu_cb_head != sp->srcu_cb_tail);
}
-EXPORT_SYMBOL_GPL(cleanup_srcu_struct);
+EXPORT_SYMBOL_GPL(_cleanup_srcu_struct);
/*
* Removes the count for the old reader from the appropriate element of
diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
index fb560fca9ef4..b4123d7a2cec 100644
--- a/kernel/rcu/srcutree.c
+++ b/kernel/rcu/srcutree.c
@@ -366,24 +366,28 @@ static unsigned long srcu_get_delay(struct srcu_struct *sp)
return SRCU_INTERVAL;
}
-/**
- * cleanup_srcu_struct - deconstruct a sleep-RCU structure
- * @sp: structure to clean up.
- *
- * Must invoke this after you are finished using a given srcu_struct that
- * was initialized via init_srcu_struct(), else you leak memory.
- */
-void cleanup_srcu_struct(struct srcu_struct *sp)
+/* Helper for cleanup_srcu_struct() and cleanup_srcu_struct_quiesced(). */
+void _cleanup_srcu_struct(struct srcu_struct *sp, bool quiesced)
{
int cpu;
if (WARN_ON(!srcu_get_delay(sp)))
- return; /* Leakage unless caller handles error. */
+ return; /* Just leak it! */
if (WARN_ON(srcu_readers_active(sp)))
- return; /* Leakage unless caller handles error. */
- flush_delayed_work(&sp->work);
+ return; /* Just leak it! */
+ if (quiesced) {
+ if (WARN_ON(delayed_work_pending(&sp->work)))
+ return; /* Just leak it! */
+ } else {
+ flush_delayed_work(&sp->work);
+ }
for_each_possible_cpu(cpu)
- flush_delayed_work(&per_cpu_ptr(sp->sda, cpu)->work);
+ if (quiesced) {
+ if (WARN_ON(delayed_work_pending(&per_cpu_ptr(sp->sda, cpu)->work)))
+ return; /* Just leak it! */
+ } else {
+ flush_delayed_work(&per_cpu_ptr(sp->sda, cpu)->work);
+ }
if (WARN_ON(rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)) != SRCU_STATE_IDLE) ||
WARN_ON(srcu_readers_active(sp))) {
pr_info("%s: Active srcu_struct %p state: %d\n", __func__, sp, rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)));
@@ -392,7 +396,7 @@ void cleanup_srcu_struct(struct srcu_struct *sp)
free_percpu(sp->sda);
sp->sda = NULL;
}
-EXPORT_SYMBOL_GPL(cleanup_srcu_struct);
+EXPORT_SYMBOL_GPL(_cleanup_srcu_struct);
/*
* Counts the new reader in the appropriate per-CPU element of the
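
The quiesced variant above trades flush_delayed_work() for a WARN_ON() that nothing is pending, shifting the burden of quiescence onto the caller. A pthread sketch of the two cleanup modes follows, with a one-slot "workqueue" standing in for the SRCU workqueue; everything here is illustrative.

#include <assert.h>
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct wq {
        pthread_mutex_t lock;
        pthread_cond_t cond;
        bool pending;
};

static void wq_flush(struct wq *w)      /* flush_work() analogue */
{
        pthread_mutex_lock(&w->lock);
        while (w->pending)
                pthread_cond_wait(&w->cond, &w->lock);
        pthread_mutex_unlock(&w->lock);
}

static void cleanup_struct(struct wq *w, bool quiesced)
{
        if (quiesced)
                assert(!w->pending);    /* WARN_ON(work_pending()) */
        else
                wq_flush(w);
        /* ...now safe to free the enclosing structure... */
}

static void *worker(void *arg)
{
        struct wq *w = arg;

        pthread_mutex_lock(&w->lock);
        w->pending = false;             /* the queued work "runs" */
        pthread_cond_broadcast(&w->cond);
        pthread_mutex_unlock(&w->lock);
        return NULL;
}

int main(void)
{
        struct wq w = {
                .lock = PTHREAD_MUTEX_INITIALIZER,
                .cond = PTHREAD_COND_INITIALIZER,
                .pending = true,
        };
        pthread_t t;

        pthread_create(&t, NULL, worker, &w);
        cleanup_struct(&w, false);      /* flush path waits for the work */
        pthread_join(t, NULL);
        cleanup_struct(&w, true);       /* quiesced path just checks */
        puts("cleanup paths behave as expected");
        return 0;
}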
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 2a734692a581..aa7cade1b9f3 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -524,8 +524,6 @@ module_param(rcu_kick_kthreads, bool, 0644);
static ulong jiffies_till_sched_qs = HZ / 10;
module_param(jiffies_till_sched_qs, ulong, 0444);
-static bool rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp,
- struct rcu_data *rdp);
static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *rsp));
static void force_quiescent_state(struct rcu_state *rsp);
static int rcu_pending(void);
@@ -711,44 +709,6 @@ static struct rcu_node *rcu_get_root(struct rcu_state *rsp)
}
/*
- * Is there any need for future grace periods?
- * Interrupts must be disabled. If the caller does not hold the root
- * rnp_node structure's ->lock, the results are advisory only.
- */
-static int rcu_future_needs_gp(struct rcu_state *rsp)
-{
- struct rcu_node *rnp = rcu_get_root(rsp);
- int idx = (READ_ONCE(rnp->completed) + 1) & 0x1;
- int *fp = &rnp->need_future_gp[idx];
-
- lockdep_assert_irqs_disabled();
- return READ_ONCE(*fp);
-}
-
-/*
- * Does the current CPU require a not-yet-started grace period?
- * The caller must have disabled interrupts to prevent races with
- * normal callback registry.
- */
-static bool
-cpu_needs_another_gp(struct rcu_state *rsp, struct rcu_data *rdp)
-{
- lockdep_assert_irqs_disabled();
- if (rcu_gp_in_progress(rsp))
- return false; /* No, a grace period is already in progress. */
- if (rcu_future_needs_gp(rsp))
- return true; /* Yes, a no-CBs CPU needs one. */
- if (!rcu_segcblist_is_enabled(&rdp->cblist))
- return false; /* No, this is a no-CBs (or offline) CPU. */
- if (!rcu_segcblist_restempty(&rdp->cblist, RCU_NEXT_READY_TAIL))
- return true; /* Yes, CPU has newly registered callbacks. */
- if (rcu_segcblist_future_gp_needed(&rdp->cblist,
- READ_ONCE(rsp->completed)))
- return true; /* Yes, CBs for future grace period. */
- return false; /* No grace period needed. */
-}
-
-/*
* Enter an RCU extended quiescent state, which can be either the
* idle loop or adaptive-tickless usermode execution.
*
@@ -1234,10 +1194,10 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
}
/*
- * Has this CPU encountered a cond_resched_rcu_qs() since the
- * beginning of the grace period? For this to be the case,
- * the CPU has to have noticed the current grace period. This
- * might not be the case for nohz_full CPUs looping in the kernel.
+ * Has this CPU encountered a cond_resched() since the beginning
+ * of the grace period? For this to be the case, the CPU has to
+ * have noticed the current grace period. This might not be the
+ * case for nohz_full CPUs looping in the kernel.
*/
jtsq = jiffies_till_sched_qs;
ruqp = per_cpu_ptr(&rcu_dynticks.rcu_urgent_qs, rdp->cpu);
@@ -1642,18 +1602,30 @@ static unsigned long rcu_cbs_completed(struct rcu_state *rsp,
return rnp->completed + 1;
/*
+ * If the current rcu_node structure believes that RCU is
+ * idle, and if the rcu_state structure does not yet reflect
+ * the start of a new grace period, then the next grace period
+ * will suffice. The memory barrier is needed to accurately
+ * sample the rsp->gpnum, and pairs with the second lock
+ * acquisition in rcu_gp_init(), which is augmented with
+ * smp_mb__after_unlock_lock() for this purpose.
+ */
+ if (rnp->gpnum == rnp->completed) {
+ smp_mb(); /* See above block comment. */
+ if (READ_ONCE(rsp->gpnum) == rnp->completed)
+ return rnp->completed + 1;
+ }
+
+ /*
* Otherwise, wait for a possible partial grace period and
* then the subsequent full grace period.
*/
return rnp->completed + 2;
}
-/*
- * Trace-event helper function for rcu_start_future_gp() and
- * rcu_nocb_wait_gp().
- */
-static void trace_rcu_future_gp(struct rcu_node *rnp, struct rcu_data *rdp,
- unsigned long c, const char *s)
+/* Trace-event wrapper function for trace_rcu_future_grace_period. */
+static void trace_rcu_this_gp(struct rcu_node *rnp, struct rcu_data *rdp,
+ unsigned long c, const char *s)
{
trace_rcu_future_grace_period(rdp->rsp->name, rnp->gpnum,
rnp->completed, c, rnp->level,
@@ -1661,96 +1633,67 @@ static void trace_rcu_future_gp(struct rcu_node *rnp, struct rcu_data *rdp,
}
/*
- * Start some future grace period, as needed to handle newly arrived
+ * Start the specified grace period, as needed to handle newly arrived
* callbacks. The required future grace periods are recorded in each
- * rcu_node structure's ->need_future_gp field. Returns true if there
+ * rcu_node structure's ->need_future_gp[] field. Returns true if there
* is reason to awaken the grace-period kthread.
*
- * The caller must hold the specified rcu_node structure's ->lock.
+ * The caller must hold the specified rcu_node structure's ->lock, which
+ * is why the caller is responsible for waking the grace-period kthread.
*/
-static bool __maybe_unused
-rcu_start_future_gp(struct rcu_node *rnp, struct rcu_data *rdp,
- unsigned long *c_out)
+static bool rcu_start_this_gp(struct rcu_node *rnp, struct rcu_data *rdp,
+ unsigned long c)
{
- unsigned long c;
bool ret = false;
- struct rcu_node *rnp_root = rcu_get_root(rdp->rsp);
-
- raw_lockdep_assert_held_rcu_node(rnp);
-
- /*
- * Pick up grace-period number for new callbacks. If this
- * grace period is already marked as needed, return to the caller.
- */
- c = rcu_cbs_completed(rdp->rsp, rnp);
- trace_rcu_future_gp(rnp, rdp, c, TPS("Startleaf"));
- if (rnp->need_future_gp[c & 0x1]) {
- trace_rcu_future_gp(rnp, rdp, c, TPS("Prestartleaf"));
- goto out;
- }
+ struct rcu_state *rsp = rdp->rsp;
+ struct rcu_node *rnp_root;
/*
- * If either this rcu_node structure or the root rcu_node structure
- * believe that a grace period is in progress, then we must wait
- * for the one following, which is in "c". Because our request
- * will be noticed at the end of the current grace period, we don't
- * need to explicitly start one. We only do the lockless check
- * of rnp_root's fields if the current rcu_node structure thinks
- * there is no grace period in flight, and because we hold rnp->lock,
- * the only possible change is when rnp_root's two fields are
- * equal, in which case rnp_root->gpnum might be concurrently
- * incremented. But that is OK, as it will just result in our
- * doing some extra useless work.
+ * Use funnel locking to either acquire the root rcu_node
+ * structure's lock or bail out if the need for this grace period
+ * has already been recorded -- or has already started. If there
+ * is already a grace period in progress in a non-leaf node, no
+ * recording is needed because the end of the grace period will
+ * scan the leaf rcu_node structures. Note that rnp->lock must
+ * not be released.
*/
- if (rnp->gpnum != rnp->completed ||
- READ_ONCE(rnp_root->gpnum) != READ_ONCE(rnp_root->completed)) {
- rnp->need_future_gp[c & 0x1]++;
- trace_rcu_future_gp(rnp, rdp, c, TPS("Startedleaf"));
- goto out;
+ raw_lockdep_assert_held_rcu_node(rnp);
+ trace_rcu_this_gp(rnp, rdp, c, TPS("Startleaf"));
+ for (rnp_root = rnp; 1; rnp_root = rnp_root->parent) {
+ if (rnp_root != rnp)
+ raw_spin_lock_rcu_node(rnp_root);
+ WARN_ON_ONCE(ULONG_CMP_LT(rnp_root->gpnum +
+ need_future_gp_mask(), c));
+ if (need_future_gp_element(rnp_root, c) ||
+ ULONG_CMP_GE(rnp_root->gpnum, c) ||
+ (rnp != rnp_root &&
+ rnp_root->gpnum != rnp_root->completed)) {
+ trace_rcu_this_gp(rnp_root, rdp, c, TPS("Prestarted"));
+ goto unlock_out;
+ }
+ need_future_gp_element(rnp_root, c) = true;
+ if (rnp_root != rnp && rnp_root->parent != NULL)
+ raw_spin_unlock_rcu_node(rnp_root);
+ if (!rnp_root->parent)
+ break; /* At root, and perhaps also leaf. */
}
- /*
- * There might be no grace period in progress. If we don't already
- * hold it, acquire the root rcu_node structure's lock in order to
- * start one (if needed).
- */
- if (rnp != rnp_root)
- raw_spin_lock_rcu_node(rnp_root);
-
- /*
- * Get a new grace-period number. If there really is no grace
- * period in progress, it will be smaller than the one we obtained
- * earlier. Adjust callbacks as needed.
- */
- c = rcu_cbs_completed(rdp->rsp, rnp_root);
- if (!rcu_is_nocb_cpu(rdp->cpu))
- (void)rcu_segcblist_accelerate(&rdp->cblist, c);
-
- /*
- * If the needed for the required grace period is already
- * recorded, trace and leave.
- */
- if (rnp_root->need_future_gp[c & 0x1]) {
- trace_rcu_future_gp(rnp, rdp, c, TPS("Prestartedroot"));
+ /* If GP already in progress, just leave, otherwise start one. */
+ if (rnp_root->gpnum != rnp_root->completed) {
+ trace_rcu_this_gp(rnp_root, rdp, c, TPS("Startedleafroot"));
goto unlock_out;
}
-
- /* Record the need for the future grace period. */
- rnp_root->need_future_gp[c & 0x1]++;
-
- /* If a grace period is not already in progress, start one. */
- if (rnp_root->gpnum != rnp_root->completed) {
- trace_rcu_future_gp(rnp, rdp, c, TPS("Startedleafroot"));
- } else {
- trace_rcu_future_gp(rnp, rdp, c, TPS("Startedroot"));
- ret = rcu_start_gp_advanced(rdp->rsp, rnp_root, rdp);
+ trace_rcu_this_gp(rnp_root, rdp, c, TPS("Startedroot"));
+ WRITE_ONCE(rsp->gp_flags, rsp->gp_flags | RCU_GP_FLAG_INIT);
+ if (!rsp->gp_kthread) {
+ trace_rcu_this_gp(rnp_root, rdp, c, TPS("NoGPkthread"));
+ goto unlock_out;
}
+ trace_rcu_grace_period(rsp->name, READ_ONCE(rsp->gpnum), TPS("newreq"));
+ ret = true; /* Caller must wake GP kthread. */
unlock_out:
if (rnp != rnp_root)
raw_spin_unlock_rcu_node(rnp_root);
-out:
- if (c_out != NULL)
- *c_out = c;
return ret;
}
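The rewritten rcu_start_this_gp() above replaces the old root-lock-only scheme with funnel locking: the walk starts at the leaf, whose ->lock the caller already holds, and climbs toward the root, bailing out at the first level that already records the request. Stripped of the RCU-specific checks, the shape of the walk is roughly as follows (a sketch only; gp_request_recorded(), record_gp_request(), and start_gp_if_idle() are hypothetical helpers):

static bool funnel_record_gp(struct rcu_node *leaf, unsigned long c)
{
	struct rcu_node *node;

	for (node = leaf; ; node = node->parent) {
		if (node != leaf)
			raw_spin_lock_rcu_node(node);	/* Leaf ->lock held by caller. */
		if (gp_request_recorded(node, c)) {
			if (node != leaf)
				raw_spin_unlock_rcu_node(node);
			return false;			/* Earlier requester owns it. */
		}
		record_gp_request(node, c);
		if (node != leaf && node->parent)
			raw_spin_unlock_rcu_node(node);	/* Keep only the root lock. */
		if (!node->parent)
			break;				/* Reached the root. */
	}
	start_gp_if_idle(node);				/* Root ->lock still held. */
	if (node != leaf)
		raw_spin_unlock_rcu_node(node);
	return true;
}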
@@ -1758,16 +1701,16 @@ out:
* Clean up any old requests for the just-ended grace period. Also return
* whether any additional grace periods have been requested.
*/
-static int rcu_future_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp)
+static bool rcu_future_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp)
{
- int c = rnp->completed;
- int needmore;
+ unsigned long c = rnp->completed;
+ bool needmore;
struct rcu_data *rdp = this_cpu_ptr(rsp->rda);
- rnp->need_future_gp[c & 0x1] = 0;
- needmore = rnp->need_future_gp[(c + 1) & 0x1];
- trace_rcu_future_gp(rnp, rdp, c,
- needmore ? TPS("CleanupMore") : TPS("Cleanup"));
+ need_future_gp_element(rnp, c) = false;
+ needmore = need_any_future_gp(rnp);
+ trace_rcu_this_gp(rnp, rdp, c,
+ needmore ? TPS("CleanupMore") : TPS("Cleanup"));
return needmore;
}
@@ -1802,6 +1745,7 @@ static void rcu_gp_kthread_wake(struct rcu_state *rsp)
static bool rcu_accelerate_cbs(struct rcu_state *rsp, struct rcu_node *rnp,
struct rcu_data *rdp)
{
+ unsigned long c;
bool ret = false;
raw_lockdep_assert_held_rcu_node(rnp);
@@ -1820,8 +1764,9 @@ static bool rcu_accelerate_cbs(struct rcu_state *rsp, struct rcu_node *rnp,
* accelerating callback invocation to an earlier grace-period
* number.
*/
- if (rcu_segcblist_accelerate(&rdp->cblist, rcu_cbs_completed(rsp, rnp)))
- ret = rcu_start_future_gp(rnp, rdp, NULL);
+ c = rcu_cbs_completed(rsp, rnp);
+ if (rcu_segcblist_accelerate(&rdp->cblist, c))
+ ret = rcu_start_this_gp(rnp, rdp, c);
/* Trace depending on how much we were able to accelerate. */
if (rcu_segcblist_restempty(&rdp->cblist, RCU_WAIT_TAIL))
@@ -2049,7 +1994,7 @@ static bool rcu_gp_init(struct rcu_state *rsp)
rnp->level, rnp->grplo,
rnp->grphi, rnp->qsmask);
raw_spin_unlock_irq_rcu_node(rnp);
- cond_resched_rcu_qs();
+ cond_resched_tasks_rcu_qs();
WRITE_ONCE(rsp->gp_activity, jiffies);
}
@@ -2108,7 +2053,6 @@ static void rcu_gp_cleanup(struct rcu_state *rsp)
{
unsigned long gp_duration;
bool needgp = false;
- int nocb = 0;
struct rcu_data *rdp;
struct rcu_node *rnp = rcu_get_root(rsp);
struct swait_queue_head *sq;
@@ -2147,31 +2091,35 @@ static void rcu_gp_cleanup(struct rcu_state *rsp)
if (rnp == rdp->mynode)
needgp = __note_gp_changes(rsp, rnp, rdp) || needgp;
/* smp_mb() provided by prior unlock-lock pair. */
- nocb += rcu_future_gp_cleanup(rsp, rnp);
+ needgp = rcu_future_gp_cleanup(rsp, rnp) || needgp;
sq = rcu_nocb_gp_get(rnp);
raw_spin_unlock_irq_rcu_node(rnp);
rcu_nocb_gp_cleanup(sq);
- cond_resched_rcu_qs();
+ cond_resched_tasks_rcu_qs();
WRITE_ONCE(rsp->gp_activity, jiffies);
rcu_gp_slow(rsp, gp_cleanup_delay);
}
rnp = rcu_get_root(rsp);
raw_spin_lock_irq_rcu_node(rnp); /* Order GP before ->completed update. */
- rcu_nocb_gp_set(rnp, nocb);
/* Declare grace period done. */
WRITE_ONCE(rsp->completed, rsp->gpnum);
trace_rcu_grace_period(rsp->name, rsp->completed, TPS("end"));
rsp->gp_state = RCU_GP_IDLE;
+ /* Check for GP requests since above loop. */
rdp = this_cpu_ptr(rsp->rda);
+ if (need_any_future_gp(rnp)) {
+ trace_rcu_this_gp(rnp, rdp, rsp->completed - 1,
+ TPS("CleanupMore"));
+ needgp = true;
+ }
/* Advance CBs to reduce false positives below. */
- needgp = rcu_advance_cbs(rsp, rnp, rdp) || needgp;
- if (needgp || cpu_needs_another_gp(rsp, rdp)) {
+ if (!rcu_accelerate_cbs(rsp, rnp, rdp) && needgp) {
WRITE_ONCE(rsp->gp_flags, RCU_GP_FLAG_INIT);
- trace_rcu_grace_period(rsp->name,
- READ_ONCE(rsp->gpnum),
+ trace_rcu_grace_period(rsp->name, READ_ONCE(rsp->gpnum),
TPS("newreq"));
}
+ WRITE_ONCE(rsp->gp_flags, rsp->gp_flags & RCU_GP_FLAG_INIT);
raw_spin_unlock_irq_rcu_node(rnp);
}
@@ -2202,7 +2150,7 @@ static int __noreturn rcu_gp_kthread(void *arg)
/* Locking provides needed memory barrier. */
if (rcu_gp_init(rsp))
break;
- cond_resched_rcu_qs();
+ cond_resched_tasks_rcu_qs();
WRITE_ONCE(rsp->gp_activity, jiffies);
WARN_ON(signal_pending(current));
trace_rcu_grace_period(rsp->name,
@@ -2247,7 +2195,7 @@ static int __noreturn rcu_gp_kthread(void *arg)
trace_rcu_grace_period(rsp->name,
READ_ONCE(rsp->gpnum),
TPS("fqsend"));
- cond_resched_rcu_qs();
+ cond_resched_tasks_rcu_qs();
WRITE_ONCE(rsp->gp_activity, jiffies);
ret = 0; /* Force full wait till next FQS. */
j = jiffies_till_next_fqs;
@@ -2260,7 +2208,7 @@ static int __noreturn rcu_gp_kthread(void *arg)
}
} else {
/* Deal with stray signal. */
- cond_resched_rcu_qs();
+ cond_resched_tasks_rcu_qs();
WRITE_ONCE(rsp->gp_activity, jiffies);
WARN_ON(signal_pending(current));
trace_rcu_grace_period(rsp->name,
@@ -2283,71 +2231,6 @@ static int __noreturn rcu_gp_kthread(void *arg)
}
/*
- * Start a new RCU grace period if warranted, re-initializing the hierarchy
- * in preparation for detecting the next grace period. The caller must hold
- * the root node's ->lock and hard irqs must be disabled.
- *
- * Note that it is legal for a dying CPU (which is marked as offline) to
- * invoke this function. This can happen when the dying CPU reports its
- * quiescent state.
- *
- * Returns true if the grace-period kthread must be awakened.
- */
-static bool
-rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp,
- struct rcu_data *rdp)
-{
- raw_lockdep_assert_held_rcu_node(rnp);
- if (!rsp->gp_kthread || !cpu_needs_another_gp(rsp, rdp)) {
- /*
- * Either we have not yet spawned the grace-period
- * task, this CPU does not need another grace period,
- * or a grace period is already in progress.
- * Either way, don't start a new grace period.
- */
- return false;
- }
- WRITE_ONCE(rsp->gp_flags, RCU_GP_FLAG_INIT);
- trace_rcu_grace_period(rsp->name, READ_ONCE(rsp->gpnum),
- TPS("newreq"));
-
- /*
- * We can't do wakeups while holding the rnp->lock, as that
- * could cause possible deadlocks with the rq->lock. Defer
- * the wakeup to our caller.
- */
- return true;
-}
-
-/*
- * Similar to rcu_start_gp_advanced(), but also advance the calling CPU's
- * callbacks. Note that rcu_start_gp_advanced() cannot do this because it
- * is invoked indirectly from rcu_advance_cbs(), which would result in
- * endless recursion -- or would do so if it wasn't for the self-deadlock
- * that is encountered beforehand.
- *
- * Returns true if the grace-period kthread needs to be awakened.
- */
-static bool rcu_start_gp(struct rcu_state *rsp)
-{
- struct rcu_data *rdp = this_cpu_ptr(rsp->rda);
- struct rcu_node *rnp = rcu_get_root(rsp);
- bool ret = false;
-
- /*
- * If there is no grace period in progress right now, any
- * callbacks we have up to this point will be satisfied by the
- * next grace period. Also, advancing the callbacks reduces the
- * probability of false positives from cpu_needs_another_gp()
- * resulting in pointless grace periods. So, advance callbacks
- * then start the grace period!
- */
- ret = rcu_advance_cbs(rsp, rnp, rdp) || ret;
- ret = rcu_start_gp_advanced(rsp, rnp, rdp) || ret;
- return ret;
-}
-
-/*
* Report a full set of quiescent states to the specified rcu_state data
* structure. Invoke rcu_gp_kthread_wake() to awaken the grace-period
* kthread if another grace period is required. Whether we wake
@@ -2398,7 +2281,7 @@ rcu_report_qs_rnp(unsigned long mask, struct rcu_state *rsp,
return;
}
WARN_ON_ONCE(oldmask); /* Any child must be all zeroed! */
- WARN_ON_ONCE(rnp->level != rcu_num_lvls - 1 &&
+ WARN_ON_ONCE(!rcu_is_leaf_node(rnp) &&
rcu_preempt_blocked_readers_cgp(rnp));
rnp->qsmask &= ~mask;
trace_rcu_quiescent_state_report(rsp->name, rnp->gpnum,
@@ -2782,7 +2665,7 @@ static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *rsp))
struct rcu_node *rnp;
rcu_for_each_leaf_node(rsp, rnp) {
- cond_resched_rcu_qs();
+ cond_resched_tasks_rcu_qs();
mask = 0;
raw_spin_lock_irqsave_rcu_node(rnp, flags);
if (rnp->qsmask == 0) {
@@ -2874,22 +2757,27 @@ __rcu_process_callbacks(struct rcu_state *rsp)
unsigned long flags;
bool needwake;
struct rcu_data *rdp = raw_cpu_ptr(rsp->rda);
+ struct rcu_node *rnp;
WARN_ON_ONCE(!rdp->beenonline);
/* Update RCU state based on any recent quiescent states. */
rcu_check_quiescent_state(rsp, rdp);
- /* Does this CPU require a not-yet-started grace period? */
- local_irq_save(flags);
- if (cpu_needs_another_gp(rsp, rdp)) {
- raw_spin_lock_rcu_node(rcu_get_root(rsp)); /* irqs disabled. */
- needwake = rcu_start_gp(rsp);
- raw_spin_unlock_irqrestore_rcu_node(rcu_get_root(rsp), flags);
- if (needwake)
- rcu_gp_kthread_wake(rsp);
- } else {
- local_irq_restore(flags);
+ /* No grace period and unregistered callbacks? */
+ if (!rcu_gp_in_progress(rsp) &&
+ rcu_segcblist_is_enabled(&rdp->cblist)) {
+ local_irq_save(flags);
+ if (rcu_segcblist_restempty(&rdp->cblist, RCU_NEXT_READY_TAIL)) {
+ local_irq_restore(flags);
+ } else {
+ rnp = rdp->mynode;
+ raw_spin_lock_rcu_node(rnp); /* irqs disabled. */
+ needwake = rcu_accelerate_cbs(rsp, rnp, rdp);
+ raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
+ if (needwake)
+ rcu_gp_kthread_wake(rsp);
+ }
}
/* If there are callbacks ready, invoke them. */
@@ -2973,11 +2861,11 @@ static void __call_rcu_core(struct rcu_state *rsp, struct rcu_data *rdp,
/* Start a new grace period if one not already started. */
if (!rcu_gp_in_progress(rsp)) {
- struct rcu_node *rnp_root = rcu_get_root(rsp);
+ struct rcu_node *rnp = rdp->mynode;
- raw_spin_lock_rcu_node(rnp_root);
- needwake = rcu_start_gp(rsp);
- raw_spin_unlock_rcu_node(rnp_root);
+ raw_spin_lock_rcu_node(rnp);
+ needwake = rcu_accelerate_cbs(rsp, rnp, rdp);
+ raw_spin_unlock_rcu_node(rnp);
if (needwake)
rcu_gp_kthread_wake(rsp);
} else {
@@ -3368,7 +3256,9 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp)
return 1;
/* Has RCU gone idle with this CPU needing another grace period? */
- if (cpu_needs_another_gp(rsp, rdp))
+ if (!rcu_gp_in_progress(rsp) &&
+ rcu_segcblist_is_enabled(&rdp->cblist) &&
+ !rcu_segcblist_restempty(&rdp->cblist, RCU_NEXT_READY_TAIL))
return 1;
/* Has another RCU grace period completed? */
@@ -3775,6 +3665,8 @@ int rcutree_dead_cpu(unsigned int cpu)
return 0;
}
+static DEFINE_PER_CPU(int, rcu_cpu_started);
+
/*
* Mark the specified CPU as being online so that subsequent grace periods
* (both expedited and normal) will wait on it. Note that this means that
@@ -3796,6 +3688,11 @@ void rcu_cpu_starting(unsigned int cpu)
struct rcu_node *rnp;
struct rcu_state *rsp;
+ if (per_cpu(rcu_cpu_started, cpu))
+ return;
+
+ per_cpu(rcu_cpu_started, cpu) = 1;
+
for_each_rcu_flavor(rsp) {
rdp = per_cpu_ptr(rsp->rda, cpu);
rnp = rdp->mynode;
@@ -3852,6 +3749,8 @@ void rcu_report_dead(unsigned int cpu)
preempt_enable();
for_each_rcu_flavor(rsp)
rcu_cleanup_dying_idle_cpu(cpu, rsp);
+
+ per_cpu(rcu_cpu_started, cpu) = 0;
}
/* Migrate the dead CPU's callbacks to the current CPU. */
@@ -3861,6 +3760,7 @@ static void rcu_migrate_callbacks(int cpu, struct rcu_state *rsp)
struct rcu_data *my_rdp;
struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
struct rcu_node *rnp_root = rcu_get_root(rdp->rsp);
+ bool needwake;
if (rcu_is_nocb_cpu(cpu) || rcu_segcblist_empty(&rdp->cblist))
return; /* No callbacks to migrate. */
@@ -3872,12 +3772,15 @@ static void rcu_migrate_callbacks(int cpu, struct rcu_state *rsp)
return;
}
raw_spin_lock_rcu_node(rnp_root); /* irqs already disabled. */
- rcu_advance_cbs(rsp, rnp_root, rdp); /* Leverage recent GPs. */
- rcu_advance_cbs(rsp, rnp_root, my_rdp); /* Assign GP to pending CBs. */
+ /* Leverage recent GPs and set GP for new callbacks. */
+ needwake = rcu_advance_cbs(rsp, rnp_root, rdp) ||
+ rcu_advance_cbs(rsp, rnp_root, my_rdp);
rcu_segcblist_merge(&my_rdp->cblist, &rdp->cblist);
WARN_ON_ONCE(rcu_segcblist_empty(&my_rdp->cblist) !=
!rcu_segcblist_n_cbs(&my_rdp->cblist));
raw_spin_unlock_irqrestore_rcu_node(rnp_root, flags);
+ if (needwake)
+ rcu_gp_kthread_wake(rsp);
WARN_ONCE(rcu_segcblist_n_cbs(&rdp->cblist) != 0 ||
!rcu_segcblist_empty(&rdp->cblist),
"rcu_cleanup_dead_cpu: Callbacks on offline CPU %d: qlen=%lu, 1stCB=%p\n",
@@ -4056,7 +3959,7 @@ static void __init rcu_init_one(struct rcu_state *rsp)
init_swait_queue_head(&rsp->gp_wq);
init_swait_queue_head(&rsp->expedited_wq);
- rnp = rsp->level[rcu_num_lvls - 1];
+ rnp = rcu_first_leaf_node(rsp);
for_each_possible_cpu(i) {
while (i > rnp->grphi)
rnp++;
@@ -4168,6 +4071,7 @@ static void __init rcu_dump_rcu_node_tree(struct rcu_state *rsp)
}
struct workqueue_struct *rcu_gp_wq;
+struct workqueue_struct *rcu_par_gp_wq;
void __init rcu_init(void)
{
@@ -4199,6 +4103,8 @@ void __init rcu_init(void)
/* Create workqueue for expedited GPs and for Tree SRCU. */
rcu_gp_wq = alloc_workqueue("rcu_gp", WQ_MEM_RECLAIM, 0);
WARN_ON(!rcu_gp_wq);
+ rcu_par_gp_wq = alloc_workqueue("rcu_par_gp", WQ_MEM_RECLAIM, 0);
+ WARN_ON(!rcu_par_gp_wq);
}
#include "tree_exp.h"
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index f491ab4f2e8e..78e051dffc5b 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -58,6 +58,14 @@ struct rcu_dynticks {
#endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */
};
+/* Communicate arguments to a workqueue handler. */
+struct rcu_exp_work {
+ smp_call_func_t rew_func;
+ struct rcu_state *rew_rsp;
+ unsigned long rew_s;
+ struct work_struct rew_work;
+};
+
/* RCU's kthread states for tracing. */
#define RCU_KTHREAD_STOPPED 0
#define RCU_KTHREAD_RUNNING 1
@@ -150,15 +158,32 @@ struct rcu_node {
struct swait_queue_head nocb_gp_wq[2];
/* Place for rcu_nocb_kthread() to wait GP. */
#endif /* #ifdef CONFIG_RCU_NOCB_CPU */
- int need_future_gp[2];
- /* Counts of upcoming no-CB GP requests. */
+ u8 need_future_gp[4]; /* Counts of upcoming GP requests. */
raw_spinlock_t fqslock ____cacheline_internodealigned_in_smp;
spinlock_t exp_lock ____cacheline_internodealigned_in_smp;
unsigned long exp_seq_rq;
wait_queue_head_t exp_wq[4];
+ struct rcu_exp_work rew;
+ bool exp_need_flush; /* Need to flush workitem? */
} ____cacheline_internodealigned_in_smp;
+/* Accessors for ->need_future_gp[] array. */
+#define need_future_gp_mask() \
+ (ARRAY_SIZE(((struct rcu_node *)NULL)->need_future_gp) - 1)
+#define need_future_gp_element(rnp, c) \
+ ((rnp)->need_future_gp[(c) & need_future_gp_mask()])
+#define need_any_future_gp(rnp) \
+({ \
+ int __i; \
+ bool __nonzero = false; \
+ \
+ for (__i = 0; __i < ARRAY_SIZE((rnp)->need_future_gp); __i++) \
+ __nonzero = __nonzero || \
+ READ_ONCE((rnp)->need_future_gp[__i]); \
+ __nonzero; \
+})
+
/*
* Bitmasks in an rcu_node cover the interval [grplo, grphi] of CPU IDs, and
* are indexed relative to this interval rather than the global CPU ID space.
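The ->need_future_gp[] accessors above index a four-slot ring by grace-period number, so requests for up to four consecutive grace periods can be tracked at once. A worked example of the indexing (illustrative only):

/*
 * ARRAY_SIZE(->need_future_gp) == 4, so need_future_gp_mask() == 3:
 *
 *	need_future_gp_element(rnp,  8)  ->  need_future_gp[0]
 *	need_future_gp_element(rnp,  9)  ->  need_future_gp[1]
 *	need_future_gp_element(rnp, 11)  ->  need_future_gp[3]
 *	need_future_gp_element(rnp, 12)  ->  need_future_gp[0]  (wraps)
 */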
@@ -224,10 +249,6 @@ struct rcu_data {
#ifdef CONFIG_RCU_FAST_NO_HZ
struct rcu_head oom_head;
#endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */
- atomic_long_t exp_workdone0; /* # done by workqueue. */
- atomic_long_t exp_workdone1; /* # done by others #1. */
- atomic_long_t exp_workdone2; /* # done by others #2. */
- atomic_long_t exp_workdone3; /* # done by others #3. */
int exp_dynticks_snap; /* Double-check need for IPI. */
/* 6) Callback offloading. */
@@ -408,7 +429,6 @@ extern struct rcu_state rcu_preempt_state;
#endif /* #ifdef CONFIG_PREEMPT_RCU */
int rcu_dynticks_snap(struct rcu_dynticks *rdtp);
-bool rcu_eqs_special_set(int cpu);
#ifdef CONFIG_RCU_BOOST
DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_status);
@@ -438,7 +458,6 @@ static void rcu_preempt_boost_start_gp(struct rcu_node *rnp);
static void invoke_rcu_callbacks_kthread(void);
static bool rcu_is_callbacks_kthread(void);
#ifdef CONFIG_RCU_BOOST
-static void rcu_preempt_do_callbacks(void);
static int rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
struct rcu_node *rnp);
#endif /* #ifdef CONFIG_RCU_BOOST */
@@ -454,7 +473,6 @@ static void print_cpu_stall_info_end(void);
static void zero_cpu_stall_ticks(struct rcu_data *rdp);
static void increment_cpu_stall_ticks(void);
static bool rcu_nocb_cpu_needs_barrier(struct rcu_state *rsp, int cpu);
-static void rcu_nocb_gp_set(struct rcu_node *rnp, int nrq);
static struct swait_queue_head *rcu_nocb_gp_get(struct rcu_node *rnp);
static void rcu_nocb_gp_cleanup(struct swait_queue_head *sq);
static void rcu_init_one_nocb(struct rcu_node *rnp);
diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h
index f72eefab8543..d40708e8c5d6 100644
--- a/kernel/rcu/tree_exp.h
+++ b/kernel/rcu/tree_exp.h
@@ -20,6 +20,8 @@
* Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
*/
+#include <linux/lockdep.h>
+
/*
* Record the start of an expedited grace period.
*/
@@ -154,15 +156,35 @@ static void __maybe_unused sync_exp_reset_tree(struct rcu_state *rsp)
* for the current expedited grace period. Works only for preemptible
* RCU -- other RCU implementation use other means.
*
- * Caller must hold the rcu_state's exp_mutex.
+ * Caller must hold the specified rcu_node structure's ->lock.
*/
static bool sync_rcu_preempt_exp_done(struct rcu_node *rnp)
{
+ raw_lockdep_assert_held_rcu_node(rnp);
+
return rnp->exp_tasks == NULL &&
READ_ONCE(rnp->expmask) == 0;
}
/*
+ * Like sync_rcu_preempt_exp_done(), but this function assumes the caller
+ * doesn't hold the rcu_node's ->lock, and will acquire and release the lock
+ * itself.
+ */
+static bool sync_rcu_preempt_exp_done_unlocked(struct rcu_node *rnp)
+{
+ unsigned long flags;
+ bool ret;
+
+ raw_spin_lock_irqsave_rcu_node(rnp, flags);
+ ret = sync_rcu_preempt_exp_done(rnp);
+ raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
+
+ return ret;
+}
+
+
+/*
* Report the exit from RCU read-side critical section for the last task
* that queued itself during or before the current expedited preemptible-RCU
* grace period. This event is reported either to the rcu_node structure on
@@ -170,8 +192,7 @@ static bool sync_rcu_preempt_exp_done(struct rcu_node *rnp)
* recursively up the tree. (Calm down, calm down, we do the recursion
* iteratively!)
*
- * Caller must hold the rcu_state's exp_mutex and the specified rcu_node
- * structure's ->lock.
+ * Caller must hold the specified rcu_node structure's ->lock.
*/
static void __rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
bool wake, unsigned long flags)
@@ -207,8 +228,6 @@ static void __rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
/*
* Report expedited quiescent state for specified node. This is a
* lock-acquisition wrapper function for __rcu_report_exp_rnp().
- *
- * Caller must hold the rcu_state's exp_mutex.
*/
static void __maybe_unused rcu_report_exp_rnp(struct rcu_state *rsp,
struct rcu_node *rnp, bool wake)
@@ -221,8 +240,7 @@ static void __maybe_unused rcu_report_exp_rnp(struct rcu_state *rsp,
/*
* Report expedited quiescent state for multiple CPUs, all covered by the
- * specified leaf rcu_node structure. Caller must hold the rcu_state's
- * exp_mutex.
+ * specified leaf rcu_node structure.
*/
static void rcu_report_exp_cpu_mult(struct rcu_state *rsp, struct rcu_node *rnp,
unsigned long mask, bool wake)
@@ -248,14 +266,12 @@ static void rcu_report_exp_rdp(struct rcu_state *rsp, struct rcu_data *rdp,
}
/* Common code for synchronize_{rcu,sched}_expedited() work-done checking. */
-static bool sync_exp_work_done(struct rcu_state *rsp, atomic_long_t *stat,
- unsigned long s)
+static bool sync_exp_work_done(struct rcu_state *rsp, unsigned long s)
{
if (rcu_exp_gp_seq_done(rsp, s)) {
trace_rcu_exp_grace_period(rsp->name, s, TPS("done"));
/* Ensure test happens before caller kfree(). */
smp_mb__before_atomic(); /* ^^^ */
- atomic_long_inc(stat);
return true;
}
return false;
@@ -289,7 +305,7 @@ static bool exp_funnel_lock(struct rcu_state *rsp, unsigned long s)
* promoting locality and is not strictly needed for correctness.
*/
for (; rnp != NULL; rnp = rnp->parent) {
- if (sync_exp_work_done(rsp, &rdp->exp_workdone1, s))
+ if (sync_exp_work_done(rsp, s))
return true;
/* Work not done, either wait here or go up. */
@@ -302,8 +318,7 @@ static bool exp_funnel_lock(struct rcu_state *rsp, unsigned long s)
rnp->grplo, rnp->grphi,
TPS("wait"));
wait_event(rnp->exp_wq[rcu_seq_ctr(s) & 0x3],
- sync_exp_work_done(rsp,
- &rdp->exp_workdone2, s));
+ sync_exp_work_done(rsp, s));
return true;
}
rnp->exp_seq_rq = s; /* Followers can wait on us. */
@@ -313,7 +328,7 @@ static bool exp_funnel_lock(struct rcu_state *rsp, unsigned long s)
}
mutex_lock(&rsp->exp_mutex);
fastpath:
- if (sync_exp_work_done(rsp, &rdp->exp_workdone3, s)) {
+ if (sync_exp_work_done(rsp, s)) {
mutex_unlock(&rsp->exp_mutex);
return true;
}
@@ -362,93 +377,129 @@ static void sync_sched_exp_online_cleanup(int cpu)
}
/*
- * Select the nodes that the upcoming expedited grace period needs
- * to wait for.
+ * Select the CPUs within the specified rcu_node that the upcoming
+ * expedited grace period needs to wait for.
*/
-static void sync_rcu_exp_select_cpus(struct rcu_state *rsp,
- smp_call_func_t func)
+static void sync_rcu_exp_select_node_cpus(struct work_struct *wp)
{
int cpu;
unsigned long flags;
+ smp_call_func_t func;
unsigned long mask_ofl_test;
unsigned long mask_ofl_ipi;
int ret;
- struct rcu_node *rnp;
-
- trace_rcu_exp_grace_period(rsp->name, rcu_exp_gp_seq_endval(rsp), TPS("reset"));
- sync_exp_reset_tree(rsp);
- trace_rcu_exp_grace_period(rsp->name, rcu_exp_gp_seq_endval(rsp), TPS("select"));
- rcu_for_each_leaf_node(rsp, rnp) {
- raw_spin_lock_irqsave_rcu_node(rnp, flags);
+ struct rcu_exp_work *rewp =
+ container_of(wp, struct rcu_exp_work, rew_work);
+ struct rcu_node *rnp = container_of(rewp, struct rcu_node, rew);
+ struct rcu_state *rsp = rewp->rew_rsp;
- /* Each pass checks a CPU for identity, offline, and idle. */
- mask_ofl_test = 0;
- for_each_leaf_node_cpu_mask(rnp, cpu, rnp->expmask) {
- unsigned long mask = leaf_node_cpu_bit(rnp, cpu);
- struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
- struct rcu_dynticks *rdtp = per_cpu_ptr(&rcu_dynticks, cpu);
- int snap;
+ func = rewp->rew_func;
+ raw_spin_lock_irqsave_rcu_node(rnp, flags);
- if (raw_smp_processor_id() == cpu ||
- !(rnp->qsmaskinitnext & mask)) {
+ /* Each pass checks a CPU for identity, offline, and idle. */
+ mask_ofl_test = 0;
+ for_each_leaf_node_cpu_mask(rnp, cpu, rnp->expmask) {
+ unsigned long mask = leaf_node_cpu_bit(rnp, cpu);
+ struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
+ struct rcu_dynticks *rdtp = per_cpu_ptr(&rcu_dynticks, cpu);
+ int snap;
+
+ if (raw_smp_processor_id() == cpu ||
+ !(rnp->qsmaskinitnext & mask)) {
+ mask_ofl_test |= mask;
+ } else {
+ snap = rcu_dynticks_snap(rdtp);
+ if (rcu_dynticks_in_eqs(snap))
mask_ofl_test |= mask;
- } else {
- snap = rcu_dynticks_snap(rdtp);
- if (rcu_dynticks_in_eqs(snap))
- mask_ofl_test |= mask;
- else
- rdp->exp_dynticks_snap = snap;
- }
+ else
+ rdp->exp_dynticks_snap = snap;
}
- mask_ofl_ipi = rnp->expmask & ~mask_ofl_test;
-
- /*
- * Need to wait for any blocked tasks as well. Note that
- * additional blocking tasks will also block the expedited
- * GP until such time as the ->expmask bits are cleared.
- */
- if (rcu_preempt_has_tasks(rnp))
- rnp->exp_tasks = rnp->blkd_tasks.next;
- raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
+ }
+ mask_ofl_ipi = rnp->expmask & ~mask_ofl_test;
- /* IPI the remaining CPUs for expedited quiescent state. */
- for_each_leaf_node_cpu_mask(rnp, cpu, rnp->expmask) {
- unsigned long mask = leaf_node_cpu_bit(rnp, cpu);
- struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
+ /*
+ * Need to wait for any blocked tasks as well. Note that
+ * additional blocking tasks will also block the expedited GP
+ * until such time as the ->expmask bits are cleared.
+ */
+ if (rcu_preempt_has_tasks(rnp))
+ rnp->exp_tasks = rnp->blkd_tasks.next;
+ raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
- if (!(mask_ofl_ipi & mask))
- continue;
+ /* IPI the remaining CPUs for expedited quiescent state. */
+ for_each_leaf_node_cpu_mask(rnp, cpu, rnp->expmask) {
+ unsigned long mask = leaf_node_cpu_bit(rnp, cpu);
+ struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
+
+ if (!(mask_ofl_ipi & mask))
+ continue;
retry_ipi:
- if (rcu_dynticks_in_eqs_since(rdp->dynticks,
- rdp->exp_dynticks_snap)) {
- mask_ofl_test |= mask;
- continue;
- }
- ret = smp_call_function_single(cpu, func, rsp, 0);
- if (!ret) {
- mask_ofl_ipi &= ~mask;
- continue;
- }
- /* Failed, raced with CPU hotplug operation. */
- raw_spin_lock_irqsave_rcu_node(rnp, flags);
- if ((rnp->qsmaskinitnext & mask) &&
- (rnp->expmask & mask)) {
- /* Online, so delay for a bit and try again. */
- raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
- trace_rcu_exp_grace_period(rsp->name, rcu_exp_gp_seq_endval(rsp), TPS("selectofl"));
- schedule_timeout_uninterruptible(1);
- goto retry_ipi;
- }
- /* CPU really is offline, so we can ignore it. */
- if (!(rnp->expmask & mask))
- mask_ofl_ipi &= ~mask;
+ if (rcu_dynticks_in_eqs_since(rdp->dynticks,
+ rdp->exp_dynticks_snap)) {
+ mask_ofl_test |= mask;
+ continue;
+ }
+ ret = smp_call_function_single(cpu, func, rsp, 0);
+ if (!ret) {
+ mask_ofl_ipi &= ~mask;
+ continue;
+ }
+ /* Failed, raced with CPU hotplug operation. */
+ raw_spin_lock_irqsave_rcu_node(rnp, flags);
+ if ((rnp->qsmaskinitnext & mask) &&
+ (rnp->expmask & mask)) {
+ /* Online, so delay for a bit and try again. */
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
+ trace_rcu_exp_grace_period(rsp->name, rcu_exp_gp_seq_endval(rsp), TPS("selectofl"));
+ schedule_timeout_uninterruptible(1);
+ goto retry_ipi;
+ }
+ /* CPU really is offline, so we can ignore it. */
+ if (!(rnp->expmask & mask))
+ mask_ofl_ipi &= ~mask;
+ raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
+ }
+ /* Report quiescent states for those that went offline. */
+ mask_ofl_test |= mask_ofl_ipi;
+ if (mask_ofl_test)
+ rcu_report_exp_cpu_mult(rsp, rnp, mask_ofl_test, false);
+}
+
+/*
+ * Select the nodes that the upcoming expedited grace period needs
+ * to wait for.
+ */
+static void sync_rcu_exp_select_cpus(struct rcu_state *rsp,
+ smp_call_func_t func)
+{
+ struct rcu_node *rnp;
+
+ trace_rcu_exp_grace_period(rsp->name, rcu_exp_gp_seq_endval(rsp), TPS("reset"));
+ sync_exp_reset_tree(rsp);
+ trace_rcu_exp_grace_period(rsp->name, rcu_exp_gp_seq_endval(rsp), TPS("select"));
+
+ /* Schedule work for each leaf rcu_node structure. */
+ rcu_for_each_leaf_node(rsp, rnp) {
+ rnp->exp_need_flush = false;
+ if (!READ_ONCE(rnp->expmask))
+ continue; /* Avoid early boot non-existent wq. */
+ rnp->rew.rew_func = func;
+ rnp->rew.rew_rsp = rsp;
+ if (!READ_ONCE(rcu_par_gp_wq) ||
+ rcu_scheduler_active != RCU_SCHEDULER_RUNNING) {
+ /* No workqueues yet. */
+ sync_rcu_exp_select_node_cpus(&rnp->rew.rew_work);
+ continue;
}
- /* Report quiescent states for those that went offline. */
- mask_ofl_test |= mask_ofl_ipi;
- if (mask_ofl_test)
- rcu_report_exp_cpu_mult(rsp, rnp, mask_ofl_test, false);
+ INIT_WORK(&rnp->rew.rew_work, sync_rcu_exp_select_node_cpus);
+ queue_work_on(rnp->grplo, rcu_par_gp_wq, &rnp->rew.rew_work);
+ rnp->exp_need_flush = true;
}
+
+ /* Wait for workqueue jobs (if any) to complete. */
+ rcu_for_each_leaf_node(rsp, rnp)
+ if (rnp->exp_need_flush)
+ flush_work(&rnp->rew.rew_work);
}
static void synchronize_sched_expedited_wait(struct rcu_state *rsp)
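sync_rcu_exp_select_cpus() now farms the per-leaf scan out to rcu_par_gp_wq, queueing one work item per leaf rcu_node on that node's first CPU and recovering the node in the handler via container_of(). The generic shape of that pattern, with hypothetical names, is sketched below:

struct node_state {
	struct work_struct work;	/* Embedded work item. */
	int first_cpu;			/* CPU the work is queued on. */
	bool need_flush;		/* Set only if work was queued. */
};

static void per_node_scan(struct work_struct *wp)
{
	struct node_state *ns = container_of(wp, struct node_state, work);

	/* Scan the CPUs owned by ns, as the loop body above does. */
}

static void scan_all_nodes(struct node_state *nodes, int n,
			   struct workqueue_struct *wq)
{
	int i;

	for (i = 0; i < n; i++) {
		INIT_WORK(&nodes[i].work, per_node_scan);
		queue_work_on(nodes[i].first_cpu, wq, &nodes[i].work);
		nodes[i].need_flush = true;
	}
	for (i = 0; i < n; i++)		/* Wait for all scans to finish. */
		if (nodes[i].need_flush)
			flush_work(&nodes[i].work);
}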
@@ -469,9 +520,9 @@ static void synchronize_sched_expedited_wait(struct rcu_state *rsp)
for (;;) {
ret = swait_event_timeout(
rsp->expedited_wq,
- sync_rcu_preempt_exp_done(rnp_root),
+ sync_rcu_preempt_exp_done_unlocked(rnp_root),
jiffies_stall);
- if (ret > 0 || sync_rcu_preempt_exp_done(rnp_root))
+ if (ret > 0 || sync_rcu_preempt_exp_done_unlocked(rnp_root))
return;
WARN_ON(ret < 0); /* workqueues should not be signaled. */
if (rcu_cpu_stall_suppress)
@@ -504,7 +555,7 @@ static void synchronize_sched_expedited_wait(struct rcu_state *rsp)
rcu_for_each_node_breadth_first(rsp, rnp) {
if (rnp == rnp_root)
continue; /* printed unconditionally */
- if (sync_rcu_preempt_exp_done(rnp))
+ if (sync_rcu_preempt_exp_done_unlocked(rnp))
continue;
pr_cont(" l=%u:%d-%d:%#lx/%c",
rnp->level, rnp->grplo, rnp->grphi,
@@ -560,14 +611,6 @@ static void rcu_exp_wait_wake(struct rcu_state *rsp, unsigned long s)
mutex_unlock(&rsp->exp_wake_mutex);
}
-/* Let the workqueue handler know what it is supposed to do. */
-struct rcu_exp_work {
- smp_call_func_t rew_func;
- struct rcu_state *rew_rsp;
- unsigned long rew_s;
- struct work_struct rew_work;
-};
-
/*
* Common code to drive an expedited grace period forward, used by
* workqueues and mid-boot-time tasks.
@@ -633,7 +676,7 @@ static void _synchronize_rcu_expedited(struct rcu_state *rsp,
rdp = per_cpu_ptr(rsp->rda, raw_smp_processor_id());
rnp = rcu_get_root(rsp);
wait_event(rnp->exp_wq[rcu_seq_ctr(s) & 0x3],
- sync_exp_work_done(rsp, &rdp->exp_workdone0, s));
+ sync_exp_work_done(rsp, s));
smp_mb(); /* Workqueue actions happen before return. */
/* Let the next expedited grace period start. */
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 84fbee4686d3..7fd12039e512 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -182,7 +182,7 @@ static void rcu_preempt_ctxt_queue(struct rcu_node *rnp, struct rcu_data *rdp)
raw_lockdep_assert_held_rcu_node(rnp);
WARN_ON_ONCE(rdp->mynode != rnp);
- WARN_ON_ONCE(rnp->level != rcu_num_lvls - 1);
+ WARN_ON_ONCE(!rcu_is_leaf_node(rnp));
/*
* Decide where to queue the newly blocked task. In theory,
@@ -384,6 +384,50 @@ static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp)
}
/*
+ * Preemptible RCU implementation for rcu_read_lock().
+ * Just increment ->rcu_read_lock_nesting, shared state will be updated
+ * if we block.
+ */
+void __rcu_read_lock(void)
+{
+ current->rcu_read_lock_nesting++;
+ barrier(); /* critical section after entry code. */
+}
+EXPORT_SYMBOL_GPL(__rcu_read_lock);
+
+/*
+ * Preemptible RCU implementation for rcu_read_unlock().
+ * Decrement ->rcu_read_lock_nesting. If the result is zero (outermost
+ * rcu_read_unlock()) and ->rcu_read_unlock_special is non-zero, then
+ * invoke rcu_read_unlock_special() to clean up after a context switch
+ * in an RCU read-side critical section and other special cases.
+ */
+void __rcu_read_unlock(void)
+{
+ struct task_struct *t = current;
+
+ if (t->rcu_read_lock_nesting != 1) {
+ --t->rcu_read_lock_nesting;
+ } else {
+ barrier(); /* critical section before exit code. */
+ t->rcu_read_lock_nesting = INT_MIN;
+ barrier(); /* assign before ->rcu_read_unlock_special load */
+ if (unlikely(READ_ONCE(t->rcu_read_unlock_special.s)))
+ rcu_read_unlock_special(t);
+ barrier(); /* ->rcu_read_unlock_special load before assign */
+ t->rcu_read_lock_nesting = 0;
+ }
+#ifdef CONFIG_PROVE_LOCKING
+ {
+ int rrln = READ_ONCE(t->rcu_read_lock_nesting);
+
+ WARN_ON_ONCE(rrln < 0 && rrln > INT_MIN / 2);
+ }
+#endif /* #ifdef CONFIG_PROVE_LOCKING */
+}
+EXPORT_SYMBOL_GPL(__rcu_read_unlock);
+
+/*
* Advance a ->blkd_tasks-list pointer to the next entry, instead
* returning NULL if at the end of the list.
*/
@@ -489,7 +533,7 @@ void rcu_read_unlock_special(struct task_struct *t)
rnp = t->rcu_blocked_node;
raw_spin_lock_rcu_node(rnp); /* irqs already disabled. */
WARN_ON_ONCE(rnp != t->rcu_blocked_node);
- WARN_ON_ONCE(rnp->level != rcu_num_lvls - 1);
+ WARN_ON_ONCE(!rcu_is_leaf_node(rnp));
empty_norm = !rcu_preempt_blocked_readers_cgp(rnp);
empty_exp = sync_rcu_preempt_exp_done(rnp);
smp_mb(); /* ensure expedited fastpath sees end of RCU c-s. */
@@ -685,15 +729,6 @@ static void rcu_preempt_check_callbacks(void)
t->rcu_read_unlock_special.b.need_qs = true;
}
-#ifdef CONFIG_RCU_BOOST
-
-static void rcu_preempt_do_callbacks(void)
-{
- rcu_do_batch(rcu_state_p, this_cpu_ptr(rcu_data_p));
-}
-
-#endif /* #ifdef CONFIG_RCU_BOOST */
-
/**
* call_rcu() - Queue an RCU callback for invocation after a grace period.
* @head: structure to be used for queueing the RCU updates.
@@ -1140,7 +1175,7 @@ static void rcu_kthread_do_work(void)
{
rcu_do_batch(&rcu_sched_state, this_cpu_ptr(&rcu_sched_data));
rcu_do_batch(&rcu_bh_state, this_cpu_ptr(&rcu_bh_data));
- rcu_preempt_do_callbacks();
+ rcu_do_batch(&rcu_preempt_state, this_cpu_ptr(&rcu_preempt_data));
}
static void rcu_cpu_kthread_setup(unsigned int cpu)
@@ -1607,7 +1642,7 @@ static int rcu_oom_notify(struct notifier_block *self,
for_each_online_cpu(cpu) {
smp_call_function_single(cpu, rcu_oom_notify_cpu, NULL, 1);
- cond_resched_rcu_qs();
+ cond_resched_tasks_rcu_qs();
}
/* Unconditionally decrement: no need to wake ourselves up. */
@@ -1780,19 +1815,6 @@ static void rcu_nocb_gp_cleanup(struct swait_queue_head *sq)
swake_up_all(sq);
}
-/*
- * Set the root rcu_node structure's ->need_future_gp field
- * based on the sum of those of all rcu_node structures. This does
- * double-count the root rcu_node structure's requests, but this
- * is necessary to handle the possibility of a rcu_nocb_kthread()
- * having awakened during the time that the rcu_node structures
- * were being updated for the end of the previous grace period.
- */
-static void rcu_nocb_gp_set(struct rcu_node *rnp, int nrq)
-{
- rnp->need_future_gp[(rnp->completed + 1) & 0x1] += nrq;
-}
-
static struct swait_queue_head *rcu_nocb_gp_get(struct rcu_node *rnp)
{
return &rnp->nocb_gp_wq[rnp->completed & 0x1];
@@ -1966,7 +1988,7 @@ static void __call_rcu_nocb_enqueue(struct rcu_data *rdp,
trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
TPS("WakeOvf"));
} else {
- wake_nocb_leader_defer(rdp, RCU_NOCB_WAKE,
+ wake_nocb_leader_defer(rdp, RCU_NOCB_WAKE_FORCE,
TPS("WakeOvfIsDeferred"));
}
rdp->qlen_last_fqs_check = LONG_MAX / 2;
@@ -2048,7 +2070,8 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp)
struct rcu_node *rnp = rdp->mynode;
raw_spin_lock_irqsave_rcu_node(rnp, flags);
- needwake = rcu_start_future_gp(rnp, rdp, &c);
+ c = rcu_cbs_completed(rdp->rsp, rnp);
+ needwake = rcu_start_this_gp(rnp, rdp, c);
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
if (needwake)
rcu_gp_kthread_wake(rdp->rsp);
@@ -2057,7 +2080,7 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp)
* Wait for the grace period. Do so interruptibly to avoid messing
* up the load average.
*/
- trace_rcu_future_gp(rnp, rdp, c, TPS("StartWait"));
+ trace_rcu_this_gp(rnp, rdp, c, TPS("StartWait"));
for (;;) {
swait_event_interruptible(
rnp->nocb_gp_wq[c & 0x1],
@@ -2065,9 +2088,9 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp)
if (likely(d))
break;
WARN_ON(signal_pending(current));
- trace_rcu_future_gp(rnp, rdp, c, TPS("ResumeWait"));
+ trace_rcu_this_gp(rnp, rdp, c, TPS("ResumeWait"));
}
- trace_rcu_future_gp(rnp, rdp, c, TPS("EndWait"));
+ trace_rcu_this_gp(rnp, rdp, c, TPS("EndWait"));
smp_mb(); /* Ensure that CB invocation happens after GP end. */
}
@@ -2236,7 +2259,7 @@ static int rcu_nocb_kthread(void *arg)
cl++;
c++;
local_bh_enable();
- cond_resched_rcu_qs();
+ cond_resched_tasks_rcu_qs();
list = next;
}
trace_rcu_batch_end(rdp->rsp->name, c, !!list, 0, 0, 1);
@@ -2292,7 +2315,7 @@ static void do_nocb_deferred_wakeup(struct rcu_data *rdp)
void __init rcu_init_nohz(void)
{
int cpu;
- bool need_rcu_nocb_mask = true;
+ bool need_rcu_nocb_mask = false;
struct rcu_state *rsp;
#if defined(CONFIG_NO_HZ_FULL)
@@ -2315,7 +2338,7 @@ void __init rcu_init_nohz(void)
#endif /* #if defined(CONFIG_NO_HZ_FULL) */
if (!cpumask_subset(rcu_nocb_mask, cpu_possible_mask)) {
- pr_info("\tNote: kernel parameter 'rcu_nocbs=' contains nonexistent CPUs.\n");
+ pr_info("\tNote: kernel parameter 'rcu_nocbs=', 'nohz_full', or 'isolcpus=' contains nonexistent CPUs.\n");
cpumask_and(rcu_nocb_mask, cpu_possible_mask,
rcu_nocb_mask);
}
@@ -2495,10 +2518,6 @@ static void rcu_nocb_gp_cleanup(struct swait_queue_head *sq)
{
}
-static void rcu_nocb_gp_set(struct rcu_node *rnp, int nrq)
-{
-}
-
static struct swait_queue_head *rcu_nocb_gp_get(struct rcu_node *rnp)
{
return NULL;
@@ -2587,8 +2606,7 @@ static bool rcu_nohz_full_cpu(struct rcu_state *rsp)
}
/*
- * Bind the grace-period kthread for the sysidle flavor of RCU to the
- * timekeeping CPU.
+ * Bind the RCU grace-period kthreads to the housekeeping CPU.
*/
static void rcu_bind_gp_kthread(void)
{
diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
index 68fa19a5e7bd..4c230a60ece4 100644
--- a/kernel/rcu/update.c
+++ b/kernel/rcu/update.c
@@ -226,54 +226,6 @@ core_initcall(rcu_set_runtime_mode);
#endif /* #if !defined(CONFIG_TINY_RCU) || defined(CONFIG_SRCU) */
-#ifdef CONFIG_PREEMPT_RCU
-
-/*
- * Preemptible RCU implementation for rcu_read_lock().
- * Just increment ->rcu_read_lock_nesting, shared state will be updated
- * if we block.
- */
-void __rcu_read_lock(void)
-{
- current->rcu_read_lock_nesting++;
- barrier(); /* critical section after entry code. */
-}
-EXPORT_SYMBOL_GPL(__rcu_read_lock);
-
-/*
- * Preemptible RCU implementation for rcu_read_unlock().
- * Decrement ->rcu_read_lock_nesting. If the result is zero (outermost
- * rcu_read_unlock()) and ->rcu_read_unlock_special is non-zero, then
- * invoke rcu_read_unlock_special() to clean up after a context switch
- * in an RCU read-side critical section and other special cases.
- */
-void __rcu_read_unlock(void)
-{
- struct task_struct *t = current;
-
- if (t->rcu_read_lock_nesting != 1) {
- --t->rcu_read_lock_nesting;
- } else {
- barrier(); /* critical section before exit code. */
- t->rcu_read_lock_nesting = INT_MIN;
- barrier(); /* assign before ->rcu_read_unlock_special load */
- if (unlikely(READ_ONCE(t->rcu_read_unlock_special.s)))
- rcu_read_unlock_special(t);
- barrier(); /* ->rcu_read_unlock_special load before assign */
- t->rcu_read_lock_nesting = 0;
- }
-#ifdef CONFIG_PROVE_LOCKING
- {
- int rrln = READ_ONCE(t->rcu_read_lock_nesting);
-
- WARN_ON_ONCE(rrln < 0 && rrln > INT_MIN / 2);
- }
-#endif /* #ifdef CONFIG_PROVE_LOCKING */
-}
-EXPORT_SYMBOL_GPL(__rcu_read_unlock);
-
-#endif /* #ifdef CONFIG_PREEMPT_RCU */
-
#ifdef CONFIG_DEBUG_LOCK_ALLOC
static struct lock_class_key rcu_lock_key;
struct lockdep_map rcu_lock_map =
@@ -624,7 +576,7 @@ EXPORT_SYMBOL_GPL(call_rcu_tasks);
* grace period has elapsed, in other words after all currently
* executing rcu-tasks read-side critical sections have elapsed. These
* read-side critical sections are delimited by calls to schedule(),
- * cond_resched_rcu_qs(), idle execution, userspace execution, calls
+ * cond_resched_tasks_rcu_qs(), idle execution, userspace execution, calls
* to synchronize_rcu_tasks(), and (in theory, anyway) cond_resched().
*
* This is a very specialized primitive, intended only for a few uses in
diff --git a/kernel/resource.c b/kernel/resource.c
index 2af6c03858b9..b589dda910b3 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -87,7 +87,7 @@ enum { MAX_IORES_LEVEL = 5 };
static void *r_start(struct seq_file *m, loff_t *pos)
__acquires(resource_lock)
{
- struct resource *p = m->private;
+ struct resource *p = PDE_DATA(file_inode(m->file));
loff_t l = 0;
read_lock(&resource_lock);
for (p = p->child; p && l < *pos; p = r_next(m, p, &l))
@@ -103,7 +103,7 @@ static void r_stop(struct seq_file *m, void *v)
static int r_show(struct seq_file *m, void *v)
{
- struct resource *root = m->private;
+ struct resource *root = PDE_DATA(file_inode(m->file));
struct resource *r = v, *p;
unsigned long long start, end;
int width = root->end < 0x10000 ? 4 : 8;
@@ -135,44 +135,11 @@ static const struct seq_operations resource_op = {
.show = r_show,
};
-static int ioports_open(struct inode *inode, struct file *file)
-{
- int res = seq_open(file, &resource_op);
- if (!res) {
- struct seq_file *m = file->private_data;
- m->private = &ioport_resource;
- }
- return res;
-}
-
-static int iomem_open(struct inode *inode, struct file *file)
-{
- int res = seq_open(file, &resource_op);
- if (!res) {
- struct seq_file *m = file->private_data;
- m->private = &iomem_resource;
- }
- return res;
-}
-
-static const struct file_operations proc_ioports_operations = {
- .open = ioports_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
-static const struct file_operations proc_iomem_operations = {
- .open = iomem_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
static int __init ioresources_init(void)
{
- proc_create("ioports", 0, NULL, &proc_ioports_operations);
- proc_create("iomem", 0, NULL, &proc_iomem_operations);
+ proc_create_seq_data("ioports", 0, NULL, &resource_op,
+ &ioport_resource);
+ proc_create_seq_data("iomem", 0, NULL, &resource_op, &iomem_resource);
return 0;
}
__initcall(ioresources_init);
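The resource.c conversion drops two near-identical open() helpers and file_operations tables in favor of proc_create_seq_data(), which stashes a data pointer that ->show() retrieves through PDE_DATA(file_inode(m->file)). The resulting registration pattern, in general form (my_seq_ops and my_show() are hypothetical; my_seq_ops is assumed to use my_show as its .show callback):

static int my_show(struct seq_file *m, void *v)
{
	struct resource *root = PDE_DATA(file_inode(m->file));

	seq_printf(m, "%pR\n", root);
	return 0;
}

static int __init my_proc_init(void)
{
	/* One call replaces the old open() helper and file_operations. */
	proc_create_seq_data("myres", 0, NULL, &my_seq_ops, &iomem_resource);
	return 0;
}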
diff --git a/kernel/sched/autogroup.c b/kernel/sched/autogroup.c
index 6be6c575b6cd..2d4ff5353ded 100644
--- a/kernel/sched/autogroup.c
+++ b/kernel/sched/autogroup.c
@@ -2,6 +2,7 @@
/*
* Auto-group scheduling implementation:
*/
+#include <linux/nospec.h>
#include "sched.h"
unsigned int __read_mostly sysctl_sched_autogroup_enabled = 1;
@@ -209,7 +210,7 @@ int proc_sched_autogroup_set_nice(struct task_struct *p, int nice)
static unsigned long next = INITIAL_JIFFIES;
struct autogroup *ag;
unsigned long shares;
- int err;
+ int err, idx;
if (nice < MIN_NICE || nice > MAX_NICE)
return -EINVAL;
@@ -227,7 +228,9 @@ int proc_sched_autogroup_set_nice(struct task_struct *p, int nice)
next = HZ / 10 + jiffies;
ag = autogroup_task_get(p);
- shares = scale_load(sched_prio_to_weight[nice + 20]);
+
+ idx = array_index_nospec(nice + 20, 40);
+ shares = scale_load(sched_prio_to_weight[idx]);
down_write(&ag->lock);
err = sched_group_set_shares(ag->tg, shares);
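The nice-to-weight conversion above clamps the user-derived index with array_index_nospec() before the table lookup, closing a Spectre-v1-style speculative out-of-bounds read. The pattern in isolation (a hedged sketch; weight_of_nice() is a hypothetical helper):

#include <linux/nospec.h>

static unsigned long weight_of_nice(int nice)
{
	int idx = array_index_nospec(nice + 20, 40);	/* Clamp to 0..39. */

	return sched_prio_to_weight[idx];
}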
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 5e10aaeebfcc..e9866f86f304 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -7,6 +7,9 @@
*/
#include "sched.h"
+#include <linux/kthread.h>
+#include <linux/nospec.h>
+
#include <asm/switch_to.h>
#include <asm/tlb.h>
@@ -878,6 +881,33 @@ void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
}
#ifdef CONFIG_SMP
+
+static inline bool is_per_cpu_kthread(struct task_struct *p)
+{
+ if (!(p->flags & PF_KTHREAD))
+ return false;
+
+ if (p->nr_cpus_allowed != 1)
+ return false;
+
+ return true;
+}
+
+/*
+ * Per-CPU kthreads are allowed to run on !active && online CPUs, see
+ * __set_cpus_allowed_ptr() and select_fallback_rq().
+ */
+static inline bool is_cpu_allowed(struct task_struct *p, int cpu)
+{
+ if (!cpumask_test_cpu(cpu, &p->cpus_allowed))
+ return false;
+
+ if (is_per_cpu_kthread(p))
+ return cpu_online(cpu);
+
+ return cpu_active(cpu);
+}
+
/*
* This is how migration works:
*
@@ -935,16 +965,8 @@ struct migration_arg {
static struct rq *__migrate_task(struct rq *rq, struct rq_flags *rf,
struct task_struct *p, int dest_cpu)
{
- if (p->flags & PF_KTHREAD) {
- if (unlikely(!cpu_online(dest_cpu)))
- return rq;
- } else {
- if (unlikely(!cpu_active(dest_cpu)))
- return rq;
- }
-
/* Affinity changed (again). */
- if (!cpumask_test_cpu(dest_cpu, &p->cpus_allowed))
+ if (!is_cpu_allowed(p, dest_cpu))
return rq;
update_rq_clock(rq);
@@ -1473,10 +1495,9 @@ static int select_fallback_rq(int cpu, struct task_struct *p)
for (;;) {
/* Any allowed, online CPU? */
for_each_cpu(dest_cpu, &p->cpus_allowed) {
- if (!(p->flags & PF_KTHREAD) && !cpu_active(dest_cpu))
- continue;
- if (!cpu_online(dest_cpu))
+ if (!is_cpu_allowed(p, dest_cpu))
continue;
+
goto out;
}
@@ -1539,8 +1560,7 @@ int select_task_rq(struct task_struct *p, int cpu, int sd_flags, int wake_flags)
* [ this allows ->select_task() to simply return task_cpu(p) and
* not worry about this generic constraint ]
*/
- if (unlikely(!cpumask_test_cpu(cpu, &p->cpus_allowed) ||
- !cpu_online(cpu)))
+ if (unlikely(!is_cpu_allowed(p, cpu)))
cpu = select_fallback_rq(task_cpu(p), p);
return cpu;
@@ -2174,27 +2194,7 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
INIT_HLIST_HEAD(&p->preempt_notifiers);
#endif
-#ifdef CONFIG_NUMA_BALANCING
- if (p->mm && atomic_read(&p->mm->mm_users) == 1) {
- p->mm->numa_next_scan = jiffies + msecs_to_jiffies(sysctl_numa_balancing_scan_delay);
- p->mm->numa_scan_seq = 0;
- }
-
- if (clone_flags & CLONE_VM)
- p->numa_preferred_nid = current->numa_preferred_nid;
- else
- p->numa_preferred_nid = -1;
-
- p->node_stamp = 0ULL;
- p->numa_scan_seq = p->mm ? p->mm->numa_scan_seq : 0;
- p->numa_scan_period = sysctl_numa_balancing_scan_delay;
- p->numa_work.next = &p->numa_work;
- p->numa_faults = NULL;
- p->last_task_numa_placement = 0;
- p->last_sum_exec_runtime = 0;
-
- p->numa_group = NULL;
-#endif /* CONFIG_NUMA_BALANCING */
+ init_numa_balancing(clone_flags, p);
}
DEFINE_STATIC_KEY_FALSE(sched_numa_balancing);
@@ -2718,20 +2718,28 @@ static struct rq *finish_task_switch(struct task_struct *prev)
membarrier_mm_sync_core_before_usermode(mm);
mmdrop(mm);
}
- if (unlikely(prev_state == TASK_DEAD)) {
- if (prev->sched_class->task_dead)
- prev->sched_class->task_dead(prev);
+ if (unlikely(prev_state & (TASK_DEAD|TASK_PARKED))) {
+ switch (prev_state) {
+ case TASK_DEAD:
+ if (prev->sched_class->task_dead)
+ prev->sched_class->task_dead(prev);
- /*
- * Remove function-return probe instances associated with this
- * task and put them back on the free list.
- */
- kprobe_flush_task(prev);
+ /*
+ * Remove function-return probe instances associated with this
+ * task and put them back on the free list.
+ */
+ kprobe_flush_task(prev);
- /* Task is done with its stack. */
- put_task_stack(prev);
+ /* Task is done with its stack. */
+ put_task_stack(prev);
- put_task_struct(prev);
+ put_task_struct(prev);
+ break;
+
+ case TASK_PARKED:
+ kthread_park_complete(prev);
+ break;
+ }
}
tick_nohz_task_switch();
@@ -3498,23 +3506,8 @@ static void __sched notrace __schedule(bool preempt)
void __noreturn do_task_dead(void)
{
- /*
- * The setting of TASK_RUNNING by try_to_wake_up() may be delayed
- * when the following two conditions become true.
- * - There is race condition of mmap_sem (It is acquired by
- * exit_mm()), and
- * - SMI occurs before setting TASK_RUNINNG.
- * (or hypervisor of virtual machine switches to other guest)
- * As a result, we may become TASK_RUNNING after becoming TASK_DEAD
- *
- * To avoid it, we have to wait for releasing tsk->pi_lock which
- * is held by try_to_wake_up()
- */
- raw_spin_lock_irq(&current->pi_lock);
- raw_spin_unlock_irq(&current->pi_lock);
-
/* Causes final put_task_struct in finish_task_switch(): */
- __set_current_state(TASK_DEAD);
+ set_special_state(TASK_DEAD);
/* Tell freezer to ignore us: */
current->flags |= PF_NOFREEZE;
@@ -4037,6 +4030,23 @@ int idle_cpu(int cpu)
}
/**
+ * available_idle_cpu - is a given CPU idle for enqueuing work.
+ * @cpu: the CPU in question.
+ *
+ * Return: 1 if the CPU is currently idle. 0 otherwise.
+ */
+int available_idle_cpu(int cpu)
+{
+ if (!idle_cpu(cpu))
+ return 0;
+
+ if (vcpu_is_preempted(cpu))
+ return 0;
+
+ return 1;
+}
+
+/**
* idle_task - return the idle task for a given CPU.
* @cpu: the processor in question.
*
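available_idle_cpu() layers vcpu_is_preempted() on top of idle_cpu(), so a virtual CPU whose host has preempted it is no longer treated as an idle placement target. A hypothetical caller, for illustration only:

static int pick_wake_cpu(int this_cpu, int prev_cpu)
{
	/* A preempted vCPU merely looks idle; skip it. */
	if (available_idle_cpu(this_cpu))
		return this_cpu;
	return prev_cpu;
}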
@@ -5012,20 +5022,6 @@ int __cond_resched_lock(spinlock_t *lock)
}
EXPORT_SYMBOL(__cond_resched_lock);
-int __sched __cond_resched_softirq(void)
-{
- BUG_ON(!in_softirq());
-
- if (should_resched(SOFTIRQ_DISABLE_OFFSET)) {
- local_bh_enable();
- preempt_schedule_common();
- local_bh_disable();
- return 1;
- }
- return 0;
-}
-EXPORT_SYMBOL(__cond_resched_softirq);
-
/**
* yield - yield the current processor to other threads.
*
@@ -6928,11 +6924,15 @@ static int cpu_weight_nice_write_s64(struct cgroup_subsys_state *css,
struct cftype *cft, s64 nice)
{
unsigned long weight;
+ int idx;
if (nice < MIN_NICE || nice > MAX_NICE)
return -ERANGE;
- weight = sched_prio_to_weight[NICE_TO_PRIO(nice) - MAX_RT_PRIO];
+ idx = NICE_TO_PRIO(nice) - MAX_RT_PRIO;
+ idx = array_index_nospec(idx, 40);
+ weight = sched_prio_to_weight[idx];
+
return sched_group_set_shares(css_tg(css), scale_load(weight));
}
#endif
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index d2c6083304b4..28592b62b1d5 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -183,22 +183,21 @@ static void sugov_get_util(struct sugov_cpu *sg_cpu)
static unsigned long sugov_aggregate_util(struct sugov_cpu *sg_cpu)
{
struct rq *rq = cpu_rq(sg_cpu->cpu);
- unsigned long util;
- if (rq->rt.rt_nr_running) {
- util = sg_cpu->max;
- } else {
- util = sg_cpu->util_dl;
- if (rq->cfs.h_nr_running)
- util += sg_cpu->util_cfs;
- }
+ if (rq->rt.rt_nr_running)
+ return sg_cpu->max;
/*
+	 * Utilization required by DEADLINE must always be granted, while for
+	 * FAIR we use the blocked utilization of idle CPUs as a mechanism to
+	 * gracefully reduce the frequency when no tasks show up for longer
+	 * periods of time.
+ *
* Ideally we would like to set util_dl as min/guaranteed freq and
* util_cfs + util_dl as requested freq. However, cpufreq is not yet
* ready for such an interface. So, we only do the latter for now.
*/
- return min(util, sg_cpu->max);
+ return min(sg_cpu->max, (sg_cpu->util_dl + sg_cpu->util_cfs));
}
static void sugov_set_iowait_boost(struct sugov_cpu *sg_cpu, u64 time, unsigned int flags)
@@ -305,7 +304,8 @@ static void sugov_update_single(struct update_util_data *hook, u64 time,
* Do not reduce the frequency if the CPU has not been idle
* recently, as the reduction is likely to be premature then.
*/
- if (busy && next_f < sg_policy->next_freq) {
+ if (busy && next_f < sg_policy->next_freq &&
+ sg_policy->next_freq != UINT_MAX) {
next_f = sg_policy->next_freq;
/* Reset cached freq as next_freq has changed */
@@ -396,19 +396,6 @@ static void sugov_irq_work(struct irq_work *irq_work)
sg_policy = container_of(irq_work, struct sugov_policy, irq_work);
- /*
- * For RT tasks, the schedutil governor shoots the frequency to maximum.
- * Special care must be taken to ensure that this kthread doesn't result
- * in the same behavior.
- *
- * This is (mostly) guaranteed by the work_in_progress flag. The flag is
- * updated only at the end of the sugov_work() function and before that
- * the schedutil governor rejects all other frequency scaling requests.
- *
- * There is a very rare case though, where the RT thread yields right
- * after the work_in_progress flag is cleared. The effects of that are
- * neglected for now.
- */
kthread_queue_work(&sg_policy->worker, &sg_policy->work);
}
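
A worked example of the new aggregation (numbers invented): with util_dl = 200, util_cfs = 900 and a CPU capacity of max = 1024, DEADLINE's demand is always granted, CFS is stacked on top, and the sum is clamped:

	util = min(max, util_dl + util_cfs);	/* min(1024, 1100) == 1024 */

so the CPU is driven at full capacity rather than at the DL-only level.
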
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index e7b3008b85bb..fbfc3f1d368a 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -1117,7 +1117,7 @@ extern bool sched_rt_bandwidth_account(struct rt_rq *rt_rq);
* should be larger than 2^(64 - 20 - 8), which is more than 64 seconds.
* So, overflow is not an issue here.
*/
-u64 grub_reclaim(u64 delta, struct rq *rq, struct sched_dl_entity *dl_se)
+static u64 grub_reclaim(u64 delta, struct rq *rq, struct sched_dl_entity *dl_se)
{
u64 u_inact = rq->dl.this_bw - rq->dl.running_bw; /* Utot - Uact */
u64 u_act;
@@ -1259,6 +1259,9 @@ static enum hrtimer_restart inactive_task_timer(struct hrtimer *timer)
rq = task_rq_lock(p, &rf);
+ sched_clock_tick();
+ update_rq_clock(rq);
+
if (!dl_task(p) || p->state == TASK_DEAD) {
struct dl_bw *dl_b = dl_bw_of(task_cpu(p));
@@ -1278,9 +1281,6 @@ static enum hrtimer_restart inactive_task_timer(struct hrtimer *timer)
if (dl_se->dl_non_contending == 0)
goto unlock;
- sched_clock_tick();
- update_rq_clock(rq);
-
sub_running_bw(dl_se, &rq->dl);
dl_se->dl_non_contending = 0;
unlock:
@@ -2731,8 +2731,6 @@ bool dl_cpu_busy(unsigned int cpu)
#endif
#ifdef CONFIG_SCHED_DEBUG
-extern void print_dl_rq(struct seq_file *m, int cpu, struct dl_rq *dl_rq);
-
void print_dl_stats(struct seq_file *m, int cpu)
{
print_dl_rq(m, cpu, &cpu_rq(cpu)->dl);
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 15b10e210a6b..e593b4118578 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -823,35 +823,9 @@ static const struct seq_operations sched_debug_sops = {
.show = sched_debug_show,
};
-static int sched_debug_release(struct inode *inode, struct file *file)
-{
- seq_release(inode, file);
-
- return 0;
-}
-
-static int sched_debug_open(struct inode *inode, struct file *filp)
-{
- int ret = 0;
-
- ret = seq_open(filp, &sched_debug_sops);
-
- return ret;
-}
-
-static const struct file_operations sched_debug_fops = {
- .open = sched_debug_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = sched_debug_release,
-};
-
static int __init init_sched_debug_procfs(void)
{
- struct proc_dir_entry *pe;
-
- pe = proc_create("sched_debug", 0444, NULL, &sched_debug_fops);
- if (!pe)
+ if (!proc_create_seq("sched_debug", 0444, NULL, &sched_debug_sops))
return -ENOMEM;
return 0;
}
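
proc_create_seq() is the new helper this conversion relies on: procfs supplies the open/read/llseek/release plumbing and the caller passes only its seq_operations. A hedged sketch of a minimal user (all demo_* names are hypothetical):

	#include <linux/errno.h>
	#include <linux/init.h>
	#include <linux/proc_fs.h>
	#include <linux/seq_file.h>

	static void *demo_start(struct seq_file *m, loff_t *pos)
	{
		return *pos == 0 ? SEQ_START_TOKEN : NULL;	/* single record */
	}

	static void *demo_next(struct seq_file *m, void *v, loff_t *pos)
	{
		++*pos;
		return NULL;
	}

	static void demo_stop(struct seq_file *m, void *v)
	{
	}

	static int demo_show(struct seq_file *m, void *v)
	{
		seq_puts(m, "hello\n");
		return 0;
	}

	static const struct seq_operations demo_sops = {
		.start	= demo_start,
		.next	= demo_next,
		.stop	= demo_stop,
		.show	= demo_show,
	};

	static int __init demo_init(void)
	{
		if (!proc_create_seq("demo", 0444, NULL, &demo_sops))
			return -ENOMEM;
		return 0;
	}

For single-record files like this one, the sibling helper proc_create_single() from the same series is shorter still.
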
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 54dc31e7ab9b..e497c05aab7f 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1139,6 +1139,47 @@ static unsigned int task_scan_max(struct task_struct *p)
return max(smin, smax);
}
+void init_numa_balancing(unsigned long clone_flags, struct task_struct *p)
+{
+ int mm_users = 0;
+ struct mm_struct *mm = p->mm;
+
+ if (mm) {
+ mm_users = atomic_read(&mm->mm_users);
+ if (mm_users == 1) {
+ mm->numa_next_scan = jiffies + msecs_to_jiffies(sysctl_numa_balancing_scan_delay);
+ mm->numa_scan_seq = 0;
+ }
+ }
+ p->node_stamp = 0;
+ p->numa_scan_seq = mm ? mm->numa_scan_seq : 0;
+ p->numa_scan_period = sysctl_numa_balancing_scan_delay;
+ p->numa_work.next = &p->numa_work;
+ p->numa_faults = NULL;
+ p->numa_group = NULL;
+ p->last_task_numa_placement = 0;
+ p->last_sum_exec_runtime = 0;
+
+ /* New address space, reset the preferred nid */
+ if (!(clone_flags & CLONE_VM)) {
+ p->numa_preferred_nid = -1;
+ return;
+ }
+
+ /*
+	 * New thread: keep the existing numa_preferred_nid, which should
+	 * already have been copied by arch_dup_task_struct, but stagger
+	 * when scans start.
+ */
+ if (mm) {
+ unsigned int delay;
+
+ delay = min_t(unsigned int, task_scan_max(current),
+ current->numa_scan_period * mm_users * NSEC_PER_MSEC);
+ delay += 2 * TICK_NSEC;
+ p->node_stamp = delay;
+ }
+}
+
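
To make the stagger concrete, a worked example with invented numbers:

	/* mm_users == 4, current->numa_scan_period == 1000 ms:
	 *
	 *	delay = min(task_scan_max(current),
	 *		    1000 * 4 * NSEC_PER_MSEC);	-> up to 4e9 ns
	 *	delay += 2 * TICK_NSEC;			-> two ticks of slack
	 *
	 * so each additional thread defers its first NUMA scan by on the
	 * order of seconds instead of all siblings scanning in lockstep. */
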
static void account_numa_enqueue(struct rq *rq, struct task_struct *p)
{
rq->nr_numa_running += (p->numa_preferred_nid != -1);
@@ -1854,7 +1895,6 @@ static int task_numa_migrate(struct task_struct *p)
static void numa_migrate_preferred(struct task_struct *p)
{
unsigned long interval = HZ;
- unsigned long numa_migrate_retry;
/* This task has no NUMA fault statistics yet */
if (unlikely(p->numa_preferred_nid == -1 || !p->numa_faults))
@@ -1862,18 +1902,7 @@ static void numa_migrate_preferred(struct task_struct *p)
/* Periodically retry migrating the task to the preferred node */
interval = min(interval, msecs_to_jiffies(p->numa_scan_period) / 16);
- numa_migrate_retry = jiffies + interval;
-
- /*
- * Check that the new retry threshold is after the current one. If
- * the retry is in the future, it implies that wake_affine has
- * temporarily asked NUMA balancing to backoff from placement.
- */
- if (numa_migrate_retry > p->numa_migrate_retry)
- return;
-
- /* Safe to try placing the task on the preferred node */
- p->numa_migrate_retry = numa_migrate_retry;
+ p->numa_migrate_retry = jiffies + interval;
/* Success if task is already running on preferred CPU */
if (task_node(p) == p->numa_preferred_nid)
@@ -5357,6 +5386,14 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
struct sched_entity *se = &p->se;
/*
+	 * The code below (indirectly) updates schedutil, which looks at
+	 * the cfs_rq utilization to select a frequency.
+	 * Let's add the task's estimated utilization to the cfs_rq's
+	 * estimated utilization before we update schedutil.
+ */
+ util_est_enqueue(&rq->cfs, p);
+
+ /*
* If in_iowait is set, the code below may not trigger any cpufreq
* utilization updates, so do it here explicitly with the IOWAIT flag
* passed.
@@ -5397,7 +5434,6 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
if (!se)
add_nr_running(rq, 1);
- util_est_enqueue(&rq->cfs, p);
hrtick_update(rq);
}
@@ -5870,8 +5906,8 @@ wake_affine_idle(int this_cpu, int prev_cpu, int sync)
* a cpufreq perspective, it's better to have higher utilisation
* on one CPU.
*/
- if (idle_cpu(this_cpu) && cpus_share_cache(this_cpu, prev_cpu))
- return idle_cpu(prev_cpu) ? prev_cpu : this_cpu;
+ if (available_idle_cpu(this_cpu) && cpus_share_cache(this_cpu, prev_cpu))
+ return available_idle_cpu(prev_cpu) ? prev_cpu : this_cpu;
if (sync && cpu_rq(this_cpu)->nr_running == 1)
return this_cpu;
@@ -5922,48 +5958,6 @@ wake_affine_weight(struct sched_domain *sd, struct task_struct *p,
return this_eff_load < prev_eff_load ? this_cpu : nr_cpumask_bits;
}
-#ifdef CONFIG_NUMA_BALANCING
-static void
-update_wa_numa_placement(struct task_struct *p, int prev_cpu, int target)
-{
- unsigned long interval;
-
- if (!static_branch_likely(&sched_numa_balancing))
- return;
-
- /* If balancing has no preference then continue gathering data */
- if (p->numa_preferred_nid == -1)
- return;
-
- /*
- * If the wakeup is not affecting locality then it is neutral from
- * the perspective of NUMA balacing so continue gathering data.
- */
- if (cpu_to_node(prev_cpu) == cpu_to_node(target))
- return;
-
- /*
- * Temporarily prevent NUMA balancing trying to place waker/wakee after
- * wakee has been moved by wake_affine. This will potentially allow
- * related tasks to converge and update their data placement. The
- * 4 * numa_scan_period is to allow the two-pass filter to migrate
- * hot data to the wakers node.
- */
- interval = max(sysctl_numa_balancing_scan_delay,
- p->numa_scan_period << 2);
- p->numa_migrate_retry = jiffies + msecs_to_jiffies(interval);
-
- interval = max(sysctl_numa_balancing_scan_delay,
- current->numa_scan_period << 2);
- current->numa_migrate_retry = jiffies + msecs_to_jiffies(interval);
-}
-#else
-static void
-update_wa_numa_placement(struct task_struct *p, int prev_cpu, int target)
-{
-}
-#endif
-
static int wake_affine(struct sched_domain *sd, struct task_struct *p,
int this_cpu, int prev_cpu, int sync)
{
@@ -5979,7 +5973,6 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p,
if (target == nr_cpumask_bits)
return prev_cpu;
- update_wa_numa_placement(p, prev_cpu, target);
schedstat_inc(sd->ttwu_move_affine);
schedstat_inc(p->se.statistics.nr_wakeups_affine);
return target;
@@ -6157,7 +6150,7 @@ find_idlest_group_cpu(struct sched_group *group, struct task_struct *p, int this
/* Traverse only the allowed CPUs */
for_each_cpu_and(i, sched_group_span(group), &p->cpus_allowed) {
- if (idle_cpu(i)) {
+ if (available_idle_cpu(i)) {
struct rq *rq = cpu_rq(i);
struct cpuidle_state *idle = idle_get_state(rq);
if (idle && idle->exit_latency < min_exit_latency) {
@@ -6199,6 +6192,13 @@ static inline int find_idlest_cpu(struct sched_domain *sd, struct task_struct *p
if (!cpumask_intersects(sched_domain_span(sd), &p->cpus_allowed))
return prev_cpu;
+ /*
+	 * We need the task's util for capacity_spare_wake; sync it up to
+	 * prev_cpu's last_update_time.
+ */
+ if (!(sd_flag & SD_BALANCE_FORK))
+ sync_entity_load_avg(&p->se);
+
while (sd) {
struct sched_group *group;
struct sched_domain *tmp;
@@ -6279,7 +6279,7 @@ void __update_idle_core(struct rq *rq)
if (cpu == core)
continue;
- if (!idle_cpu(cpu))
+ if (!available_idle_cpu(cpu))
goto unlock;
}
@@ -6311,7 +6311,7 @@ static int select_idle_core(struct task_struct *p, struct sched_domain *sd, int
for_each_cpu(cpu, cpu_smt_mask(core)) {
cpumask_clear_cpu(cpu, cpus);
- if (!idle_cpu(cpu))
+ if (!available_idle_cpu(cpu))
idle = false;
}
@@ -6340,7 +6340,7 @@ static int select_idle_smt(struct task_struct *p, struct sched_domain *sd, int t
for_each_cpu(cpu, cpu_smt_mask(target)) {
if (!cpumask_test_cpu(cpu, &p->cpus_allowed))
continue;
- if (idle_cpu(cpu))
+ if (available_idle_cpu(cpu))
return cpu;
}
@@ -6403,7 +6403,7 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
return -1;
if (!cpumask_test_cpu(cpu, &p->cpus_allowed))
continue;
- if (idle_cpu(cpu))
+ if (available_idle_cpu(cpu))
break;
}
@@ -6423,13 +6423,13 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
struct sched_domain *sd;
int i, recent_used_cpu;
- if (idle_cpu(target))
+ if (available_idle_cpu(target))
return target;
/*
* If the previous CPU is cache affine and idle, don't be stupid:
*/
- if (prev != target && cpus_share_cache(prev, target) && idle_cpu(prev))
+ if (prev != target && cpus_share_cache(prev, target) && available_idle_cpu(prev))
return prev;
/* Check a recently used CPU as a potential idle candidate: */
@@ -6437,7 +6437,7 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
if (recent_used_cpu != prev &&
recent_used_cpu != target &&
cpus_share_cache(recent_used_cpu, target) &&
- idle_cpu(recent_used_cpu) &&
+ available_idle_cpu(recent_used_cpu) &&
cpumask_test_cpu(p->recent_used_cpu, &p->cpus_allowed)) {
/*
* Replace recent_used_cpu with prev as it is a potential
@@ -6613,7 +6613,7 @@ static int wake_cap(struct task_struct *p, int cpu, int prev_cpu)
static int
select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_flags)
{
- struct sched_domain *tmp, *affine_sd = NULL, *sd = NULL;
+ struct sched_domain *tmp, *sd = NULL;
int cpu = smp_processor_id();
int new_cpu = prev_cpu;
int want_affine = 0;
@@ -6636,7 +6636,10 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f
*/
if (want_affine && (tmp->flags & SD_WAKE_AFFINE) &&
cpumask_test_cpu(prev_cpu, sched_domain_span(tmp))) {
- affine_sd = tmp;
+ if (cpu != prev_cpu)
+ new_cpu = wake_affine(tmp, p, cpu, prev_cpu, sync);
+
+ sd = NULL; /* Prefer wake_affine over balance flags */
break;
}
@@ -6646,33 +6649,16 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f
break;
}
- if (affine_sd) {
- sd = NULL; /* Prefer wake_affine over balance flags */
- if (cpu == prev_cpu)
- goto pick_cpu;
-
- new_cpu = wake_affine(affine_sd, p, cpu, prev_cpu, sync);
- }
-
- if (sd && !(sd_flag & SD_BALANCE_FORK)) {
- /*
- * We're going to need the task's util for capacity_spare_wake
- * in find_idlest_group. Sync it up to prev_cpu's
- * last_update_time.
- */
- sync_entity_load_avg(&p->se);
- }
+ if (unlikely(sd)) {
+ /* Slow path */
+ new_cpu = find_idlest_cpu(sd, p, cpu, prev_cpu, sd_flag);
+ } else if (sd_flag & SD_BALANCE_WAKE) { /* XXX always ? */
+ /* Fast path */
- if (!sd) {
-pick_cpu:
- if (sd_flag & SD_BALANCE_WAKE) { /* XXX always ? */
- new_cpu = select_idle_sibling(p, prev_cpu, new_cpu);
+ new_cpu = select_idle_sibling(p, prev_cpu, new_cpu);
- if (want_affine)
- current->recent_used_cpu = cpu;
- }
- } else {
- new_cpu = find_idlest_cpu(sd, p, cpu, prev_cpu, sd_flag);
+ if (want_affine)
+ current->recent_used_cpu = cpu;
}
rcu_read_unlock();
@@ -9847,6 +9833,7 @@ static int idle_balance(struct rq *this_rq, struct rq_flags *rf)
if (curr_cost > this_rq->max_idle_balance_cost)
this_rq->max_idle_balance_cost = curr_cost;
+out:
/*
* While browsing the domains, we released the rq lock, a task could
* have been enqueued in the meantime. Since we're not going idle,
@@ -9855,7 +9842,6 @@ static int idle_balance(struct rq *this_rq, struct rq_flags *rf)
if (this_rq->cfs.h_nr_running && !pulled_task)
pulled_task = 1;
-out:
/* Move the next balance forward */
if (time_after(this_rq->next_balance, next_balance))
this_rq->next_balance = next_balance;
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 7aef6b4e885a..ef3c4e6f5345 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -2701,8 +2701,6 @@ int sched_rr_handler(struct ctl_table *table, int write,
}
#ifdef CONFIG_SCHED_DEBUG
-extern void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq);
-
void print_rt_stats(struct seq_file *m, int cpu)
{
rt_rq_iter_t iter;
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 15750c222ca2..6601baf2361c 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -983,7 +983,7 @@ static inline void rq_clock_skip_update(struct rq *rq)
}
/*
- * See rt task throttoling, which is the only time a skip
+ * See rt task throttling, which is the only time a skip
* request is cancelled.
*/
static inline void rq_clock_cancel_skipupdate(struct rq *rq)
@@ -1069,6 +1069,12 @@ enum numa_faults_stats {
extern void sched_setnuma(struct task_struct *p, int node);
extern int migrate_task_to(struct task_struct *p, int cpu);
extern int migrate_swap(struct task_struct *, struct task_struct *);
+extern void init_numa_balancing(unsigned long clone_flags, struct task_struct *p);
+#else
+static inline void
+init_numa_balancing(unsigned long clone_flags, struct task_struct *p)
+{
+}
#endif /* CONFIG_NUMA_BALANCING */
#ifdef CONFIG_SMP
@@ -2025,8 +2031,9 @@ extern bool sched_debug_enabled;
extern void print_cfs_stats(struct seq_file *m, int cpu);
extern void print_rt_stats(struct seq_file *m, int cpu);
extern void print_dl_stats(struct seq_file *m, int cpu);
-extern void
-print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq);
+extern void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq);
+extern void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq);
+extern void print_dl_rq(struct seq_file *m, int cpu, struct dl_rq *dl_rq);
#ifdef CONFIG_NUMA_BALANCING
extern void
show_numa_stats(struct task_struct *p, struct seq_file *m);
diff --git a/kernel/sched/stats.c b/kernel/sched/stats.c
index ab112cbfd7c8..750fb3c67eed 100644
--- a/kernel/sched/stats.c
+++ b/kernel/sched/stats.c
@@ -120,22 +120,9 @@ static const struct seq_operations schedstat_sops = {
.show = show_schedstat,
};
-static int schedstat_open(struct inode *inode, struct file *file)
-{
- return seq_open(file, &schedstat_sops);
-}
-
-static const struct file_operations proc_schedstat_operations = {
- .open = schedstat_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
static int __init proc_schedstat_init(void)
{
- proc_create("schedstat", 0, NULL, &proc_schedstat_operations);
-
+ proc_create_seq("schedstat", 0, NULL, &schedstat_sops);
return 0;
}
subsys_initcall(proc_schedstat_init);
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index 64cc564f5255..61a1125c1ae4 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -1708,7 +1708,7 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *att
rcu_read_unlock();
if (rq && sched_debug_enabled) {
- pr_info("span: %*pbl (max cpu_capacity = %lu)\n",
+ pr_info("root domain span: %*pbl (max cpu_capacity = %lu)\n",
cpumask_pr_args(cpu_map), rq->rd->max_cpu_capacity);
}
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index dc77548167ef..e691d9a6c58d 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -19,6 +19,8 @@
#include <linux/compat.h>
#include <linux/coredump.h>
#include <linux/kmemleak.h>
+#include <linux/nospec.h>
+#include <linux/prctl.h>
#include <linux/sched.h>
#include <linux/sched/task_stack.h>
#include <linux/seccomp.h>
@@ -227,8 +229,11 @@ static inline bool seccomp_may_assign_mode(unsigned long seccomp_mode)
return true;
}
+void __weak arch_seccomp_spec_mitigate(struct task_struct *task) { }
+
static inline void seccomp_assign_mode(struct task_struct *task,
- unsigned long seccomp_mode)
+ unsigned long seccomp_mode,
+ unsigned long flags)
{
assert_spin_locked(&task->sighand->siglock);
@@ -238,6 +243,9 @@ static inline void seccomp_assign_mode(struct task_struct *task,
* filter) is set.
*/
smp_mb__before_atomic();
+ /* Assume default seccomp processes want spec flaw mitigation. */
+ if ((flags & SECCOMP_FILTER_FLAG_SPEC_ALLOW) == 0)
+ arch_seccomp_spec_mitigate(task);
set_tsk_thread_flag(task, TIF_SECCOMP);
}
@@ -305,7 +313,7 @@ static inline pid_t seccomp_can_sync_threads(void)
* without dropping the locks.
*
*/
-static inline void seccomp_sync_threads(void)
+static inline void seccomp_sync_threads(unsigned long flags)
{
struct task_struct *thread, *caller;
@@ -346,7 +354,8 @@ static inline void seccomp_sync_threads(void)
* allow one thread to transition the other.
*/
if (thread->seccomp.mode == SECCOMP_MODE_DISABLED)
- seccomp_assign_mode(thread, SECCOMP_MODE_FILTER);
+ seccomp_assign_mode(thread, SECCOMP_MODE_FILTER,
+ flags);
}
}
@@ -469,7 +478,7 @@ static long seccomp_attach_filter(unsigned int flags,
/* Now that the new filter is in place, synchronize to all threads. */
if (flags & SECCOMP_FILTER_FLAG_TSYNC)
- seccomp_sync_threads();
+ seccomp_sync_threads(flags);
return 0;
}
@@ -818,7 +827,7 @@ static long seccomp_set_mode_strict(void)
#ifdef TIF_NOTSC
disable_TSC();
#endif
- seccomp_assign_mode(current, seccomp_mode);
+ seccomp_assign_mode(current, seccomp_mode, 0);
ret = 0;
out:
@@ -876,7 +885,7 @@ static long seccomp_set_mode_filter(unsigned int flags,
/* Do not free the successfully attached filter. */
prepared = NULL;
- seccomp_assign_mode(current, seccomp_mode);
+ seccomp_assign_mode(current, seccomp_mode, flags);
out:
spin_unlock_irq(&current->sighand->siglock);
if (flags & SECCOMP_FILTER_FLAG_TSYNC)
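
A hypothetical userspace sketch of the new flag (the BPF filter program itself is elided; constants are from the UAPI additions in this series):

	#include <linux/filter.h>
	#include <linux/seccomp.h>
	#include <sys/syscall.h>
	#include <unistd.h>

	/* Install a filter while opting out of the automatic speculation
	 * mitigation; without SECCOMP_FILTER_FLAG_SPEC_ALLOW the kernel now
	 * calls arch_seccomp_spec_mitigate() when the filter is assigned. */
	static int install_filter_spec_allow(const struct sock_fprog *prog)
	{
		return syscall(SYS_seccomp, SECCOMP_SET_MODE_FILTER,
			       SECCOMP_FILTER_FLAG_SPEC_ALLOW, prog);
	}
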
diff --git a/kernel/signal.c b/kernel/signal.c
index d4ccea599692..0f865d67415d 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1539,7 +1539,6 @@ int send_sig_fault(int sig, int code, void __user *addr
return send_sig_info(info.si_signo, &info, t);
}
-#if defined(BUS_MCEERR_AO) && defined(BUS_MCEERR_AR)
int force_sig_mceerr(int code, void __user *addr, short lsb, struct task_struct *t)
{
struct siginfo info;
@@ -1568,9 +1567,7 @@ int send_sig_mceerr(int code, void __user *addr, short lsb, struct task_struct *
return send_sig_info(info.si_signo, &info, t);
}
EXPORT_SYMBOL(send_sig_mceerr);
-#endif
-#ifdef SEGV_BNDERR
int force_sig_bnderr(void __user *addr, void __user *lower, void __user *upper)
{
struct siginfo info;
@@ -1584,7 +1581,6 @@ int force_sig_bnderr(void __user *addr, void __user *lower, void __user *upper)
info.si_upper = upper;
return force_sig_info(info.si_signo, &info, current);
}
-#endif
#ifdef SEGV_PKUERR
int force_sig_pkuerr(void __user *addr, u32 pkey)
@@ -1961,14 +1957,27 @@ static void ptrace_stop(int exit_code, int why, int clear_code, siginfo_t *info)
return;
}
+ set_special_state(TASK_TRACED);
+
/*
* We're committing to trapping. TRACED should be visible before
* TRAPPING is cleared; otherwise, the tracer might fail do_wait().
* Also, transition to TRACED and updates to ->jobctl should be
* atomic with respect to siglock and should be done after the arch
* hook as siglock is released and regrabbed across it.
+ *
+ * TRACER TRACEE
+ *
+ * ptrace_attach()
+ * [L] wait_on_bit(JOBCTL_TRAPPING) [S] set_special_state(TRACED)
+ * do_wait()
+ * set_current_state() smp_wmb();
+ * ptrace_do_wait()
+ * wait_task_stopped()
+ * task_stopped_code()
+ * [L] task_is_traced() [S] task_clear_jobctl_trapping();
*/
- set_current_state(TASK_TRACED);
+ smp_wmb();
current->last_siginfo = info;
current->exit_code = exit_code;
@@ -2176,7 +2185,7 @@ static bool do_signal_stop(int signr)
if (task_participate_group_stop(current))
notify = CLD_STOPPED;
- __set_current_state(TASK_STOPPED);
+ set_special_state(TASK_STOPPED);
spin_unlock_irq(&current->sighand->siglock);
/*
@@ -2824,8 +2833,19 @@ enum siginfo_layout siginfo_layout(int sig, int si_code)
[SIGPOLL] = { NSIGPOLL, SIL_POLL },
[SIGSYS] = { NSIGSYS, SIL_SYS },
};
- if ((sig < ARRAY_SIZE(filter)) && (si_code <= filter[sig].limit))
+ if ((sig < ARRAY_SIZE(filter)) && (si_code <= filter[sig].limit)) {
layout = filter[sig].layout;
+ /* Handle the exceptions */
+ if ((sig == SIGBUS) &&
+ (si_code >= BUS_MCEERR_AR) && (si_code <= BUS_MCEERR_AO))
+ layout = SIL_FAULT_MCEERR;
+ else if ((sig == SIGSEGV) && (si_code == SEGV_BNDERR))
+ layout = SIL_FAULT_BNDERR;
+#ifdef SEGV_PKUERR
+ else if ((sig == SIGSEGV) && (si_code == SEGV_PKUERR))
+ layout = SIL_FAULT_PKUERR;
+#endif
+ }
else if (si_code <= NSIGPOLL)
layout = SIL_POLL;
} else {
@@ -2835,104 +2855,15 @@ enum siginfo_layout siginfo_layout(int sig, int si_code)
layout = SIL_POLL;
else if (si_code < 0)
layout = SIL_RT;
- /* Tests to support buggy kernel ABIs */
-#ifdef TRAP_FIXME
- if ((sig == SIGTRAP) && (si_code == TRAP_FIXME))
- layout = SIL_FAULT;
-#endif
-#ifdef FPE_FIXME
- if ((sig == SIGFPE) && (si_code == FPE_FIXME))
- layout = SIL_FAULT;
-#endif
}
return layout;
}
int copy_siginfo_to_user(siginfo_t __user *to, const siginfo_t *from)
{
- int err;
-
- if (!access_ok (VERIFY_WRITE, to, sizeof(siginfo_t)))
+	if (copy_to_user(to, from, sizeof(struct siginfo)))
return -EFAULT;
- if (from->si_code < 0)
- return __copy_to_user(to, from, sizeof(siginfo_t))
- ? -EFAULT : 0;
- /*
- * If you change siginfo_t structure, please be sure
- * this code is fixed accordingly.
- * Please remember to update the signalfd_copyinfo() function
- * inside fs/signalfd.c too, in case siginfo_t changes.
- * It should never copy any pad contained in the structure
- * to avoid security leaks, but must copy the generic
- * 3 ints plus the relevant union member.
- */
- err = __put_user(from->si_signo, &to->si_signo);
- err |= __put_user(from->si_errno, &to->si_errno);
- err |= __put_user(from->si_code, &to->si_code);
- switch (siginfo_layout(from->si_signo, from->si_code)) {
- case SIL_KILL:
- err |= __put_user(from->si_pid, &to->si_pid);
- err |= __put_user(from->si_uid, &to->si_uid);
- break;
- case SIL_TIMER:
- /* Unreached SI_TIMER is negative */
- break;
- case SIL_POLL:
- err |= __put_user(from->si_band, &to->si_band);
- err |= __put_user(from->si_fd, &to->si_fd);
- break;
- case SIL_FAULT:
- err |= __put_user(from->si_addr, &to->si_addr);
-#ifdef __ARCH_SI_TRAPNO
- err |= __put_user(from->si_trapno, &to->si_trapno);
-#endif
-#ifdef __ia64__
- err |= __put_user(from->si_imm, &to->si_imm);
- err |= __put_user(from->si_flags, &to->si_flags);
- err |= __put_user(from->si_isr, &to->si_isr);
-#endif
- /*
- * Other callers might not initialize the si_lsb field,
- * so check explicitly for the right codes here.
- */
-#ifdef BUS_MCEERR_AR
- if (from->si_signo == SIGBUS && from->si_code == BUS_MCEERR_AR)
- err |= __put_user(from->si_addr_lsb, &to->si_addr_lsb);
-#endif
-#ifdef BUS_MCEERR_AO
- if (from->si_signo == SIGBUS && from->si_code == BUS_MCEERR_AO)
- err |= __put_user(from->si_addr_lsb, &to->si_addr_lsb);
-#endif
-#ifdef SEGV_BNDERR
- if (from->si_signo == SIGSEGV && from->si_code == SEGV_BNDERR) {
- err |= __put_user(from->si_lower, &to->si_lower);
- err |= __put_user(from->si_upper, &to->si_upper);
- }
-#endif
-#ifdef SEGV_PKUERR
- if (from->si_signo == SIGSEGV && from->si_code == SEGV_PKUERR)
- err |= __put_user(from->si_pkey, &to->si_pkey);
-#endif
- break;
- case SIL_CHLD:
- err |= __put_user(from->si_pid, &to->si_pid);
- err |= __put_user(from->si_uid, &to->si_uid);
- err |= __put_user(from->si_status, &to->si_status);
- err |= __put_user(from->si_utime, &to->si_utime);
- err |= __put_user(from->si_stime, &to->si_stime);
- break;
- case SIL_RT:
- err |= __put_user(from->si_pid, &to->si_pid);
- err |= __put_user(from->si_uid, &to->si_uid);
- err |= __put_user(from->si_ptr, &to->si_ptr);
- break;
- case SIL_SYS:
- err |= __put_user(from->si_call_addr, &to->si_call_addr);
- err |= __put_user(from->si_syscall, &to->si_syscall);
- err |= __put_user(from->si_arch, &to->si_arch);
- break;
- }
- return err;
+ return 0;
}
#ifdef CONFIG_COMPAT
@@ -2971,27 +2902,28 @@ int __copy_siginfo_to_user32(struct compat_siginfo __user *to,
#ifdef __ARCH_SI_TRAPNO
new.si_trapno = from->si_trapno;
#endif
-#ifdef BUS_MCEERR_AR
- if ((from->si_signo == SIGBUS) && (from->si_code == BUS_MCEERR_AR))
- new.si_addr_lsb = from->si_addr_lsb;
-#endif
-#ifdef BUS_MCEERR_AO
- if ((from->si_signo == SIGBUS) && (from->si_code == BUS_MCEERR_AO))
- new.si_addr_lsb = from->si_addr_lsb;
+ break;
+ case SIL_FAULT_MCEERR:
+ new.si_addr = ptr_to_compat(from->si_addr);
+#ifdef __ARCH_SI_TRAPNO
+ new.si_trapno = from->si_trapno;
#endif
-#ifdef SEGV_BNDERR
- if ((from->si_signo == SIGSEGV) &&
- (from->si_code == SEGV_BNDERR)) {
- new.si_lower = ptr_to_compat(from->si_lower);
- new.si_upper = ptr_to_compat(from->si_upper);
- }
+ new.si_addr_lsb = from->si_addr_lsb;
+ break;
+ case SIL_FAULT_BNDERR:
+ new.si_addr = ptr_to_compat(from->si_addr);
+#ifdef __ARCH_SI_TRAPNO
+ new.si_trapno = from->si_trapno;
#endif
-#ifdef SEGV_PKUERR
- if ((from->si_signo == SIGSEGV) &&
- (from->si_code == SEGV_PKUERR))
- new.si_pkey = from->si_pkey;
+ new.si_lower = ptr_to_compat(from->si_lower);
+ new.si_upper = ptr_to_compat(from->si_upper);
+ break;
+ case SIL_FAULT_PKUERR:
+ new.si_addr = ptr_to_compat(from->si_addr);
+#ifdef __ARCH_SI_TRAPNO
+ new.si_trapno = from->si_trapno;
#endif
-
+ new.si_pkey = from->si_pkey;
break;
case SIL_CHLD:
new.si_pid = from->si_pid;
@@ -3057,24 +2989,28 @@ int copy_siginfo_from_user32(struct siginfo *to,
#ifdef __ARCH_SI_TRAPNO
to->si_trapno = from.si_trapno;
#endif
-#ifdef BUS_MCEERR_AR
- if ((from.si_signo == SIGBUS) && (from.si_code == BUS_MCEERR_AR))
- to->si_addr_lsb = from.si_addr_lsb;
-#endif
-#ifdef BUS_MCEER_AO
- if ((from.si_signo == SIGBUS) && (from.si_code == BUS_MCEERR_AO))
- to->si_addr_lsb = from.si_addr_lsb;
+ break;
+ case SIL_FAULT_MCEERR:
+ to->si_addr = compat_ptr(from.si_addr);
+#ifdef __ARCH_SI_TRAPNO
+ to->si_trapno = from.si_trapno;
#endif
-#ifdef SEGV_BNDERR
- if ((from.si_signo == SIGSEGV) && (from.si_code == SEGV_BNDERR)) {
- to->si_lower = compat_ptr(from.si_lower);
- to->si_upper = compat_ptr(from.si_upper);
- }
+ to->si_addr_lsb = from.si_addr_lsb;
+ break;
+ case SIL_FAULT_BNDERR:
+ to->si_addr = compat_ptr(from.si_addr);
+#ifdef __ARCH_SI_TRAPNO
+ to->si_trapno = from.si_trapno;
#endif
-#ifdef SEGV_PKUERR
- if ((from.si_signo == SIGSEGV) && (from.si_code == SEGV_PKUERR))
- to->si_pkey = from.si_pkey;
+ to->si_lower = compat_ptr(from.si_lower);
+ to->si_upper = compat_ptr(from.si_upper);
+ break;
+ case SIL_FAULT_PKUERR:
+ to->si_addr = compat_ptr(from.si_addr);
+#ifdef __ARCH_SI_TRAPNO
+ to->si_trapno = from.si_trapno;
#endif
+ to->si_pkey = from.si_pkey;
break;
case SIL_CHLD:
to->si_pid = from.si_pid;
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 177de3640c78..de2f57fddc04 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -49,8 +49,8 @@
*/
#ifndef __ARCH_IRQ_STAT
-irq_cpustat_t irq_stat[NR_CPUS] ____cacheline_aligned;
-EXPORT_SYMBOL(irq_stat);
+DEFINE_PER_CPU_ALIGNED(irq_cpustat_t, irq_stat);
+EXPORT_PER_CPU_SYMBOL(irq_stat);
#endif
static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp;
@@ -145,8 +145,7 @@ static void __local_bh_enable(unsigned int cnt)
}
/*
- * Special-case - softirqs can safely be enabled in
- * cond_resched_softirq(), or by __do_softirq(),
+ * Special-case - softirqs can safely be enabled by __do_softirq(),
* without processing still-pending softirqs:
*/
void _local_bh_enable(void)
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index b7591261652d..f89014a2c238 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -21,6 +21,7 @@
#include <linux/smpboot.h>
#include <linux/atomic.h>
#include <linux/nmi.h>
+#include <linux/sched/wake_q.h>
/*
* Structure to determine completion condition and record errors. May
@@ -36,7 +37,7 @@ struct cpu_stop_done {
struct cpu_stopper {
struct task_struct *thread;
- spinlock_t lock;
+ raw_spinlock_t lock;
bool enabled; /* is this stopper enabled? */
struct list_head works; /* list of pending works */
@@ -65,26 +66,30 @@ static void cpu_stop_signal_done(struct cpu_stop_done *done)
}
static void __cpu_stop_queue_work(struct cpu_stopper *stopper,
- struct cpu_stop_work *work)
+ struct cpu_stop_work *work,
+ struct wake_q_head *wakeq)
{
list_add_tail(&work->list, &stopper->works);
- wake_up_process(stopper->thread);
+ wake_q_add(wakeq, stopper->thread);
}
/* queue @work to @stopper. if offline, @work is completed immediately */
static bool cpu_stop_queue_work(unsigned int cpu, struct cpu_stop_work *work)
{
struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);
+ DEFINE_WAKE_Q(wakeq);
unsigned long flags;
bool enabled;
- spin_lock_irqsave(&stopper->lock, flags);
+ raw_spin_lock_irqsave(&stopper->lock, flags);
enabled = stopper->enabled;
if (enabled)
- __cpu_stop_queue_work(stopper, work);
+ __cpu_stop_queue_work(stopper, work, &wakeq);
else if (work->done)
cpu_stop_signal_done(work->done);
- spin_unlock_irqrestore(&stopper->lock, flags);
+ raw_spin_unlock_irqrestore(&stopper->lock, flags);
+
+ wake_up_q(&wakeq);
return enabled;
}
@@ -229,10 +234,11 @@ static int cpu_stop_queue_two_works(int cpu1, struct cpu_stop_work *work1,
{
struct cpu_stopper *stopper1 = per_cpu_ptr(&cpu_stopper, cpu1);
struct cpu_stopper *stopper2 = per_cpu_ptr(&cpu_stopper, cpu2);
+ DEFINE_WAKE_Q(wakeq);
int err;
retry:
- spin_lock_irq(&stopper1->lock);
- spin_lock_nested(&stopper2->lock, SINGLE_DEPTH_NESTING);
+ raw_spin_lock_irq(&stopper1->lock);
+ raw_spin_lock_nested(&stopper2->lock, SINGLE_DEPTH_NESTING);
err = -ENOENT;
if (!stopper1->enabled || !stopper2->enabled)
@@ -252,17 +258,20 @@ retry:
goto unlock;
err = 0;
- __cpu_stop_queue_work(stopper1, work1);
- __cpu_stop_queue_work(stopper2, work2);
+ __cpu_stop_queue_work(stopper1, work1, &wakeq);
+ __cpu_stop_queue_work(stopper2, work2, &wakeq);
unlock:
- spin_unlock(&stopper2->lock);
- spin_unlock_irq(&stopper1->lock);
+ raw_spin_unlock(&stopper2->lock);
+ raw_spin_unlock_irq(&stopper1->lock);
if (unlikely(err == -EDEADLK)) {
while (stop_cpus_in_progress)
cpu_relax();
goto retry;
}
+
+ wake_up_q(&wakeq);
+
return err;
}
/**
@@ -448,9 +457,9 @@ static int cpu_stop_should_run(unsigned int cpu)
unsigned long flags;
int run;
- spin_lock_irqsave(&stopper->lock, flags);
+ raw_spin_lock_irqsave(&stopper->lock, flags);
run = !list_empty(&stopper->works);
- spin_unlock_irqrestore(&stopper->lock, flags);
+ raw_spin_unlock_irqrestore(&stopper->lock, flags);
return run;
}
@@ -461,13 +470,13 @@ static void cpu_stopper_thread(unsigned int cpu)
repeat:
work = NULL;
- spin_lock_irq(&stopper->lock);
+ raw_spin_lock_irq(&stopper->lock);
if (!list_empty(&stopper->works)) {
work = list_first_entry(&stopper->works,
struct cpu_stop_work, list);
list_del_init(&work->list);
}
- spin_unlock_irq(&stopper->lock);
+ raw_spin_unlock_irq(&stopper->lock);
if (work) {
cpu_stop_fn_t fn = work->fn;
@@ -541,7 +550,7 @@ static int __init cpu_stop_init(void)
for_each_possible_cpu(cpu) {
struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);
- spin_lock_init(&stopper->lock);
+ raw_spin_lock_init(&stopper->lock);
INIT_LIST_HEAD(&stopper->works);
}
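
The shape of the conversion, reduced to its essentials: wakeups are only recorded while the raw lock is held and issued after it is dropped, so wake_up_process() never nests scheduler locks inside stopper->lock (a requirement once the lock becomes a raw_spinlock_t). A sketch:

	DEFINE_WAKE_Q(wakeq);
	unsigned long flags;

	raw_spin_lock_irqsave(&stopper->lock, flags);
	if (stopper->enabled)
		wake_q_add(&wakeq, stopper->thread);	/* record only */
	raw_spin_unlock_irqrestore(&stopper->lock, flags);

	wake_up_q(&wakeq);	/* actual wakeup, no stopper lock held */
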
diff --git a/kernel/sys.c b/kernel/sys.c
index ad692183dfe9..d1b2b8d934bb 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -61,6 +61,8 @@
#include <linux/uidgid.h>
#include <linux/cred.h>
+#include <linux/nospec.h>
+
#include <linux/kmsg_dump.h>
/* Move somewhere else to avoid recompiling? */
#include <generated/utsrelease.h>
@@ -69,6 +71,9 @@
#include <asm/io.h>
#include <asm/unistd.h>
+/* Hardening for Spectre-v1 */
+#include <linux/nospec.h>
+
#include "uid16.h"
#ifndef SET_UNALIGN_CTL
@@ -1451,6 +1456,7 @@ SYSCALL_DEFINE2(old_getrlimit, unsigned int, resource,
if (resource >= RLIM_NLIMITS)
return -EINVAL;
+ resource = array_index_nospec(resource, RLIM_NLIMITS);
task_lock(current->group_leader);
x = current->signal->rlim[resource];
task_unlock(current->group_leader);
@@ -1470,6 +1476,7 @@ COMPAT_SYSCALL_DEFINE2(old_getrlimit, unsigned int, resource,
if (resource >= RLIM_NLIMITS)
return -EINVAL;
+ resource = array_index_nospec(resource, RLIM_NLIMITS);
task_lock(current->group_leader);
r = current->signal->rlim[resource];
task_unlock(current->group_leader);
@@ -2242,6 +2249,17 @@ static int propagate_has_child_subreaper(struct task_struct *p, void *data)
return 1;
}
+int __weak arch_prctl_spec_ctrl_get(struct task_struct *t, unsigned long which)
+{
+ return -EINVAL;
+}
+
+int __weak arch_prctl_spec_ctrl_set(struct task_struct *t, unsigned long which,
+ unsigned long ctrl)
+{
+ return -EINVAL;
+}
+
SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
unsigned long, arg4, unsigned long, arg5)
{
@@ -2450,6 +2468,16 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
case PR_SVE_GET_VL:
error = SVE_GET_VL();
break;
+ case PR_GET_SPECULATION_CTRL:
+ if (arg3 || arg4 || arg5)
+ return -EINVAL;
+ error = arch_prctl_spec_ctrl_get(me, arg2);
+ break;
+ case PR_SET_SPECULATION_CTRL:
+ if (arg4 || arg5)
+ return -EINVAL;
+ error = arch_prctl_spec_ctrl_set(me, arg2, arg3);
+ break;
default:
error = -EINVAL;
break;
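
A hypothetical userspace caller of the new prctls (constants from the <linux/prctl.h> additions that accompany the weak arch hooks above):

	#include <sys/prctl.h>

	/* Query whether speculative store bypass is controllable via prctl,
	 * then disable it for the calling task. Returns non-zero when the
	 * architecture hook reports no support. */
	static int disable_ssb(void)
	{
		long state = prctl(PR_GET_SPECULATION_CTRL,
				   PR_SPEC_STORE_BYPASS, 0, 0, 0);

		if (state < 0 || !(state & PR_SPEC_PRCTL))
			return -1;
		return prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS,
			     PR_SPEC_DISABLE, 0, 0);
	}
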
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 9791364925dc..183169c2a75b 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -43,7 +43,9 @@ COND_SYSCALL(io_submit);
COND_SYSCALL_COMPAT(io_submit);
COND_SYSCALL(io_cancel);
COND_SYSCALL(io_getevents);
+COND_SYSCALL(io_pgetevents);
COND_SYSCALL_COMPAT(io_getevents);
+COND_SYSCALL_COMPAT(io_pgetevents);
/* fs/xattr.c */
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index b398c2ea69b2..aa2094d5dd27 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -612,6 +612,14 @@ static void tick_handle_oneshot_broadcast(struct clock_event_device *dev)
now = ktime_get();
/* Find all expired events */
for_each_cpu(cpu, tick_broadcast_oneshot_mask) {
+ /*
+		 * Required for !SMP because for_each_cpu() unconditionally
+		 * reports CPU0 as set on UP kernels.
+ */
+ if (!IS_ENABLED(CONFIG_SMP) &&
+ cpumask_empty(tick_broadcast_oneshot_mask))
+ break;
+
td = &per_cpu(tick_cpu_device, cpu);
if (td->evtdev->next_event <= now) {
cpumask_set_cpu(cpu, tmpmask);
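
The quirk being guarded against is the uniprocessor stub of the iterator, which ignores its mask argument entirely; paraphrasing the !SMP definition in <linux/cpumask.h>:

	/* UP build: CPU 0 is always visited, mask or no mask, so an empty
	 * broadcast mask would still dereference tick_cpu_device for CPU 0
	 * without the cpumask_empty() bail-out above. */
	#define for_each_cpu(cpu, mask)				\
		for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)(mask))
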
diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c
index ddc1d58f96c7..d647dabdac97 100644
--- a/kernel/time/timer_list.c
+++ b/kernel/time/timer_list.c
@@ -370,24 +370,12 @@ static const struct seq_operations timer_list_sops = {
.show = timer_list_show,
};
-static int timer_list_open(struct inode *inode, struct file *filp)
-{
- return seq_open_private(filp, &timer_list_sops,
- sizeof(struct timer_list_iter));
-}
-
-static const struct file_operations timer_list_fops = {
- .open = timer_list_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release_private,
-};
-
static int __init init_timer_list_procfs(void)
{
struct proc_dir_entry *pe;
- pe = proc_create("timer_list", 0400, NULL, &timer_list_fops);
+ pe = proc_create_seq_private("timer_list", 0400, NULL, &timer_list_sops,
+ sizeof(struct timer_list_iter), NULL);
if (!pe)
return -ENOMEM;
return 0;
diff --git a/kernel/torture.c b/kernel/torture.c
index 37b94012a3f8..3de1efbecd6a 100644
--- a/kernel/torture.c
+++ b/kernel/torture.c
@@ -574,7 +574,7 @@ void stutter_wait(const char *title)
{
int spt;
- cond_resched_rcu_qs();
+ cond_resched_tasks_rcu_qs();
spt = READ_ONCE(stutter_pause_test);
for (; spt; spt = READ_ONCE(stutter_pause_test)) {
if (spt == 1) {
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 16bbf062018f..8d83bcf9ef69 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -5514,10 +5514,10 @@ static __init int ftrace_init_dyn_tracefs(struct dentry *d_tracer)
ftrace_create_filter_files(&global_ops, d_tracer);
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
- trace_create_file("set_graph_function", 0444, d_tracer,
+ trace_create_file("set_graph_function", 0644, d_tracer,
NULL,
&ftrace_graph_fops);
- trace_create_file("set_graph_notrace", 0444, d_tracer,
+ trace_create_file("set_graph_notrace", 0644, d_tracer,
NULL,
&ftrace_graph_notrace_fops);
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 414d7210b2ec..bcd93031d042 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -893,7 +893,7 @@ int __trace_bputs(unsigned long ip, const char *str)
EXPORT_SYMBOL_GPL(__trace_bputs);
#ifdef CONFIG_TRACER_SNAPSHOT
-static void tracing_snapshot_instance(struct trace_array *tr)
+void tracing_snapshot_instance(struct trace_array *tr)
{
struct tracer *tracer = tr->current_trace;
unsigned long flags;
@@ -949,7 +949,7 @@ static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
struct trace_buffer *size_buf, int cpu_id);
static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
-static int alloc_snapshot(struct trace_array *tr)
+int tracing_alloc_snapshot_instance(struct trace_array *tr)
{
int ret;
@@ -995,7 +995,7 @@ int tracing_alloc_snapshot(void)
struct trace_array *tr = &global_trace;
int ret;
- ret = alloc_snapshot(tr);
+ ret = tracing_alloc_snapshot_instance(tr);
WARN_ON(ret < 0);
return ret;
@@ -5408,7 +5408,7 @@ static int tracing_set_tracer(struct trace_array *tr, const char *buf)
#ifdef CONFIG_TRACER_MAX_TRACE
if (t->use_max_tr && !had_max_tr) {
- ret = alloc_snapshot(tr);
+ ret = tracing_alloc_snapshot_instance(tr);
if (ret < 0)
goto out;
}
@@ -6451,7 +6451,7 @@ tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
}
#endif
if (!tr->allocated_snapshot) {
- ret = alloc_snapshot(tr);
+ ret = tracing_alloc_snapshot_instance(tr);
if (ret < 0)
break;
}
@@ -7179,7 +7179,7 @@ ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
return ret;
out_reg:
- ret = alloc_snapshot(tr);
+ ret = tracing_alloc_snapshot_instance(tr);
if (ret < 0)
goto out;
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 6fb46a06c9dc..507954b4e058 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -1817,6 +1817,17 @@ static inline void __init trace_event_init(void) { }
static inline void trace_event_eval_update(struct trace_eval_map **map, int len) { }
#endif
+#ifdef CONFIG_TRACER_SNAPSHOT
+void tracing_snapshot_instance(struct trace_array *tr);
+int tracing_alloc_snapshot_instance(struct trace_array *tr);
+#else
+static inline void tracing_snapshot_instance(struct trace_array *tr) { }
+static inline int tracing_alloc_snapshot_instance(struct trace_array *tr)
+{
+ return 0;
+}
+#endif
+
extern struct trace_iterator *tracepoint_print_iter;
#endif /* _LINUX_KERNEL_TRACE_H */
diff --git a/kernel/trace/trace_benchmark.c b/kernel/trace/trace_benchmark.c
index 22fee766081b..80e0b2aca703 100644
--- a/kernel/trace/trace_benchmark.c
+++ b/kernel/trace/trace_benchmark.c
@@ -159,13 +159,13 @@ static int benchmark_event_kthread(void *arg)
* wants to run, schedule in, but if the CPU is idle,
* we'll keep burning cycles.
*
- * Note the _rcu_qs() version of cond_resched() will
+ * Note the tasks_rcu_qs() version of cond_resched() will
* notify synchronize_rcu_tasks() that this thread has
* passed a quiescent state for rcu_tasks. Otherwise
* this thread will never voluntarily schedule which would
* block synchronize_rcu_tasks() indefinitely.
*/
- cond_resched();
+ cond_resched_tasks_rcu_qs();
}
return 0;
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index 1f951b3df60c..7d306b74230f 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -762,6 +762,9 @@ static int regex_match_full(char *str, struct regex *r, int len)
static int regex_match_front(char *str, struct regex *r, int len)
{
+ if (len < r->len)
+ return 0;
+
if (strncmp(str, r->pattern, r->len) == 0)
return 1;
return 0;
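
A minimal model of why the length guard matters (field sizes invented): without it, a pattern longer than the event field is compared against bytes belonging to whatever follows the field in the ring buffer entry.

	#include <string.h>

	/* A 4-byte pattern must not be front-matched against a 2-byte
	 * field: "ab" followed in memory by "cd" from the next field
	 * would otherwise spuriously match the pattern "abcd". */
	static int match_front(const char *field, int field_len,
			       const char *pat, int pat_len)
	{
		if (field_len < pat_len)
			return 0;
		return strncmp(field, pat, pat_len) == 0;
	}
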
diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c
index 0d7b3ffbecc2..b9061ed59bbd 100644
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -2466,6 +2466,7 @@ parse_field(struct hist_trigger_data *hist_data, struct trace_event_file *file,
else if (strcmp(modifier, "usecs") == 0)
*flags |= HIST_FIELD_FL_TIMESTAMP_USECS;
else {
+ hist_err("Invalid field modifier: ", modifier);
field = ERR_PTR(-EINVAL);
goto out;
}
@@ -2481,6 +2482,7 @@ parse_field(struct hist_trigger_data *hist_data, struct trace_event_file *file,
else {
field = trace_find_event_field(file->event_call, field_name);
if (!field || !field->size) {
+ hist_err("Couldn't find field: ", field_name);
field = ERR_PTR(-EINVAL);
goto out;
}
@@ -4913,6 +4915,16 @@ static void hist_field_print(struct seq_file *m, struct hist_field *hist_field)
seq_printf(m, "%s", field_name);
} else if (hist_field->flags & HIST_FIELD_FL_TIMESTAMP)
seq_puts(m, "common_timestamp");
+
+ if (hist_field->flags) {
+ if (!(hist_field->flags & HIST_FIELD_FL_VAR_REF) &&
+ !(hist_field->flags & HIST_FIELD_FL_EXPR)) {
+ const char *flags = get_hist_field_flags(hist_field);
+
+ if (flags)
+ seq_printf(m, ".%s", flags);
+ }
+ }
}
static int event_hist_trigger_print(struct seq_file *m,
diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c
index d251cabcf69a..8b5bdcf64871 100644
--- a/kernel/trace/trace_events_trigger.c
+++ b/kernel/trace/trace_events_trigger.c
@@ -483,9 +483,10 @@ clear_event_triggers(struct trace_array *tr)
struct trace_event_file *file;
list_for_each_entry(file, &tr->events, list) {
- struct event_trigger_data *data;
- list_for_each_entry_rcu(data, &file->triggers, list) {
+ struct event_trigger_data *data, *n;
+ list_for_each_entry_safe(data, n, &file->triggers, list) {
trace_event_trigger_enable_disable(file, 0);
+ list_del_rcu(&data->list);
if (data->ops->free)
data->ops->free(data->ops, data);
}
@@ -642,6 +643,7 @@ event_trigger_callback(struct event_command *cmd_ops,
trigger_data->count = -1;
trigger_data->ops = trigger_ops;
trigger_data->cmd_ops = cmd_ops;
+ trigger_data->private_data = file;
INIT_LIST_HEAD(&trigger_data->list);
INIT_LIST_HEAD(&trigger_data->named_list);
@@ -1053,7 +1055,12 @@ static void
snapshot_trigger(struct event_trigger_data *data, void *rec,
struct ring_buffer_event *event)
{
- tracing_snapshot();
+ struct trace_event_file *file = data->private_data;
+
+ if (file)
+ tracing_snapshot_instance(file->tr);
+ else
+ tracing_snapshot();
}
static void
@@ -1076,7 +1083,7 @@ register_snapshot_trigger(char *glob, struct event_trigger_ops *ops,
{
int ret = register_trigger(glob, ops, data, file);
- if (ret > 0 && tracing_alloc_snapshot() != 0) {
+ if (ret > 0 && tracing_alloc_snapshot_instance(file->tr) != 0) {
unregister_trigger(glob, ops, data, file);
ret = 0;
}
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index 3c7bfc4bf5e9..4237eba4ef20 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -472,7 +472,7 @@ static __init int stack_trace_init(void)
NULL, &stack_trace_fops);
#ifdef CONFIG_DYNAMIC_FTRACE
- trace_create_file("stack_trace_filter", 0444, d_tracer,
+ trace_create_file("stack_trace_filter", 0644, d_tracer,
&trace_ops, &stack_trace_filter_fops);
#endif
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index 34fd0e0ec51d..ac892878dbe6 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -55,6 +55,7 @@ struct trace_uprobe {
struct list_head list;
struct trace_uprobe_filter filter;
struct uprobe_consumer consumer;
+ struct path path;
struct inode *inode;
char *filename;
unsigned long offset;
@@ -289,7 +290,7 @@ static void free_trace_uprobe(struct trace_uprobe *tu)
for (i = 0; i < tu->tp.nr_args; i++)
traceprobe_free_probe_arg(&tu->tp.args[i]);
- iput(tu->inode);
+ path_put(&tu->path);
kfree(tu->tp.call.class->system);
kfree(tu->tp.call.name);
kfree(tu->filename);
@@ -363,7 +364,6 @@ end:
static int create_trace_uprobe(int argc, char **argv)
{
struct trace_uprobe *tu;
- struct inode *inode;
char *arg, *event, *group, *filename;
char buf[MAX_EVENT_NAME_LEN];
struct path path;
@@ -371,7 +371,6 @@ static int create_trace_uprobe(int argc, char **argv)
bool is_delete, is_return;
int i, ret;
- inode = NULL;
ret = 0;
is_delete = false;
is_return = false;
@@ -437,21 +436,16 @@ static int create_trace_uprobe(int argc, char **argv)
}
/* Find the last occurrence, in case the path contains ':' too. */
arg = strrchr(argv[1], ':');
- if (!arg) {
- ret = -EINVAL;
- goto fail_address_parse;
- }
+ if (!arg)
+ return -EINVAL;
*arg++ = '\0';
filename = argv[1];
ret = kern_path(filename, LOOKUP_FOLLOW, &path);
if (ret)
- goto fail_address_parse;
-
- inode = igrab(d_real_inode(path.dentry));
- path_put(&path);
+ return ret;
- if (!inode || !S_ISREG(inode->i_mode)) {
+ if (!d_is_reg(path.dentry)) {
ret = -EINVAL;
goto fail_address_parse;
}
@@ -490,7 +484,7 @@ static int create_trace_uprobe(int argc, char **argv)
goto fail_address_parse;
}
tu->offset = offset;
- tu->inode = inode;
+ tu->path = path;
tu->filename = kstrdup(filename, GFP_KERNEL);
if (!tu->filename) {
@@ -558,7 +552,7 @@ error:
return ret;
fail_address_parse:
- iput(inode);
+ path_put(&path);
pr_info("Failed to parse address or file.\n");
@@ -922,6 +916,7 @@ probe_event_enable(struct trace_uprobe *tu, struct trace_event_file *file,
goto err_flags;
tu->consumer.filter = filter;
+ tu->inode = d_real_inode(tu->path.dentry);
ret = uprobe_register(tu->inode, tu->offset, &tu->consumer);
if (ret)
goto err_buffer;
@@ -967,6 +962,7 @@ probe_event_disable(struct trace_uprobe *tu, struct trace_event_file *file)
WARN_ON(!uprobe_filter_is_empty(&tu->filter));
uprobe_unregister(tu->inode, tu->offset, &tu->consumer);
+ tu->inode = NULL;
tu->tp.flags &= file ? ~TP_FLAG_TRACE : ~TP_FLAG_PROFILE;
uprobe_buffer_disable();
@@ -1337,7 +1333,6 @@ struct trace_event_call *
create_local_trace_uprobe(char *name, unsigned long offs, bool is_return)
{
struct trace_uprobe *tu;
- struct inode *inode;
struct path path;
int ret;
@@ -1345,11 +1340,8 @@ create_local_trace_uprobe(char *name, unsigned long offs, bool is_return)
if (ret)
return ERR_PTR(ret);
- inode = igrab(d_inode(path.dentry));
- path_put(&path);
-
- if (!inode || !S_ISREG(inode->i_mode)) {
- iput(inode);
+ if (!d_is_reg(path.dentry)) {
+ path_put(&path);
return ERR_PTR(-EINVAL);
}
@@ -1364,11 +1356,12 @@ create_local_trace_uprobe(char *name, unsigned long offs, bool is_return)
if (IS_ERR(tu)) {
pr_info("Failed to allocate trace_uprobe.(%d)\n",
(int)PTR_ERR(tu));
+ path_put(&path);
return ERR_CAST(tu);
}
tu->offset = offs;
- tu->inode = inode;
+ tu->path = path;
tu->filename = kstrdup(name, GFP_KERNEL);
init_trace_event_call(tu, &tu->tp.call);
diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c
index 671b13457387..1e37da2e0c25 100644
--- a/kernel/tracepoint.c
+++ b/kernel/tracepoint.c
@@ -207,7 +207,7 @@ static int tracepoint_add_func(struct tracepoint *tp,
lockdep_is_held(&tracepoints_mutex));
old = func_add(&tp_funcs, func, prio);
if (IS_ERR(old)) {
- WARN_ON_ONCE(1);
+ WARN_ON_ONCE(PTR_ERR(old) != -ENOMEM);
return PTR_ERR(old);
}
@@ -239,7 +239,7 @@ static int tracepoint_remove_func(struct tracepoint *tp,
lockdep_is_held(&tracepoints_mutex));
old = func_remove(&tp_funcs, func);
if (IS_ERR(old)) {
- WARN_ON_ONCE(1);
+ WARN_ON_ONCE(PTR_ERR(old) != -ENOMEM);
return PTR_ERR(old);
}
diff --git a/lib/Kconfig b/lib/Kconfig
index 5fe577673b98..7a913937888b 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -429,15 +429,50 @@ config SGL_ALLOC
bool
default n
+config NEED_SG_DMA_LENGTH
+ bool
+
+config NEED_DMA_MAP_STATE
+ bool
+
+config ARCH_DMA_ADDR_T_64BIT
+ def_bool 64BIT || PHYS_ADDR_T_64BIT
+
+config IOMMU_HELPER
+ bool
+
+config ARCH_HAS_SYNC_DMA_FOR_DEVICE
+ bool
+
+config ARCH_HAS_SYNC_DMA_FOR_CPU
+ bool
+ select NEED_DMA_MAP_STATE
+
config DMA_DIRECT_OPS
bool
- depends on HAS_DMA && (!64BIT || ARCH_DMA_ADDR_T_64BIT)
- default n
+ depends on HAS_DMA
+
+config DMA_NONCOHERENT_OPS
+ bool
+ depends on HAS_DMA
+ select DMA_DIRECT_OPS
+
+config DMA_NONCOHERENT_MMAP
+ bool
+ depends on DMA_NONCOHERENT_OPS
+
+config DMA_NONCOHERENT_CACHE_SYNC
+ bool
+ depends on DMA_NONCOHERENT_OPS
config DMA_VIRT_OPS
bool
- depends on HAS_DMA && (!64BIT || ARCH_DMA_ADDR_T_64BIT)
- default n
+ depends on HAS_DMA
+
+config SWIOTLB
+ bool
+ select DMA_DIRECT_OPS
+ select NEED_DMA_MAP_STATE
config CHECK_SIGNATURE
bool
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index c40c7b734cd1..76555479ae36 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1634,7 +1634,7 @@ config PROVIDE_OHCI1394_DMA_INIT
config DMA_API_DEBUG
bool "Enable debugging of DMA-API usage"
- depends on HAVE_DMA_API_DEBUG
+ select NEED_DMA_MAP_STATE
help
Enable this option to debug the use of the DMA API by device drivers.
With this option you will be able to detect common bugs in device
@@ -1651,6 +1651,23 @@ config DMA_API_DEBUG
If unsure, say N.
+config DMA_API_DEBUG_SG
+ bool "Debug DMA scatter-gather usage"
+ default y
+ depends on DMA_API_DEBUG
+ help
+ Perform extra checking that callers of dma_map_sg() have respected the
+ appropriate segment length/boundary limits for the given device when
+ preparing DMA scatterlists.
+
+ This is particularly likely to have been overlooked in cases where the
+ dma_map_sg() API is used for general bulk mapping of pages rather than
+ preparing literal scatter-gather descriptors, where there is a risk of
+ unexpected behaviour from DMA API implementations if the scatterlist
+ is technically out-of-spec.
+
+ If unsure, say N.
+
menuconfig RUNTIME_TESTING_MENU
bool "Runtime Testing"
def_bool y
diff --git a/lib/Makefile b/lib/Makefile
index ce20696d5a92..9f18c8152281 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -30,6 +30,7 @@ lib-$(CONFIG_PRINTK) += dump_stack.o
lib-$(CONFIG_MMU) += ioremap.o
lib-$(CONFIG_SMP) += cpumask.o
lib-$(CONFIG_DMA_DIRECT_OPS) += dma-direct.o
+lib-$(CONFIG_DMA_NONCOHERENT_OPS) += dma-noncoherent.o
lib-$(CONFIG_DMA_VIRT_OPS) += dma-virt.o
lib-y += kobject.o klist.o
@@ -147,7 +148,7 @@ obj-$(CONFIG_AUDIT_GENERIC) += audit.o
obj-$(CONFIG_AUDIT_COMPAT_GENERIC) += compat_audit.o
obj-$(CONFIG_SWIOTLB) += swiotlb.o
-obj-$(CONFIG_IOMMU_HELPER) += iommu-helper.o iommu-common.o
+obj-$(CONFIG_IOMMU_HELPER) += iommu-helper.o
obj-$(CONFIG_FAULT_INJECTION) += fault-inject.o
obj-$(CONFIG_NOTIFIER_ERROR_INJECTION) += notifier-error-inject.o
obj-$(CONFIG_PM_NOTIFIER_ERROR_INJECT) += pm-notifier-error-inject.o
diff --git a/lib/dma-debug.c b/lib/dma-debug.c
index 7f5cdc1e6b29..c007d25bee09 100644
--- a/lib/dma-debug.c
+++ b/lib/dma-debug.c
@@ -41,6 +41,11 @@
#define HASH_FN_SHIFT 13
#define HASH_FN_MASK (HASH_SIZE - 1)
+/* allow architectures to override this if absolutely required */
+#ifndef PREALLOC_DMA_DEBUG_ENTRIES
+#define PREALLOC_DMA_DEBUG_ENTRIES (1 << 16)
+#endif
+
enum {
dma_debug_single,
dma_debug_page,
@@ -127,7 +132,7 @@ static u32 min_free_entries;
static u32 nr_total_entries;
/* number of preallocated entries requested by kernel cmdline */
-static u32 req_entries;
+static u32 nr_prealloc_entries = PREALLOC_DMA_DEBUG_ENTRIES;
/* debugfs dentry's for the stuff above */
static struct dentry *dma_debug_dent __read_mostly;
@@ -439,7 +444,6 @@ void debug_dma_dump_mappings(struct device *dev)
spin_unlock_irqrestore(&bucket->lock, flags);
}
}
-EXPORT_SYMBOL(debug_dma_dump_mappings);
/*
* For each mapping (initial cacheline in the case of
@@ -748,7 +752,6 @@ int dma_debug_resize_entries(u32 num_entries)
return ret;
}
-EXPORT_SYMBOL(dma_debug_resize_entries);
/*
* DMA-API debugging init code
@@ -1004,10 +1007,7 @@ void dma_debug_add_bus(struct bus_type *bus)
bus_register_notifier(bus, nb);
}
-/*
- * Let the architectures decide how many entries should be preallocated.
- */
-void dma_debug_init(u32 num_entries)
+static int dma_debug_init(void)
{
int i;
@@ -1015,7 +1015,7 @@ void dma_debug_init(u32 num_entries)
* called to set dma_debug_initialized
*/
if (global_disable)
- return;
+ return 0;
for (i = 0; i < HASH_SIZE; ++i) {
INIT_LIST_HEAD(&dma_entry_hash[i].list);
@@ -1026,17 +1026,14 @@ void dma_debug_init(u32 num_entries)
pr_err("DMA-API: error creating debugfs entries - disabling\n");
global_disable = true;
- return;
+ return 0;
}
- if (req_entries)
- num_entries = req_entries;
-
- if (prealloc_memory(num_entries) != 0) {
+ if (prealloc_memory(nr_prealloc_entries) != 0) {
pr_err("DMA-API: debugging out of memory error - disabled\n");
global_disable = true;
- return;
+ return 0;
}
nr_total_entries = num_free_entries;
@@ -1044,7 +1041,9 @@ void dma_debug_init(u32 num_entries)
dma_debug_initialized = true;
pr_info("DMA-API: debugging enabled by kernel config\n");
+ return 0;
}
+core_initcall(dma_debug_init);
static __init int dma_debug_cmdline(char *str)
{
@@ -1061,16 +1060,10 @@ static __init int dma_debug_cmdline(char *str)
static __init int dma_debug_entries_cmdline(char *str)
{
- int res;
-
if (!str)
return -EINVAL;
-
- res = get_option(&str, &req_entries);
-
- if (!res)
- req_entries = 0;
-
+ if (!get_option(&str, &nr_prealloc_entries))
+ nr_prealloc_entries = PREALLOC_DMA_DEBUG_ENTRIES;
return 0;
}
@@ -1293,6 +1286,32 @@ out:
put_hash_bucket(bucket, &flags);
}
+static void check_sg_segment(struct device *dev, struct scatterlist *sg)
+{
+#ifdef CONFIG_DMA_API_DEBUG_SG
+ unsigned int max_seg = dma_get_max_seg_size(dev);
+ u64 start, end, boundary = dma_get_seg_boundary(dev);
+
+ /*
+ * Either the driver forgot to set dma_parms appropriately, or
+ * whoever generated the list forgot to check them.
+ */
+ if (sg->length > max_seg)
+ err_printk(dev, NULL, "DMA-API: mapping sg segment longer than device claims to support [len=%u] [max=%u]\n",
+ sg->length, max_seg);
+ /*
+ * In some cases this could potentially be the DMA API
+ * implementation's fault, but it would usually imply that
+ * the scatterlist was built inappropriately to begin with.
+ */
+ start = sg_dma_address(sg);
+ end = start + sg_dma_len(sg) - 1;
+ if ((start ^ end) & ~boundary)
+ err_printk(dev, NULL, "DMA-API: mapping sg segment across boundary [start=0x%016llx] [end=0x%016llx] [boundary=0x%016llx]\n",
+ start, end, boundary);
+#endif
+}
+
void debug_dma_map_page(struct device *dev, struct page *page, size_t offset,
size_t size, int direction, dma_addr_t dma_addr,
bool map_single)
@@ -1423,6 +1442,8 @@ void debug_dma_map_sg(struct device *dev, struct scatterlist *sg,
check_for_illegal_area(dev, sg_virt(s), sg_dma_len(s));
}
+ check_sg_segment(dev, s);
+
add_dma_entry(entry);
}
}
diff --git a/lib/dma-direct.c b/lib/dma-direct.c
index bbfb229aa067..8be8106270c2 100644
--- a/lib/dma-direct.c
+++ b/lib/dma-direct.c
@@ -34,6 +34,13 @@ check_addr(struct device *dev, dma_addr_t dma_addr, size_t size,
const char *caller)
{
if (unlikely(dev && !dma_capable(dev, dma_addr, size))) {
+ if (!dev->dma_mask) {
+ dev_err(dev,
+ "%s: call on device without dma_mask\n",
+ caller);
+ return false;
+ }
+
if (*dev->dma_mask >= DMA_BIT_MASK(32)) {
dev_err(dev,
"%s: overflow %pad+%zu of device mask %llx\n",
@@ -84,6 +91,13 @@ again:
__free_pages(page, page_order);
page = NULL;
+ if (IS_ENABLED(CONFIG_ZONE_DMA32) &&
+ dev->coherent_dma_mask < DMA_BIT_MASK(64) &&
+ !(gfp & (GFP_DMA32 | GFP_DMA))) {
+ gfp |= GFP_DMA32;
+ goto again;
+ }
+
if (IS_ENABLED(CONFIG_ZONE_DMA) &&
dev->coherent_dma_mask < DMA_BIT_MASK(32) &&
!(gfp & GFP_DMA)) {
@@ -121,7 +135,7 @@ void dma_direct_free(struct device *dev, size_t size, void *cpu_addr,
free_pages((unsigned long)cpu_addr, page_order);
}
-static dma_addr_t dma_direct_map_page(struct device *dev, struct page *page,
+dma_addr_t dma_direct_map_page(struct device *dev, struct page *page,
unsigned long offset, size_t size, enum dma_data_direction dir,
unsigned long attrs)
{
@@ -132,8 +146,8 @@ static dma_addr_t dma_direct_map_page(struct device *dev, struct page *page,
return dma_addr;
}
-static int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl,
- int nents, enum dma_data_direction dir, unsigned long attrs)
+int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents,
+ enum dma_data_direction dir, unsigned long attrs)
{
int i;
struct scatterlist *sg;
@@ -165,10 +179,16 @@ int dma_direct_supported(struct device *dev, u64 mask)
if (mask < DMA_BIT_MASK(32))
return 0;
#endif
+ /*
+ * Various PCI/PCIe bridges have broken support for > 32bit DMA even
+ * if the device itself might support it.
+ */
+ if (dev->dma_32bit_limit && mask > DMA_BIT_MASK(32))
+ return 0;
return 1;
}
-static int dma_direct_mapping_error(struct device *dev, dma_addr_t dma_addr)
+int dma_direct_mapping_error(struct device *dev, dma_addr_t dma_addr)
{
return dma_addr == DIRECT_MAPPING_ERROR;
}
@@ -180,6 +200,5 @@ const struct dma_map_ops dma_direct_ops = {
.map_sg = dma_direct_map_sg,
.dma_supported = dma_direct_supported,
.mapping_error = dma_direct_mapping_error,
- .is_phys = 1,
};
EXPORT_SYMBOL(dma_direct_ops);
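
The GFP_DMA32/GFP_DMA retry added to the allocator above walks down to more restrictive zones until the page fits the device's coherent DMA mask. A simplified userspace model of that ladder (a sketch with the assumption that each retry succeeds and lands in the requested zone):

#include <assert.h>
#include <stdint.h>

#define DMA_BIT_MASK_N(n) (((n) == 64) ? ~0ull : ((1ull << (n)) - 1))

enum zone { ZONE_NORMAL, ZONE_DMA32, ZONE_DMA };

static enum zone alloc_zone_for(uint64_t coherent_mask)
{
	enum zone z = ZONE_NORMAL;
again:
	/* Pretend every attempt "fails to fit" whenever a lower zone is
	 * still allowed, mirroring the free-and-retry flow in the patch. */
	if (z == ZONE_NORMAL && coherent_mask < DMA_BIT_MASK_N(64)) {
		z = ZONE_DMA32;
		goto again;
	}
	if (z == ZONE_DMA32 && coherent_mask < DMA_BIT_MASK_N(32)) {
		z = ZONE_DMA;
		goto again;
	}
	return z;
}

int main(void)
{
	assert(alloc_zone_for(DMA_BIT_MASK_N(64)) == ZONE_NORMAL);
	assert(alloc_zone_for(DMA_BIT_MASK_N(32)) == ZONE_DMA32);
	assert(alloc_zone_for(DMA_BIT_MASK_N(24)) == ZONE_DMA);
	return 0;
}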
diff --git a/lib/dma-noncoherent.c b/lib/dma-noncoherent.c
new file mode 100644
index 000000000000..79e9a757387f
--- /dev/null
+++ b/lib/dma-noncoherent.c
@@ -0,0 +1,102 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2018 Christoph Hellwig.
+ *
+ * DMA operations that map physical memory directly without providing cache
+ * coherence.
+ */
+#include <linux/export.h>
+#include <linux/mm.h>
+#include <linux/dma-direct.h>
+#include <linux/dma-noncoherent.h>
+#include <linux/scatterlist.h>
+
+static void dma_noncoherent_sync_single_for_device(struct device *dev,
+ dma_addr_t addr, size_t size, enum dma_data_direction dir)
+{
+ arch_sync_dma_for_device(dev, dma_to_phys(dev, addr), size, dir);
+}
+
+static void dma_noncoherent_sync_sg_for_device(struct device *dev,
+ struct scatterlist *sgl, int nents, enum dma_data_direction dir)
+{
+ struct scatterlist *sg;
+ int i;
+
+ for_each_sg(sgl, sg, nents, i)
+ arch_sync_dma_for_device(dev, sg_phys(sg), sg->length, dir);
+}
+
+static dma_addr_t dma_noncoherent_map_page(struct device *dev, struct page *page,
+ unsigned long offset, size_t size, enum dma_data_direction dir,
+ unsigned long attrs)
+{
+ dma_addr_t addr;
+
+ addr = dma_direct_map_page(dev, page, offset, size, dir, attrs);
+ if (!dma_mapping_error(dev, addr) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
+ arch_sync_dma_for_device(dev, page_to_phys(page) + offset,
+ size, dir);
+ return addr;
+}
+
+static int dma_noncoherent_map_sg(struct device *dev, struct scatterlist *sgl,
+ int nents, enum dma_data_direction dir, unsigned long attrs)
+{
+ nents = dma_direct_map_sg(dev, sgl, nents, dir, attrs);
+ if (nents > 0 && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
+ dma_noncoherent_sync_sg_for_device(dev, sgl, nents, dir);
+ return nents;
+}
+
+#ifdef CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU
+static void dma_noncoherent_sync_single_for_cpu(struct device *dev,
+ dma_addr_t addr, size_t size, enum dma_data_direction dir)
+{
+ arch_sync_dma_for_cpu(dev, dma_to_phys(dev, addr), size, dir);
+}
+
+static void dma_noncoherent_sync_sg_for_cpu(struct device *dev,
+ struct scatterlist *sgl, int nents, enum dma_data_direction dir)
+{
+ struct scatterlist *sg;
+ int i;
+
+ for_each_sg(sgl, sg, nents, i)
+ arch_sync_dma_for_cpu(dev, sg_phys(sg), sg->length, dir);
+}
+
+static void dma_noncoherent_unmap_page(struct device *dev, dma_addr_t addr,
+ size_t size, enum dma_data_direction dir, unsigned long attrs)
+{
+ if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
+ dma_noncoherent_sync_single_for_cpu(dev, addr, size, dir);
+}
+
+static void dma_noncoherent_unmap_sg(struct device *dev, struct scatterlist *sgl,
+ int nents, enum dma_data_direction dir, unsigned long attrs)
+{
+ if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
+ dma_noncoherent_sync_sg_for_cpu(dev, sgl, nents, dir);
+}
+#endif
+
+const struct dma_map_ops dma_noncoherent_ops = {
+ .alloc = arch_dma_alloc,
+ .free = arch_dma_free,
+ .mmap = arch_dma_mmap,
+ .sync_single_for_device = dma_noncoherent_sync_single_for_device,
+ .sync_sg_for_device = dma_noncoherent_sync_sg_for_device,
+ .map_page = dma_noncoherent_map_page,
+ .map_sg = dma_noncoherent_map_sg,
+#ifdef CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU
+ .sync_single_for_cpu = dma_noncoherent_sync_single_for_cpu,
+ .sync_sg_for_cpu = dma_noncoherent_sync_sg_for_cpu,
+ .unmap_page = dma_noncoherent_unmap_page,
+ .unmap_sg = dma_noncoherent_unmap_sg,
+#endif
+ .dma_supported = dma_direct_supported,
+ .mapping_error = dma_direct_mapping_error,
+ .cache_sync = arch_dma_cache_sync,
+};
+EXPORT_SYMBOL(dma_noncoherent_ops);
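
For these ops to work, an architecture opting in supplies the arch_* hooks declared in <linux/dma-noncoherent.h>. A hedged sketch of what such a hook might look like; dcache_clean_range()/dcache_inval_range() are hypothetical stand-ins for the architecture's real cache-maintenance primitives:

void arch_sync_dma_for_device(struct device *dev, phys_addr_t paddr,
		size_t size, enum dma_data_direction dir)
{
	void *vaddr = phys_to_virt(paddr);

	if (dir == DMA_FROM_DEVICE)
		dcache_inval_range(vaddr, size);	/* device is about to write */
	else
		dcache_clean_range(vaddr, size);	/* CPU wrote; push to memory */
}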
diff --git a/lib/errseq.c b/lib/errseq.c
index df782418b333..81f9e33aa7e7 100644
--- a/lib/errseq.c
+++ b/lib/errseq.c
@@ -111,27 +111,22 @@ EXPORT_SYMBOL(errseq_set);
* errseq_sample() - Grab current errseq_t value.
* @eseq: Pointer to errseq_t to be sampled.
*
- * This function allows callers to sample an errseq_t value, marking it as
- * "seen" if required.
+ * This function allows callers to initialise their errseq_t variable.
+ * If the error has been "seen", new callers will not see an old error.
+ * If there is an unseen error in @eseq, the caller of this function will
+ * see it the next time it checks for an error.
*
+ * Context: Any context.
* Return: The current errseq value.
*/
errseq_t errseq_sample(errseq_t *eseq)
{
errseq_t old = READ_ONCE(*eseq);
- errseq_t new = old;
- /*
- * For the common case of no errors ever having been set, we can skip
- * marking the SEEN bit. Once an error has been set, the value will
- * never go back to zero.
- */
- if (old != 0) {
- new |= ERRSEQ_SEEN;
- if (old != new)
- cmpxchg(eseq, old, new);
- }
- return new;
+ /* If nobody has seen this error yet, then we can be the first. */
+ if (!(old & ERRSEQ_SEEN))
+ old = 0;
+ return old;
}
EXPORT_SYMBOL(errseq_sample);
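
A userspace model of the new sampling semantics (a sketch with a toy bit layout; the real one lives in lib/errseq.c): an unseen error samples as zero, so a subsequent check against the sampled value still reports it.

#include <assert.h>

typedef unsigned int errseq_t;

#define ERRSEQ_SEEN	0x1000u	/* toy layout: error code below, counter above */

static errseq_t errseq_sample_model(const errseq_t *eseq)
{
	errseq_t old = *eseq;

	/* If nobody has seen this error yet, sample as zero so that the
	 * caller still observes it on its next check. */
	if (!(old & ERRSEQ_SEEN))
		old = 0;
	return old;
}

static int errseq_check_model(const errseq_t *eseq, errseq_t since)
{
	return *eseq != since;	/* non-zero: an error happened since */
}

int main(void)
{
	errseq_t eseq = 5;	/* an as-yet-unseen error is recorded */
	errseq_t since = errseq_sample_model(&eseq);

	assert(since == 0);				/* unseen -> sampled as zero */
	assert(errseq_check_model(&eseq, since));	/* so the check reports it */
	return 0;
}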
diff --git a/lib/find_bit_benchmark.c b/lib/find_bit_benchmark.c
index 5985a25e6cbc..5367ffa5c18f 100644
--- a/lib/find_bit_benchmark.c
+++ b/lib/find_bit_benchmark.c
@@ -132,7 +132,12 @@ static int __init find_bit_test(void)
test_find_next_bit(bitmap, BITMAP_LEN);
test_find_next_zero_bit(bitmap, BITMAP_LEN);
test_find_last_bit(bitmap, BITMAP_LEN);
- test_find_first_bit(bitmap, BITMAP_LEN);
+
+ /*
+ * test_find_first_bit() may take some time, so
+ * traverse only part of the bitmap to avoid a soft lockup.
+ */
+ test_find_first_bit(bitmap, BITMAP_LEN / 10);
test_find_next_and_bit(bitmap, bitmap2, BITMAP_LEN);
pr_err("\nStart testing find_bit() with sparse bitmap\n");
diff --git a/lib/iommu-helper.c b/lib/iommu-helper.c
index 23633c0fda4a..92a9f243c0e2 100644
--- a/lib/iommu-helper.c
+++ b/lib/iommu-helper.c
@@ -3,19 +3,8 @@
* IOMMU helper functions for the free area management
*/
-#include <linux/export.h>
#include <linux/bitmap.h>
-#include <linux/bug.h>
-
-int iommu_is_span_boundary(unsigned int index, unsigned int nr,
- unsigned long shift,
- unsigned long boundary_size)
-{
- BUG_ON(!is_power_of_2(boundary_size));
-
- shift = (shift + index) & (boundary_size - 1);
- return shift + nr > boundary_size;
-}
+#include <linux/iommu-helper.h>
unsigned long iommu_area_alloc(unsigned long *map, unsigned long size,
unsigned long start, unsigned int nr,
@@ -38,4 +27,3 @@ again:
}
return -1;
}
-EXPORT_SYMBOL(iommu_area_alloc);
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index 970212670b6a..7e43cd54c84c 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -573,6 +573,67 @@ size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
}
EXPORT_SYMBOL(_copy_to_iter);
+#ifdef CONFIG_ARCH_HAS_UACCESS_MCSAFE
+static int copyout_mcsafe(void __user *to, const void *from, size_t n)
+{
+ if (access_ok(VERIFY_WRITE, to, n)) {
+ kasan_check_read(from, n);
+ n = copy_to_user_mcsafe((__force void *) to, from, n);
+ }
+ return n;
+}
+
+static unsigned long memcpy_mcsafe_to_page(struct page *page, size_t offset,
+ const char *from, size_t len)
+{
+ unsigned long ret;
+ char *to;
+
+ to = kmap_atomic(page);
+ ret = memcpy_mcsafe(to + offset, from, len);
+ kunmap_atomic(to);
+
+ return ret;
+}
+
+size_t _copy_to_iter_mcsafe(const void *addr, size_t bytes, struct iov_iter *i)
+{
+ const char *from = addr;
+ unsigned long rem, curr_addr, s_addr = (unsigned long) addr;
+
+ if (unlikely(i->type & ITER_PIPE)) {
+ WARN_ON(1);
+ return 0;
+ }
+ if (iter_is_iovec(i))
+ might_fault();
+ iterate_and_advance(i, bytes, v,
+ copyout_mcsafe(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len),
+ ({
+ rem = memcpy_mcsafe_to_page(v.bv_page, v.bv_offset,
+ (from += v.bv_len) - v.bv_len, v.bv_len);
+ if (rem) {
+ curr_addr = (unsigned long) from;
+ bytes = curr_addr - s_addr - rem;
+ return bytes;
+ }
+ }),
+ ({
+ rem = memcpy_mcsafe(v.iov_base, (from += v.iov_len) - v.iov_len,
+ v.iov_len);
+ if (rem) {
+ curr_addr = (unsigned long) from;
+ bytes = curr_addr - s_addr - rem;
+ return bytes;
+ }
+ })
+ )
+
+ return bytes;
+}
+EXPORT_SYMBOL_GPL(_copy_to_iter_mcsafe);
+#endif /* CONFIG_ARCH_HAS_UACCESS_MCSAFE */
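
The mcsafe path above converts the "bytes remaining" return of memcpy_mcsafe() into the count of bytes successfully written before the machine-check fault. A userspace sketch of that accounting, with an artificial fault injected after five bytes:

#include <assert.h>
#include <stddef.h>
#include <string.h>

/* Stand-in that "faults" after copying the first 'ok' bytes. */
static size_t memcpy_faulting(char *dst, const char *src, size_t len, size_t ok)
{
	size_t n = len < ok ? len : ok;

	memcpy(dst, src, n);
	return len - n;		/* bytes remaining, as memcpy_mcsafe() reports */
}

int main(void)
{
	char src[8] = "abcdefg", dst[8] = { 0 };
	size_t bytes = sizeof(src);
	size_t rem = memcpy_faulting(dst, src, bytes, 5);

	if (rem)		/* mirrors: bytes = curr_addr - s_addr - rem */
		bytes = bytes - rem;
	assert(bytes == 5);
	return 0;
}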
+
size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
{
char *to = addr;
@@ -1012,7 +1073,7 @@ unsigned long iov_iter_gap_alignment(const struct iov_iter *i)
}
EXPORT_SYMBOL(iov_iter_gap_alignment);
-static inline size_t __pipe_get_pages(struct iov_iter *i,
+static inline ssize_t __pipe_get_pages(struct iov_iter *i,
size_t maxsize,
struct page **pages,
int idx,
@@ -1102,7 +1163,7 @@ static ssize_t pipe_get_pages_alloc(struct iov_iter *i,
size_t *start)
{
struct page **p;
- size_t n;
+ ssize_t n;
int idx;
int npages;
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index da9e10c827df..a9e41aed6de4 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -1612,11 +1612,9 @@ static void set_iter_tags(struct radix_tree_iter *iter,
static void __rcu **skip_siblings(struct radix_tree_node **nodep,
void __rcu **slot, struct radix_tree_iter *iter)
{
- void *sib = node_to_entry(slot - 1);
-
while (iter->index < iter->next_index) {
*nodep = rcu_dereference_raw(*slot);
- if (*nodep && *nodep != sib)
+ if (*nodep && !is_sibling_entry(iter->node, *nodep))
return slot;
slot++;
iter->index = __radix_tree_iter_add(iter, 1);
@@ -1631,7 +1629,7 @@ void __rcu **__radix_tree_next_slot(void __rcu **slot,
struct radix_tree_iter *iter, unsigned flags)
{
unsigned tag = flags & RADIX_TREE_ITER_TAG_MASK;
- struct radix_tree_node *node = rcu_dereference_raw(*slot);
+ struct radix_tree_node *node;
slot = skip_siblings(&node, slot, iter);
@@ -2036,10 +2034,12 @@ void *radix_tree_delete_item(struct radix_tree_root *root,
unsigned long index, void *item)
{
struct radix_tree_node *node = NULL;
- void __rcu **slot;
+ void __rcu **slot = NULL;
void *entry;
entry = __radix_tree_lookup(root, index, &node, &slot);
+ if (!slot)
+ return NULL;
if (!entry && (!is_idr(root) || node_tag_get(root, node, IDR_FREE,
get_slot_offset(node, slot))))
return NULL;
diff --git a/lib/sbitmap.c b/lib/sbitmap.c
index e6a9c06ec70c..6fdc6267f4a8 100644
--- a/lib/sbitmap.c
+++ b/lib/sbitmap.c
@@ -270,18 +270,33 @@ void sbitmap_bitmap_show(struct sbitmap *sb, struct seq_file *m)
}
EXPORT_SYMBOL_GPL(sbitmap_bitmap_show);
-static unsigned int sbq_calc_wake_batch(unsigned int depth)
+static unsigned int sbq_calc_wake_batch(struct sbitmap_queue *sbq,
+ unsigned int depth)
{
unsigned int wake_batch;
+ unsigned int shallow_depth;
/*
* For each batch, we wake up one queue. We need to make sure that our
- * batch size is small enough that the full depth of the bitmap is
- * enough to wake up all of the queues.
+ * batch size is small enough that the full depth of the bitmap,
+ * potentially limited by a shallow depth, is enough to wake up all of
+ * the queues.
+ *
+ * Each full word of the bitmap has bits_per_word bits, and there might
+ * be a partial word. There are depth / bits_per_word full words and
+ * depth % bits_per_word bits left over. In bitwise arithmetic:
+ *
+ * bits_per_word = 1 << shift
+ * depth / bits_per_word = depth >> shift
+ * depth % bits_per_word = depth & ((1 << shift) - 1)
+ *
+ * Each word can be limited to sbq->min_shallow_depth bits.
*/
- wake_batch = SBQ_WAKE_BATCH;
- if (wake_batch > depth / SBQ_WAIT_QUEUES)
- wake_batch = max(1U, depth / SBQ_WAIT_QUEUES);
+ shallow_depth = min(1U << sbq->sb.shift, sbq->min_shallow_depth);
+ depth = ((depth >> sbq->sb.shift) * shallow_depth +
+ min(depth & ((1U << sbq->sb.shift) - 1), shallow_depth));
+ wake_batch = clamp_t(unsigned int, depth / SBQ_WAIT_QUEUES, 1,
+ SBQ_WAKE_BATCH);
return wake_batch;
}
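
Working the arithmetic above with concrete numbers (a sketch; the SBQ_* constants mirror include/linux/sbitmap.h): depth 200 with 64-bit words gives three full words plus eight leftover bits; capping each word at a min_shallow_depth of 16 leaves 3*16 + 8 = 56 usable bits, and 56 / 8 wait queues yields a wake batch of 7.

#include <assert.h>

#define SBQ_WAIT_QUEUES	8
#define SBQ_WAKE_BATCH	8

static unsigned int clampu(unsigned int v, unsigned int lo, unsigned int hi)
{
	return v < lo ? lo : (v > hi ? hi : v);
}

static unsigned int calc_wake_batch(unsigned int depth, unsigned int shift,
				    unsigned int min_shallow_depth)
{
	unsigned int bits_per_word = 1u << shift;
	unsigned int shallow = min_shallow_depth < bits_per_word ?
				min_shallow_depth : bits_per_word;
	unsigned int leftover = depth & (bits_per_word - 1);
	/* full words, each limited to 'shallow' usable bits */
	unsigned int usable = (depth >> shift) * shallow;

	/* partial last word, also limited to 'shallow' bits */
	usable += leftover < shallow ? leftover : shallow;
	return clampu(usable / SBQ_WAIT_QUEUES, 1, SBQ_WAKE_BATCH);
}

int main(void)
{
	assert(calc_wake_batch(200, 6, 16) == 7);
	return 0;
}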
@@ -307,7 +322,8 @@ int sbitmap_queue_init_node(struct sbitmap_queue *sbq, unsigned int depth,
*per_cpu_ptr(sbq->alloc_hint, i) = prandom_u32() % depth;
}
- sbq->wake_batch = sbq_calc_wake_batch(depth);
+ sbq->min_shallow_depth = UINT_MAX;
+ sbq->wake_batch = sbq_calc_wake_batch(sbq, depth);
atomic_set(&sbq->wake_index, 0);
sbq->ws = kzalloc_node(SBQ_WAIT_QUEUES * sizeof(*sbq->ws), flags, node);
@@ -327,21 +343,28 @@ int sbitmap_queue_init_node(struct sbitmap_queue *sbq, unsigned int depth,
}
EXPORT_SYMBOL_GPL(sbitmap_queue_init_node);
-void sbitmap_queue_resize(struct sbitmap_queue *sbq, unsigned int depth)
+static void sbitmap_queue_update_wake_batch(struct sbitmap_queue *sbq,
+ unsigned int depth)
{
- unsigned int wake_batch = sbq_calc_wake_batch(depth);
+ unsigned int wake_batch = sbq_calc_wake_batch(sbq, depth);
int i;
if (sbq->wake_batch != wake_batch) {
WRITE_ONCE(sbq->wake_batch, wake_batch);
/*
- * Pairs with the memory barrier in sbq_wake_up() to ensure that
- * the batch size is updated before the wait counts.
+ * Pairs with the memory barrier in sbitmap_queue_wake_up()
+ * to ensure that the batch size is updated before the wait
+ * counts.
*/
smp_mb__before_atomic();
for (i = 0; i < SBQ_WAIT_QUEUES; i++)
atomic_set(&sbq->ws[i].wait_cnt, 1);
}
+}
+
+void sbitmap_queue_resize(struct sbitmap_queue *sbq, unsigned int depth)
+{
+ sbitmap_queue_update_wake_batch(sbq, depth);
sbitmap_resize(&sbq->sb, depth);
}
EXPORT_SYMBOL_GPL(sbitmap_queue_resize);
@@ -380,6 +403,8 @@ int __sbitmap_queue_get_shallow(struct sbitmap_queue *sbq,
unsigned int hint, depth;
int nr;
+ WARN_ON_ONCE(shallow_depth < sbq->min_shallow_depth);
+
hint = this_cpu_read(*sbq->alloc_hint);
depth = READ_ONCE(sbq->sb.depth);
if (unlikely(hint >= depth)) {
@@ -403,6 +428,14 @@ int __sbitmap_queue_get_shallow(struct sbitmap_queue *sbq,
}
EXPORT_SYMBOL_GPL(__sbitmap_queue_get_shallow);
+void sbitmap_queue_min_shallow_depth(struct sbitmap_queue *sbq,
+ unsigned int min_shallow_depth)
+{
+ sbq->min_shallow_depth = min_shallow_depth;
+ sbitmap_queue_update_wake_batch(sbq, sbq->sb.depth);
+}
+EXPORT_SYMBOL_GPL(sbitmap_queue_min_shallow_depth);
+
static struct sbq_wait_state *sbq_wake_ptr(struct sbitmap_queue *sbq)
{
int i, wake_index;
@@ -425,52 +458,67 @@ static struct sbq_wait_state *sbq_wake_ptr(struct sbitmap_queue *sbq)
return NULL;
}
-static void sbq_wake_up(struct sbitmap_queue *sbq)
+static bool __sbq_wake_up(struct sbitmap_queue *sbq)
{
struct sbq_wait_state *ws;
unsigned int wake_batch;
int wait_cnt;
- /*
- * Pairs with the memory barrier in set_current_state() to ensure the
- * proper ordering of clear_bit()/waitqueue_active() in the waker and
- * test_and_set_bit_lock()/prepare_to_wait()/finish_wait() in the
- * waiter. See the comment on waitqueue_active(). This is __after_atomic
- * because we just did clear_bit_unlock() in the caller.
- */
- smp_mb__after_atomic();
-
ws = sbq_wake_ptr(sbq);
if (!ws)
- return;
+ return false;
wait_cnt = atomic_dec_return(&ws->wait_cnt);
if (wait_cnt <= 0) {
+ int ret;
+
wake_batch = READ_ONCE(sbq->wake_batch);
+
/*
* Pairs with the memory barrier in sbitmap_queue_resize() to
* ensure that we see the batch size update before the wait
* count is reset.
*/
smp_mb__before_atomic();
+
/*
- * If there are concurrent callers to sbq_wake_up(), the last
- * one to decrement the wait count below zero will bump it back
- * up. If there is a concurrent resize, the count reset will
- * either cause the cmpxchg to fail or overwrite after the
- * cmpxchg.
+ * For concurrent callers of this, the one that failed the
+ * atomic_cmpxchg() race should call this function again
+ * to wake up a new batch on a different 'ws'.
*/
- atomic_cmpxchg(&ws->wait_cnt, wait_cnt, wait_cnt + wake_batch);
- sbq_index_atomic_inc(&sbq->wake_index);
- wake_up_nr(&ws->wait, wake_batch);
+ ret = atomic_cmpxchg(&ws->wait_cnt, wait_cnt, wake_batch);
+ if (ret == wait_cnt) {
+ sbq_index_atomic_inc(&sbq->wake_index);
+ wake_up_nr(&ws->wait, wake_batch);
+ return false;
+ }
+
+ return true;
}
+
+ return false;
+}
+
+void sbitmap_queue_wake_up(struct sbitmap_queue *sbq)
+{
+ while (__sbq_wake_up(sbq))
+ ;
}
+EXPORT_SYMBOL_GPL(sbitmap_queue_wake_up);
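
The retry contract above — whoever loses the atomic_cmpxchg() race loops and tries again — can be modelled with compiler atomics (a sketch; single-threaded here, so only the winner path is exercised):

#include <assert.h>

static int wait_cnt = 1;

static int wake_once(int wake_batch)
{
	int cnt = __atomic_sub_fetch(&wait_cnt, 1, __ATOMIC_SEQ_CST);

	if (cnt <= 0) {
		int expected = cnt;

		/* Only the winner resets the counter and wakes waiters;
		 * a loser reports "retry" to its caller. */
		if (__atomic_compare_exchange_n(&wait_cnt, &expected,
						wake_batch, 0,
						__ATOMIC_SEQ_CST,
						__ATOMIC_SEQ_CST))
			return 0;	/* woke a batch */
		return 1;		/* lost the race: retry */
	}
	return 0;
}

int main(void)
{
	while (wake_once(4))
		;
	assert(wait_cnt == 4);
	return 0;
}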
void sbitmap_queue_clear(struct sbitmap_queue *sbq, unsigned int nr,
unsigned int cpu)
{
sbitmap_clear_bit_unlock(&sbq->sb, nr);
- sbq_wake_up(sbq);
+ /*
+ * Pairs with the memory barrier in set_current_state() to ensure the
+ * proper ordering of clear_bit_unlock()/waitqueue_active() in the waker
+ * and test_and_set_bit_lock()/prepare_to_wait()/finish_wait() in the
+ * waiter. See the comment on waitqueue_active().
+ */
+ smp_mb__after_atomic();
+ sbitmap_queue_wake_up(sbq);
+
if (likely(!sbq->round_robin && nr < sbq->sb.depth))
*per_cpu_ptr(sbq->alloc_hint, cpu) = nr;
}
@@ -482,7 +530,7 @@ void sbitmap_queue_wake_all(struct sbitmap_queue *sbq)
/*
* Pairs with the memory barrier in set_current_state() like in
- * sbq_wake_up().
+ * sbitmap_queue_wake_up().
*/
smp_mb();
wake_index = atomic_read(&sbq->wake_index);
@@ -528,5 +576,6 @@ void sbitmap_queue_show(struct sbitmap_queue *sbq, struct seq_file *m)
seq_puts(m, "}\n");
seq_printf(m, "round_robin=%d\n", sbq->round_robin);
+ seq_printf(m, "min_shallow_depth=%u\n", sbq->min_shallow_depth);
}
EXPORT_SYMBOL_GPL(sbitmap_queue_show);
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index fece57566d45..04b68d9dffac 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -593,9 +593,8 @@ found:
}
/*
- * Allocates bounce buffer and returns its kernel virtual address.
+ * Allocates bounce buffer and returns its physical address.
*/
-
static phys_addr_t
map_single(struct device *hwdev, phys_addr_t phys, size_t size,
enum dma_data_direction dir, unsigned long attrs)
@@ -614,7 +613,7 @@ map_single(struct device *hwdev, phys_addr_t phys, size_t size,
}
/*
- * dma_addr is the kernel virtual address of the bounce buffer to unmap.
+ * tlb_addr is the physical address of the bounce buffer to unmap.
*/
void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr,
size_t size, enum dma_data_direction dir,
@@ -692,7 +691,6 @@ void swiotlb_tbl_sync_single(struct device *hwdev, phys_addr_t tlb_addr,
}
}
-#ifdef CONFIG_DMA_DIRECT_OPS
static inline bool dma_coherent_ok(struct device *dev, dma_addr_t addr,
size_t size)
{
@@ -714,7 +712,7 @@ swiotlb_alloc_buffer(struct device *dev, size_t size, dma_addr_t *dma_handle,
phys_addr = swiotlb_tbl_map_single(dev,
__phys_to_dma(dev, io_tlb_start),
- 0, size, DMA_FROM_DEVICE, 0);
+ 0, size, DMA_FROM_DEVICE, attrs);
if (phys_addr == SWIOTLB_MAP_ERROR)
goto out_warn;
@@ -727,7 +725,7 @@ swiotlb_alloc_buffer(struct device *dev, size_t size, dma_addr_t *dma_handle,
out_unmap:
dev_warn(dev, "hwdev DMA mask = 0x%016Lx, dev_addr = 0x%016Lx\n",
- (unsigned long long)(dev ? dev->coherent_dma_mask : 0),
+ (unsigned long long)dev->coherent_dma_mask,
(unsigned long long)*dma_handle);
/*
@@ -737,7 +735,7 @@ out_unmap:
swiotlb_tbl_unmap_single(dev, phys_addr, size, DMA_TO_DEVICE,
DMA_ATTR_SKIP_CPU_SYNC);
out_warn:
- if ((attrs & DMA_ATTR_NO_WARN) && printk_ratelimit()) {
+ if (!(attrs & DMA_ATTR_NO_WARN) && printk_ratelimit()) {
dev_warn(dev,
"swiotlb: coherent allocation failed, size=%zu\n",
size);
@@ -764,7 +762,6 @@ static bool swiotlb_free_buffer(struct device *dev, size_t size,
DMA_ATTR_SKIP_CPU_SYNC);
return true;
}
-#endif
static void
swiotlb_full(struct device *dev, size_t size, enum dma_data_direction dir,
@@ -1045,7 +1042,6 @@ swiotlb_dma_supported(struct device *hwdev, u64 mask)
return __phys_to_dma(hwdev, io_tlb_end - 1) <= mask;
}
-#ifdef CONFIG_DMA_DIRECT_OPS
void *swiotlb_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
gfp_t gfp, unsigned long attrs)
{
@@ -1089,4 +1085,3 @@ const struct dma_map_ops swiotlb_dma_ops = {
.unmap_page = swiotlb_unmap_page,
.dma_supported = dma_direct_supported,
};
-#endif /* CONFIG_DMA_DIRECT_OPS */
diff --git a/lib/test_bitmap.c b/lib/test_bitmap.c
index de16f7869fb1..6cd7d0740005 100644
--- a/lib/test_bitmap.c
+++ b/lib/test_bitmap.c
@@ -331,23 +331,32 @@ static void noinline __init test_mem_optimisations(void)
unsigned int start, nbits;
for (start = 0; start < 1024; start += 8) {
- memset(bmap1, 0x5a, sizeof(bmap1));
- memset(bmap2, 0x5a, sizeof(bmap2));
for (nbits = 0; nbits < 1024 - start; nbits += 8) {
+ memset(bmap1, 0x5a, sizeof(bmap1));
+ memset(bmap2, 0x5a, sizeof(bmap2));
+
bitmap_set(bmap1, start, nbits);
__bitmap_set(bmap2, start, nbits);
- if (!bitmap_equal(bmap1, bmap2, 1024))
+ if (!bitmap_equal(bmap1, bmap2, 1024)) {
printk("set not equal %d %d\n", start, nbits);
- if (!__bitmap_equal(bmap1, bmap2, 1024))
+ failed_tests++;
+ }
+ if (!__bitmap_equal(bmap1, bmap2, 1024)) {
printk("set not __equal %d %d\n", start, nbits);
+ failed_tests++;
+ }
bitmap_clear(bmap1, start, nbits);
__bitmap_clear(bmap2, start, nbits);
- if (!bitmap_equal(bmap1, bmap2, 1024))
+ if (!bitmap_equal(bmap1, bmap2, 1024)) {
printk("clear not equal %d %d\n", start, nbits);
- if (!__bitmap_equal(bmap1, bmap2, 1024))
+ failed_tests++;
+ }
+ if (!__bitmap_equal(bmap1, bmap2, 1024)) {
printk("clear not __equal %d %d\n", start,
nbits);
+ failed_tests++;
+ }
}
}
}
diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index 30c0cb8cc9bc..23920c5ff728 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -1669,19 +1669,22 @@ char *pointer_string(char *buf, char *end, const void *ptr,
return number(buf, end, (unsigned long int)ptr, spec);
}
-static bool have_filled_random_ptr_key __read_mostly;
+static DEFINE_STATIC_KEY_TRUE(not_filled_random_ptr_key);
static siphash_key_t ptr_key __read_mostly;
-static void fill_random_ptr_key(struct random_ready_callback *unused)
+static void enable_ptr_key_workfn(struct work_struct *work)
{
get_random_bytes(&ptr_key, sizeof(ptr_key));
- /*
- * have_filled_random_ptr_key==true is dependent on get_random_bytes().
- * ptr_to_id() needs to see have_filled_random_ptr_key==true
- * after get_random_bytes() returns.
- */
- smp_mb();
- WRITE_ONCE(have_filled_random_ptr_key, true);
+ /* Needs to run from preemptible context */
+ static_branch_disable(&not_filled_random_ptr_key);
+}
+
+static DECLARE_WORK(enable_ptr_key_work, enable_ptr_key_workfn);
+
+static void fill_random_ptr_key(struct random_ready_callback *unused)
+{
+ /* This may be in an interrupt handler. */
+ queue_work(system_unbound_wq, &enable_ptr_key_work);
}
static struct random_ready_callback random_ready = {
@@ -1695,7 +1698,8 @@ static int __init initialize_ptr_random(void)
if (!ret) {
return 0;
} else if (ret == -EALREADY) {
- fill_random_ptr_key(&random_ready);
+ /* This is in preemptible context */
+ enable_ptr_key_workfn(&enable_ptr_key_work);
return 0;
}
@@ -1709,7 +1713,7 @@ static char *ptr_to_id(char *buf, char *end, void *ptr, struct printf_spec spec)
unsigned long hashval;
const int default_width = 2 * sizeof(ptr);
- if (unlikely(!have_filled_random_ptr_key)) {
+ if (static_branch_unlikely(&not_filled_random_ptr_key)) {
spec.field_width = default_width;
/* string length must be less than default_width */
return string(buf, end, "(ptrval)", spec);
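
Net behaviour of the vsprintf changes: %p keeps printing "(ptrval)" until the siphash key has been initialised from a preemptible context, then the static branch flips to hashed output. A toy userspace model of that gate (illustrative only; the plain flag and xor "hash" stand in for the static key and siphash):

#include <stdint.h>
#include <stdio.h>

static int ptr_key_ready;	/* flipped once, from preemptible context */

static void print_ptr(const void *p)
{
	if (!ptr_key_ready)
		puts("(ptrval)");
	else	/* toy "hash": the kernel uses siphash with ptr_key */
		printf("%lx\n", (unsigned long)(uintptr_t)p ^ 0x5a5a5a5aUL);
}

int main(void)
{
	int x;

	print_ptr(&x);		/* "(ptrval)" before the key is ready */
	ptr_key_ready = 1;
	print_ptr(&x);		/* obfuscated value afterwards */
	return 0;
}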
diff --git a/mm/Kconfig b/mm/Kconfig
index d5004d82a1d6..3e0b6e87f65d 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -266,7 +266,7 @@ config ARCH_ENABLE_THP_MIGRATION
bool
config PHYS_ADDR_T_64BIT
- def_bool 64BIT || ARCH_PHYS_ADDR_T_64BIT
+ def_bool 64BIT
config BOUNCE
bool "Enable bounce buffers"
@@ -305,7 +305,7 @@ config KSM
the many instances by a single page with that content, so
saving memory until one or another app needs to modify the content.
Recommended for use with KVM, or with other duplicative applications.
- See Documentation/vm/ksm.txt for more information: KSM is inactive
+ See Documentation/vm/ksm.rst for more information: KSM is inactive
until a program has madvised that an area is MADV_MERGEABLE, and
root has set /sys/kernel/mm/ksm/run to 1 (if CONFIG_SYSFS is set).
@@ -530,7 +530,7 @@ config MEM_SOFT_DIRTY
into a page just as regular dirty bit, but unlike the latter
it can be cleared by hands.
- See Documentation/vm/soft-dirty.txt for more details.
+ See Documentation/admin-guide/mm/soft-dirty.rst for more details.
config ZSWAP
bool "Compressed cache for swap pages (EXPERIMENTAL)"
@@ -636,6 +636,7 @@ config DEFERRED_STRUCT_PAGE_INIT
default n
depends on NO_BOOTMEM
depends on !FLATMEM
+ depends on !NEED_PER_CPU_KM
help
Ordinarily all struct pages are initialised during early boot in a
single thread. On very large machines this can take a considerable
@@ -656,7 +657,8 @@ config IDLE_PAGE_TRACKING
be useful to tune memory cgroup limits and/or for job placement
within a compute cluster.
- See Documentation/vm/idle_page_tracking.txt for more details.
+ See Documentation/admin-guide/mm/idle_page_tracking.rst for
+ more details.
# arch_add_memory() comprehends device memory
config ARCH_HAS_ZONE_DEVICE
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 023190c69dce..8fe3ebd6ac00 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -115,6 +115,7 @@ static int bdi_debug_register(struct backing_dev_info *bdi, const char *name)
bdi, &bdi_debug_stats_fops);
if (!bdi->debug_stats) {
debugfs_remove(bdi->debug_dir);
+ bdi->debug_dir = NULL;
return -ENOMEM;
}
@@ -383,7 +384,7 @@ static void wb_shutdown(struct bdi_writeback *wb)
* the barrier provided by test_and_clear_bit() above.
*/
smp_wmb();
- clear_bit(WB_shutting_down, &wb->state);
+ clear_and_wake_up_bit(WB_shutting_down, &wb->state);
}
static void wb_exit(struct bdi_writeback *wb)
@@ -411,6 +412,7 @@ static void wb_exit(struct bdi_writeback *wb)
* protected.
*/
static DEFINE_SPINLOCK(cgwb_lock);
+static struct workqueue_struct *cgwb_release_wq;
/**
* wb_congested_get_create - get or create a wb_congested
@@ -521,7 +523,7 @@ static void cgwb_release(struct percpu_ref *refcnt)
{
struct bdi_writeback *wb = container_of(refcnt, struct bdi_writeback,
refcnt);
- schedule_work(&wb->release_work);
+ queue_work(cgwb_release_wq, &wb->release_work);
}
static void cgwb_kill(struct bdi_writeback *wb)
@@ -783,6 +785,21 @@ static void cgwb_bdi_register(struct backing_dev_info *bdi)
spin_unlock_irq(&cgwb_lock);
}
+static int __init cgwb_init(void)
+{
+ /*
+ * There can be many concurrent release work items overwhelming
+ * system_wq. Put them in a separate wq and limit concurrency.
+ * There's no point in executing many of these in parallel.
+ */
+ cgwb_release_wq = alloc_workqueue("cgwb_release", 0, 1);
+ if (!cgwb_release_wq)
+ return -ENOMEM;
+
+ return 0;
+}
+subsys_initcall(cgwb_init);
+
#else /* CONFIG_CGROUP_WRITEBACK */
static int cgwb_bdi_init(struct backing_dev_info *bdi)
diff --git a/mm/cleancache.c b/mm/cleancache.c
index f7b9fdc79d97..126548b5a292 100644
--- a/mm/cleancache.c
+++ b/mm/cleancache.c
@@ -3,7 +3,7 @@
*
* This code provides the generic "frontend" layer to call a matching
* "backend" driver implementation of cleancache. See
- * Documentation/vm/cleancache.txt for more information.
+ * Documentation/vm/cleancache.rst for more information.
*
* Copyright (C) 2009-2010 Oracle Corp. All rights reserved.
* Author: Dan Magenheimer
diff --git a/mm/cma.c b/mm/cma.c
index aa40e6c7b042..5809bbe360d7 100644
--- a/mm/cma.c
+++ b/mm/cma.c
@@ -39,7 +39,6 @@
#include <trace/events/cma.h>
#include "cma.h"
-#include "internal.h"
struct cma cma_areas[MAX_CMA_AREAS];
unsigned cma_area_count;
@@ -110,25 +109,23 @@ static int __init cma_activate_area(struct cma *cma)
if (!cma->bitmap)
return -ENOMEM;
+ WARN_ON_ONCE(!pfn_valid(pfn));
+ zone = page_zone(pfn_to_page(pfn));
+
do {
unsigned j;
base_pfn = pfn;
- if (!pfn_valid(base_pfn))
- goto err;
-
- zone = page_zone(pfn_to_page(base_pfn));
for (j = pageblock_nr_pages; j; --j, pfn++) {
- if (!pfn_valid(pfn))
- goto err;
-
+ WARN_ON_ONCE(!pfn_valid(pfn));
/*
- * In init_cma_reserved_pageblock(), present_pages
- * is adjusted with assumption that all pages in
- * the pageblock come from a single zone.
+ * alloc_contig_range requires the pfn range
+ * specified to be in the same zone. Make this
+ * simple by forcing the entire CMA reserved range
+ * to be in the same zone.
*/
if (page_zone(pfn_to_page(pfn)) != zone)
- goto err;
+ goto not_in_zone;
}
init_cma_reserved_pageblock(pfn_to_page(base_pfn));
} while (--i);
@@ -142,7 +139,7 @@ static int __init cma_activate_area(struct cma *cma)
return 0;
-err:
+not_in_zone:
pr_err("CMA area %s could not be activated\n", cma->name);
kfree(cma->bitmap);
cma->count = 0;
@@ -152,41 +149,6 @@ err:
static int __init cma_init_reserved_areas(void)
{
int i;
- struct zone *zone;
- pg_data_t *pgdat;
-
- if (!cma_area_count)
- return 0;
-
- for_each_online_pgdat(pgdat) {
- unsigned long start_pfn = UINT_MAX, end_pfn = 0;
-
- zone = &pgdat->node_zones[ZONE_MOVABLE];
-
- /*
- * In this case, we cannot adjust the zone range
- * since it is now maximum node span and we don't
- * know original zone range.
- */
- if (populated_zone(zone))
- continue;
-
- for (i = 0; i < cma_area_count; i++) {
- if (pfn_to_nid(cma_areas[i].base_pfn) !=
- pgdat->node_id)
- continue;
-
- start_pfn = min(start_pfn, cma_areas[i].base_pfn);
- end_pfn = max(end_pfn, cma_areas[i].base_pfn +
- cma_areas[i].count);
- }
-
- if (!end_pfn)
- continue;
-
- zone->zone_start_pfn = start_pfn;
- zone->spanned_pages = end_pfn - start_pfn;
- }
for (i = 0; i < cma_area_count; i++) {
int ret = cma_activate_area(&cma_areas[i]);
@@ -195,32 +157,9 @@ static int __init cma_init_reserved_areas(void)
return ret;
}
- /*
- * Reserved pages for ZONE_MOVABLE are now activated and
- * this would change ZONE_MOVABLE's managed page counter and
- * the other zones' present counter. We need to re-calculate
- * various zone information that depends on this initialization.
- */
- build_all_zonelists(NULL);
- for_each_populated_zone(zone) {
- if (zone_idx(zone) == ZONE_MOVABLE) {
- zone_pcp_reset(zone);
- setup_zone_pageset(zone);
- } else
- zone_pcp_update(zone);
-
- set_zone_contiguous(zone);
- }
-
- /*
- * We need to re-init per zone wmark by calling
- * init_per_zone_wmark_min() but doesn't call here because it is
- * registered on core_initcall and it will be called later than us.
- */
-
return 0;
}
-pure_initcall(cma_init_reserved_areas);
+core_initcall(cma_init_reserved_areas);
/**
* cma_init_reserved_mem() - create custom contiguous area from reserved memory
diff --git a/mm/compaction.c b/mm/compaction.c
index 028b7210a669..29bd1df18b98 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -1450,12 +1450,14 @@ static enum compact_result __compaction_suitable(struct zone *zone, int order,
* if compaction succeeds.
* For costly orders, we require low watermark instead of min for
* compaction to proceed to increase its chances.
+ * ALLOC_CMA is used, as pages in CMA pageblocks are considered
+ * suitable migration targets
*/
watermark = (order > PAGE_ALLOC_COSTLY_ORDER) ?
low_wmark_pages(zone) : min_wmark_pages(zone);
watermark += compact_gap(order);
if (!__zone_watermark_ok(zone, 0, watermark, classzone_idx,
- 0, wmark_target))
+ ALLOC_CMA, wmark_target))
return COMPACT_SKIPPED;
return COMPACT_CONTINUE;
diff --git a/mm/frontswap.c b/mm/frontswap.c
index fec8b5044040..4f5476a0f955 100644
--- a/mm/frontswap.c
+++ b/mm/frontswap.c
@@ -3,7 +3,7 @@
*
* This code provides the generic "frontend" layer to call a matching
* "backend" driver implementation of frontswap. See
- * Documentation/vm/frontswap.txt for more information.
+ * Documentation/vm/frontswap.rst for more information.
*
* Copyright (C) 2009-2012 Oracle Corp. All rights reserved.
* Author: Dan Magenheimer
diff --git a/mm/gup.c b/mm/gup.c
index 76af4cfeaf68..541904a7c60f 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -544,6 +544,9 @@ static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags)
if (vm_flags & (VM_IO | VM_PFNMAP))
return -EFAULT;
+ if (gup_flags & FOLL_ANON && !vma_is_anonymous(vma))
+ return -EFAULT;
+
if (write) {
if (!(vm_flags & VM_WRITE)) {
if (!(gup_flags & FOLL_FORCE))
diff --git a/mm/hmm.c b/mm/hmm.c
index 486dc394a5a3..e63e353830e8 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -37,7 +37,7 @@
#if defined(CONFIG_DEVICE_PRIVATE) || defined(CONFIG_DEVICE_PUBLIC)
/*
- * Device private memory see HMM (Documentation/vm/hmm.txt) or hmm.h
+ * Device private memory see HMM (Documentation/vm/hmm.rst) or hmm.h
*/
DEFINE_STATIC_KEY_FALSE(device_private_key);
EXPORT_SYMBOL(device_private_key);
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index a3a1815f8e11..ac5591d8622c 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1185,7 +1185,7 @@ static int do_huge_pmd_wp_page_fallback(struct vm_fault *vmf, pmd_t orig_pmd,
* mmu_notifier_invalidate_range_end() happens which can lead to a
* device seeing memory write in different order than CPU.
*
- * See Documentation/vm/mmu_notifier.txt
+ * See Documentation/vm/mmu_notifier.rst
*/
pmdp_huge_clear_flush_notify(vma, haddr, vmf->pmd);
@@ -2037,7 +2037,7 @@ static void __split_huge_zero_page_pmd(struct vm_area_struct *vma,
* replacing a zero pmd write protected page with a zero pte write
* protected page.
*
- * See Documentation/vm/mmu_notifier.txt
+ * See Documentation/vm/mmu_notifier.rst
*/
pmdp_huge_clear_flush(vma, haddr, pmd);
@@ -2431,7 +2431,7 @@ static void __split_huge_page(struct page *page, struct list_head *list,
__split_huge_page_tail(head, i, lruvec, list);
/* Some pages can be beyond i_size: drop them from page cache */
if (head[i].index >= end) {
- __ClearPageDirty(head + i);
+ ClearPageDirty(head + i);
__delete_from_page_cache(head + i, NULL);
if (IS_ENABLED(CONFIG_SHMEM) && PageSwapBacked(head))
shmem_uncharge(head->mapping->host, 1);
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 218679138255..129088710510 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -3291,7 +3291,7 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
* table protection not changing it to point
* to a new page.
*
- * See Documentation/vm/mmu_notifier.txt
+ * See Documentation/vm/mmu_notifier.rst
*/
huge_ptep_set_wrprotect(src, addr, src_pte);
}
@@ -4357,7 +4357,7 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
* No need to call mmu_notifier_invalidate_range() we are downgrading
* page table protection not changing it to point to a new page.
*
- * See Documentation/vm/mmu_notifier.txt
+ * See Documentation/vm/mmu_notifier.rst
*/
i_mmap_unlock_write(vma->vm_file->f_mapping);
mmu_notifier_invalidate_range_end(mm, start, end);
diff --git a/mm/internal.h b/mm/internal.h
index 62d8c34e63d5..502d14189794 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -168,9 +168,6 @@ extern void post_alloc_hook(struct page *page, unsigned int order,
gfp_t gfp_flags);
extern int user_min_free_kbytes;
-extern void set_zone_contiguous(struct zone *zone);
-extern void clear_zone_contiguous(struct zone *zone);
-
#if defined CONFIG_COMPACTION || defined CONFIG_CMA
/*
@@ -498,6 +495,7 @@ unsigned long reclaim_clean_pages_from_list(struct zone *zone,
#define ALLOC_HARDER 0x10 /* try to alloc harder */
#define ALLOC_HIGH 0x20 /* __GFP_HIGH set */
#define ALLOC_CPUSET 0x40 /* check for correct cpuset */
+#define ALLOC_CMA 0x80 /* allow allocations from CMA areas */
enum ttu_flags;
struct tlbflush_unmap_batch;
diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c
index bc0e68f7dc75..f185455b3406 100644
--- a/mm/kasan/kasan.c
+++ b/mm/kasan/kasan.c
@@ -792,6 +792,40 @@ DEFINE_ASAN_SET_SHADOW(f5);
DEFINE_ASAN_SET_SHADOW(f8);
#ifdef CONFIG_MEMORY_HOTPLUG
+static bool shadow_mapped(unsigned long addr)
+{
+ pgd_t *pgd = pgd_offset_k(addr);
+ p4d_t *p4d;
+ pud_t *pud;
+ pmd_t *pmd;
+ pte_t *pte;
+
+ if (pgd_none(*pgd))
+ return false;
+ p4d = p4d_offset(pgd, addr);
+ if (p4d_none(*p4d))
+ return false;
+ pud = pud_offset(p4d, addr);
+ if (pud_none(*pud))
+ return false;
+
+ /*
+ * We can't use pud_large() or pud_huge(), the first one is
+ * arch-specific, the last one depends on HUGETLB_PAGE. So let's abuse
+ * pud_bad(), if pud is bad then it's bad because it's huge.
+ */
+ if (pud_bad(*pud))
+ return true;
+ pmd = pmd_offset(pud, addr);
+ if (pmd_none(*pmd))
+ return false;
+
+ if (pmd_bad(*pmd))
+ return true;
+ pte = pte_offset_kernel(pmd, addr);
+ return !pte_none(*pte);
+}
+
static int __meminit kasan_mem_notifier(struct notifier_block *nb,
unsigned long action, void *data)
{
@@ -813,6 +847,14 @@ static int __meminit kasan_mem_notifier(struct notifier_block *nb,
case MEM_GOING_ONLINE: {
void *ret;
+ /*
+ * If the shadow is mapped already then it must have been mapped
+ * during boot. This can happen when we are onlining previously
+ * offlined memory.
+ */
+ if (shadow_mapped(shadow_start))
+ return NOTIFY_OK;
+
ret = __vmalloc_node_range(shadow_size, PAGE_SIZE, shadow_start,
shadow_end, GFP_KERNEL,
PAGE_KERNEL, VM_NO_GUARD,
@@ -824,8 +866,26 @@ static int __meminit kasan_mem_notifier(struct notifier_block *nb,
kmemleak_ignore(ret);
return NOTIFY_OK;
}
- case MEM_OFFLINE:
- vfree((void *)shadow_start);
+ case MEM_CANCEL_ONLINE:
+ case MEM_OFFLINE: {
+ struct vm_struct *vm;
+
+ /*
+ * shadow_start was either mapped during boot by kasan_init()
+ * or during memory online by __vmalloc_node_range().
+ * In the latter case we can use vfree() to free shadow.
+ * A non-NULL result from find_vm_area() will tell us if
+ * that was the second case.
+ *
+ * Currently it's not possible to free shadow mapped
+ * during boot by kasan_init(). It's because the code
+ * to do that hasn't been written yet. So we'll just
+ * leak the memory.
+ */
+ vm = find_vm_area((void *)shadow_start);
+ if (vm)
+ vfree((void *)shadow_start);
+ }
}
return NOTIFY_OK;
@@ -838,5 +898,5 @@ static int __init kasan_memhotplug_init(void)
return 0;
}
-module_init(kasan_memhotplug_init);
+core_initcall(kasan_memhotplug_init);
#endif
diff --git a/mm/ksm.c b/mm/ksm.c
index e3cbf9a92f3c..7d6558f3bac9 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -51,7 +51,9 @@
#define DO_NUMA(x) do { } while (0)
#endif
-/*
+/**
+ * DOC: Overview
+ *
* A few notes about the KSM scanning process,
* to make it easier to understand the data structures below:
*
@@ -67,6 +69,21 @@
* this tree is fully assured to be working (except when pages are unmapped),
* and therefore this tree is called the stable tree.
*
+ * The stable tree node includes information required for reverse
+ * mapping from a KSM page to virtual addresses that map this page.
+ *
+ * In order to avoid large latencies of the rmap walks on KSM pages,
+ * KSM maintains two types of nodes in the stable tree:
+ *
+ * * the regular nodes that keep the reverse mapping structures in a
+ * linked list
+ * * the "chains" that link nodes ("dups") that represent the same
+ * write protected memory content, but each "dup" corresponds to a
+ * different KSM page copy of that content
+ *
+ * Internally, the regular nodes, "dups" and "chains" are represented
+ * using the same :c:type:`struct stable_node` structure.
+ *
* In addition to the stable tree, KSM uses a second data structure called the
* unstable tree: this tree holds pointers to pages which have been found to
* be "unchanged for a period of time". The unstable tree sorts these pages
@@ -1049,7 +1066,7 @@ static int write_protect_page(struct vm_area_struct *vma, struct page *page,
* No need to notify as we are downgrading page table to read
* only not changing it to point to a new page.
*
- * See Documentation/vm/mmu_notifier.txt
+ * See Documentation/vm/mmu_notifier.rst
*/
entry = ptep_clear_flush(vma, pvmw.address, pvmw.pte);
/*
@@ -1145,7 +1162,7 @@ static int replace_page(struct vm_area_struct *vma, struct page *page,
* No need to notify as we are replacing a read only page with another
* read only page with the same content.
*
- * See Documentation/vm/mmu_notifier.txt
+ * See Documentation/vm/mmu_notifier.rst
*/
ptep_clear_flush(vma, addr, ptep);
set_pte_at_notify(mm, addr, ptep, newpte);
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 2bd3df3d101a..1695f38630f1 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3849,7 +3849,7 @@ static ssize_t memcg_write_event_control(struct kernfs_open_file *of,
if (ret)
goto out_put_css;
- efile.file->f_op->poll(efile.file, &event->pt);
+ vfs_poll(efile.file, &event->pt);
spin_lock(&memcg->event_list_lock);
list_add(&event->list, &memcg->event_list);
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index f74826cdceea..25982467800b 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1158,7 +1158,7 @@ int __ref add_memory_resource(int nid, struct resource *res, bool online)
* nodes have to go through register_node.
* TODO clean up this mess.
*/
- ret = link_mem_sections(nid, start_pfn, nr_pages);
+ ret = link_mem_sections(nid, start_pfn, nr_pages, false);
register_fail:
/*
* If sysfs file of new node can't create, cpu on the node
diff --git a/mm/mempool.c b/mm/mempool.c
index 5c9dce34719b..b54f2c20e5e0 100644
--- a/mm/mempool.c
+++ b/mm/mempool.c
@@ -138,6 +138,28 @@ static void *remove_element(mempool_t *pool, gfp_t flags)
}
/**
+ * mempool_exit - exit a mempool initialized with mempool_init()
+ * @pool: pointer to the memory pool which was initialized with
+ * mempool_init().
+ *
+ * Free all reserved elements in @pool. Unlike mempool_destroy(), this
+ * function does not free @pool itself; it only sleeps if the free_fn()
+ * function sleeps.
+ *
+ * May be called on a zeroed but uninitialized mempool (i.e. allocated with
+ * kzalloc()).
+ */
+void mempool_exit(mempool_t *pool)
+{
+ while (pool->curr_nr) {
+ void *element = remove_element(pool, GFP_KERNEL);
+ pool->free(element, pool->pool_data);
+ }
+ kfree(pool->elements);
+ pool->elements = NULL;
+}
+EXPORT_SYMBOL(mempool_exit);
+
+/**
* mempool_destroy - deallocate a memory pool
* @pool: pointer to the memory pool which was allocated via
* mempool_create().
@@ -150,15 +172,65 @@ void mempool_destroy(mempool_t *pool)
if (unlikely(!pool))
return;
- while (pool->curr_nr) {
- void *element = remove_element(pool, GFP_KERNEL);
- pool->free(element, pool->pool_data);
- }
- kfree(pool->elements);
+ mempool_exit(pool);
kfree(pool);
}
EXPORT_SYMBOL(mempool_destroy);
+int mempool_init_node(mempool_t *pool, int min_nr, mempool_alloc_t *alloc_fn,
+ mempool_free_t *free_fn, void *pool_data,
+ gfp_t gfp_mask, int node_id)
+{
+ spin_lock_init(&pool->lock);
+ pool->min_nr = min_nr;
+ pool->pool_data = pool_data;
+ pool->alloc = alloc_fn;
+ pool->free = free_fn;
+ init_waitqueue_head(&pool->wait);
+
+ pool->elements = kmalloc_array_node(min_nr, sizeof(void *),
+ gfp_mask, node_id);
+ if (!pool->elements)
+ return -ENOMEM;
+
+ /*
+ * First pre-allocate the guaranteed number of buffers.
+ */
+ while (pool->curr_nr < pool->min_nr) {
+ void *element;
+
+ element = pool->alloc(gfp_mask, pool->pool_data);
+ if (unlikely(!element)) {
+ mempool_exit(pool);
+ return -ENOMEM;
+ }
+ add_element(pool, element);
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL(mempool_init_node);
+
+/**
+ * mempool_init - initialize a memory pool
+ * @pool: pointer to the memory pool that should be initialized
+ * @min_nr: the minimum number of elements guaranteed to be
+ * allocated for this pool.
+ * @alloc_fn: user-defined element-allocation function.
+ * @free_fn: user-defined element-freeing function.
+ * @pool_data: optional private data available to the user-defined functions.
+ *
+ * Like mempool_create(), but initializes the pool in place (i.e. embedded in
+ * another structure).
+ */
+int mempool_init(mempool_t *pool, int min_nr, mempool_alloc_t *alloc_fn,
+ mempool_free_t *free_fn, void *pool_data)
+{
+ return mempool_init_node(pool, min_nr, alloc_fn, free_fn,
+ pool_data, GFP_KERNEL, NUMA_NO_NODE);
+
+}
+EXPORT_SYMBOL(mempool_init);
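
What the embedded API enables, as a hedged kernel-style sketch (struct my_driver and its functions are hypothetical; mempool_kmalloc()/mempool_kfree() are the long-standing helpers that take the element size via @pool_data):

struct my_driver {
	mempool_t pool;			/* embedded, not a pointer */
};

static int my_driver_init(struct my_driver *drv)
{
	/* 16 guaranteed elements of 128 bytes each */
	return mempool_init(&drv->pool, 16, mempool_kmalloc,
			    mempool_kfree, (void *)128);
}

static void my_driver_exit(struct my_driver *drv)
{
	mempool_exit(&drv->pool);	/* frees the elements, not drv */
}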
+
/**
* mempool_create - create a memory pool
* @min_nr: the minimum number of elements guaranteed to be
@@ -186,35 +258,17 @@ mempool_t *mempool_create_node(int min_nr, mempool_alloc_t *alloc_fn,
gfp_t gfp_mask, int node_id)
{
mempool_t *pool;
+
pool = kzalloc_node(sizeof(*pool), gfp_mask, node_id);
if (!pool)
return NULL;
- pool->elements = kmalloc_array_node(min_nr, sizeof(void *),
- gfp_mask, node_id);
- if (!pool->elements) {
+
+ if (mempool_init_node(pool, min_nr, alloc_fn, free_fn, pool_data,
+ gfp_mask, node_id)) {
kfree(pool);
return NULL;
}
- spin_lock_init(&pool->lock);
- pool->min_nr = min_nr;
- pool->pool_data = pool_data;
- init_waitqueue_head(&pool->wait);
- pool->alloc = alloc_fn;
- pool->free = free_fn;
- /*
- * First pre-allocate the guaranteed number of buffers.
- */
- while (pool->curr_nr < pool->min_nr) {
- void *element;
-
- element = pool->alloc(gfp_mask, pool->pool_data);
- if (unlikely(!element)) {
- mempool_destroy(pool);
- return NULL;
- }
- add_element(pool, element);
- }
return pool;
}
EXPORT_SYMBOL(mempool_create_node);
diff --git a/mm/migrate.c b/mm/migrate.c
index 568433023831..8c0af0f7cab1 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -528,14 +528,12 @@ int migrate_page_move_mapping(struct address_space *mapping,
int i;
int index = page_index(page);
- for (i = 0; i < HPAGE_PMD_NR; i++) {
+ for (i = 1; i < HPAGE_PMD_NR; i++) {
pslot = radix_tree_lookup_slot(&mapping->i_pages,
index + i);
radix_tree_replace_slot(&mapping->i_pages, pslot,
newpage + i);
}
- } else {
- radix_tree_replace_slot(&mapping->i_pages, pslot, newpage);
}
/*
diff --git a/mm/mmap.c b/mm/mmap.c
index 9d5968d1e8e3..d817764a9974 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1324,6 +1324,35 @@ static inline int mlock_future_check(struct mm_struct *mm,
return 0;
}
+static inline u64 file_mmap_size_max(struct file *file, struct inode *inode)
+{
+ if (S_ISREG(inode->i_mode))
+ return MAX_LFS_FILESIZE;
+
+ if (S_ISBLK(inode->i_mode))
+ return MAX_LFS_FILESIZE;
+
+ /* Special "we do even unsigned file positions" case */
+ if (file->f_mode & FMODE_UNSIGNED_OFFSET)
+ return 0;
+
+ /* Yes, random drivers might want more. But I'm tired of buggy drivers */
+ return ULONG_MAX;
+}
+
+static inline bool file_mmap_ok(struct file *file, struct inode *inode,
+ unsigned long pgoff, unsigned long len)
+{
+ u64 maxsize = file_mmap_size_max(file, inode);
+
+ if (maxsize && len > maxsize)
+ return false;
+ maxsize -= len;
+ if (pgoff > maxsize >> PAGE_SHIFT)
+ return false;
+ return true;
+}
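
A userspace model of the overflow check above (a sketch; MAXSIZE stands in for MAX_LFS_FILESIZE): mapping len bytes at page offset pgoff is only sane if pgoff still fits below (maxsize - len) >> PAGE_SHIFT.

#include <assert.h>
#include <stdint.h>

#define PAGE_SHIFT	12
#define MAXSIZE		((uint64_t)1 << 53)	/* stand-in for MAX_LFS_FILESIZE */

static int mmap_ok(uint64_t pgoff, uint64_t len)
{
	uint64_t maxsize = MAXSIZE;

	if (len > maxsize)
		return 0;
	maxsize -= len;
	return pgoff <= (maxsize >> PAGE_SHIFT);
}

int main(void)
{
	assert(mmap_ok(0, 1 << 20));				/* ordinary mapping */
	assert(!mmap_ok(UINT64_MAX >> PAGE_SHIFT, 4096));	/* would wrap past EOF */
	return 0;
}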
+
/*
* The caller must hold down_write(&current->mm->mmap_sem).
*/
@@ -1409,6 +1438,9 @@ unsigned long do_mmap(struct file *file, unsigned long addr,
struct inode *inode = file_inode(file);
unsigned long flags_mask;
+ if (!file_mmap_ok(file, inode, pgoff, len))
+ return -EOVERFLOW;
+
flags_mask = LEGACY_MAP_MASK | file->f_op->mmap_supported_flags;
switch (flags & MAP_TYPE) {
@@ -2796,7 +2828,7 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
unsigned long ret = -EINVAL;
struct file *file;
- pr_warn_once("%s (%d) uses deprecated remap_file_pages() syscall. See Documentation/vm/remap_file_pages.txt.\n",
+ pr_warn_once("%s (%d) uses deprecated remap_file_pages() syscall. See Documentation/vm/remap_file_pages.rst.\n",
current->comm, current->pid);
if (prot)
@@ -3024,6 +3056,32 @@ void exit_mmap(struct mm_struct *mm)
/* mm's last user has gone, and its about to be pulled down */
mmu_notifier_release(mm);
+ if (unlikely(mm_is_oom_victim(mm))) {
+ /*
+ * Manually reap the mm to free as much memory as possible.
+ * Then, as the oom reaper does, set MMF_OOM_SKIP to disregard
+ * this mm from further consideration. Taking mm->mmap_sem for
+ * write after setting MMF_OOM_SKIP will guarantee that the oom
+ * reaper will not run on this mm again after mmap_sem is
+ * dropped.
+ *
+ * Nothing can be holding mm->mmap_sem here and the above call
+ * to mmu_notifier_release(mm) ensures mmu notifier callbacks in
+ * __oom_reap_task_mm() will not block.
+ *
+ * This needs to be done before calling munlock_vma_pages_all(),
+ * which clears VM_LOCKED, otherwise the oom reaper cannot
+ * reliably test it.
+ */
+ mutex_lock(&oom_lock);
+ __oom_reap_task_mm(mm);
+ mutex_unlock(&oom_lock);
+
+ set_bit(MMF_OOM_SKIP, &mm->flags);
+ down_write(&mm->mmap_sem);
+ up_write(&mm->mmap_sem);
+ }
+
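The down_write()/up_write() pair above is the classic lock-barrier idiom: after setting MMF_OOM_SKIP, cycling the semaphore for write guarantees that any reaper which sampled the flag before it was set has drained. A minimal pthread model of the idiom (illustrative only):

#include <pthread.h>

static pthread_rwlock_t mmap_sem = PTHREAD_RWLOCK_INITIALIZER;
static int oom_skip;

static void set_skip_and_drain(void)
{
	__atomic_store_n(&oom_skip, 1, __ATOMIC_SEQ_CST);
	/* Barrier: anyone who locked before seeing the flag drains here;
	 * later lockers are guaranteed to observe oom_skip == 1. */
	pthread_rwlock_wrlock(&mmap_sem);
	pthread_rwlock_unlock(&mmap_sem);
}

int main(void)
{
	set_skip_and_drain();
	return oom_skip ? 0 : 1;
}
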
if (mm->locked_vm) {
vma = mm->mmap;
while (vma) {
@@ -3045,24 +3103,6 @@ void exit_mmap(struct mm_struct *mm)
/* update_hiwater_rss(mm) here? but nobody should be looking */
/* Use -1 here to ensure all VMAs in the mm are unmapped */
unmap_vmas(&tlb, vma, 0, -1);
-
- if (unlikely(mm_is_oom_victim(mm))) {
- /*
- * Wait for oom_reap_task() to stop working on this
- * mm. Because MMF_OOM_SKIP is already set before
- * calling down_read(), oom_reap_task() will not run
- * on this "mm" post up_write().
- *
- * mm_is_oom_victim() cannot be set from under us
- * either because victim->mm is already set to NULL
- * under task_lock before calling mmput and oom_mm is
- * set not NULL by the OOM killer only if victim->mm
- * is found not NULL while holding the task_lock.
- */
- set_bit(MMF_OOM_SKIP, &mm->flags);
- down_write(&mm->mmap_sem);
- up_write(&mm->mmap_sem);
- }
free_pgtables(&tlb, vma, FIRST_USER_ADDRESS, USER_PGTABLES_CEILING);
tlb_finish_mmu(&tlb, 0, -1);
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index ff992fa8760a..8ba6cb88cf58 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -469,7 +469,6 @@ bool process_shares_mm(struct task_struct *p, struct mm_struct *mm)
return false;
}
-
#ifdef CONFIG_MMU
/*
* OOM Reaper kernel thread which tries to reap the memory used by the OOM
@@ -480,16 +479,54 @@ static DECLARE_WAIT_QUEUE_HEAD(oom_reaper_wait);
static struct task_struct *oom_reaper_list;
static DEFINE_SPINLOCK(oom_reaper_lock);
-static bool __oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm)
+void __oom_reap_task_mm(struct mm_struct *mm)
{
- struct mmu_gather tlb;
struct vm_area_struct *vma;
+
+ /*
+ * Tell all users of get_user/copy_from_user etc... that the content
+ * is no longer stable. No barriers are really needed because unmapping
+ * should imply barriers already and the reader would hit a page fault
+ * if it stumbled over reaped memory.
+ */
+ set_bit(MMF_UNSTABLE, &mm->flags);
+
+ for (vma = mm->mmap ; vma; vma = vma->vm_next) {
+ if (!can_madv_dontneed_vma(vma))
+ continue;
+
+ /*
+ * Only anonymous pages have a good chance to be dropped
+ * without additional steps which we cannot afford as we
+ * are OOM already.
+ *
+ * We do not even care about fs-backed pages because all
+ * that are reclaimable have already been reclaimed and
+ * we do not want to block exit_mmap by keeping mm ref
+ * count elevated without a good reason.
+ */
+ if (vma_is_anonymous(vma) || !(vma->vm_flags & VM_SHARED)) {
+ const unsigned long start = vma->vm_start;
+ const unsigned long end = vma->vm_end;
+ struct mmu_gather tlb;
+
+ tlb_gather_mmu(&tlb, mm, start, end);
+ mmu_notifier_invalidate_range_start(mm, start, end);
+ unmap_page_range(&tlb, vma, start, end, NULL);
+ mmu_notifier_invalidate_range_end(mm, start, end);
+ tlb_finish_mmu(&tlb, start, end);
+ }
+ }
+}
+
+static bool oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm)
+{
bool ret = true;
/*
* We have to make sure to not race with the victim exit path
* and cause premature new oom victim selection:
- * __oom_reap_task_mm exit_mm
+ * oom_reap_task_mm exit_mm
* mmget_not_zero
* mmput
* atomic_dec_and_test
@@ -534,39 +571,8 @@ static bool __oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm)
trace_start_task_reaping(tsk->pid);
- /*
- * Tell all users of get_user/copy_from_user etc... that the content
- * is no longer stable. No barriers really needed because unmapping
- * should imply barriers already and the reader would hit a page fault
- * if it stumbled over a reaped memory.
- */
- set_bit(MMF_UNSTABLE, &mm->flags);
-
- for (vma = mm->mmap ; vma; vma = vma->vm_next) {
- if (!can_madv_dontneed_vma(vma))
- continue;
+ __oom_reap_task_mm(mm);
- /*
- * Only anonymous pages have a good chance to be dropped
- * without additional steps which we cannot afford as we
- * are OOM already.
- *
- * We do not even care about fs backed pages because all
- * which are reclaimable have already been reclaimed and
- * we do not want to block exit_mmap by keeping mm ref
- * count elevated without a good reason.
- */
- if (vma_is_anonymous(vma) || !(vma->vm_flags & VM_SHARED)) {
- const unsigned long start = vma->vm_start;
- const unsigned long end = vma->vm_end;
-
- tlb_gather_mmu(&tlb, mm, start, end);
- mmu_notifier_invalidate_range_start(mm, start, end);
- unmap_page_range(&tlb, vma, start, end, NULL);
- mmu_notifier_invalidate_range_end(mm, start, end);
- tlb_finish_mmu(&tlb, start, end);
- }
- }
pr_info("oom_reaper: reaped process %d (%s), now anon-rss:%lukB, file-rss:%lukB, shmem-rss:%lukB\n",
task_pid_nr(tsk), tsk->comm,
K(get_mm_counter(mm, MM_ANONPAGES)),
@@ -587,14 +593,13 @@ static void oom_reap_task(struct task_struct *tsk)
struct mm_struct *mm = tsk->signal->oom_mm;
/* Retry the down_read_trylock(mmap_sem) a few times */
- while (attempts++ < MAX_OOM_REAP_RETRIES && !__oom_reap_task_mm(tsk, mm))
+ while (attempts++ < MAX_OOM_REAP_RETRIES && !oom_reap_task_mm(tsk, mm))
schedule_timeout_idle(HZ/10);
if (attempts <= MAX_OOM_REAP_RETRIES ||
test_bit(MMF_OOM_SKIP, &mm->flags))
goto done;
-
pr_info("oom_reaper: unable to reap pid:%d (%s)\n",
task_pid_nr(tsk), tsk->comm);
debug_show_all_locks();
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 905db9d7962f..22320ea27489 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1743,38 +1743,16 @@ void __init page_alloc_init_late(void)
}
#ifdef CONFIG_CMA
-static void __init adjust_present_page_count(struct page *page, long count)
-{
- struct zone *zone = page_zone(page);
-
- /* We don't need to hold a lock since it is boot-up process */
- zone->present_pages += count;
-}
-
/* Free whole pageblock and set its migration type to MIGRATE_CMA. */
void __init init_cma_reserved_pageblock(struct page *page)
{
unsigned i = pageblock_nr_pages;
- unsigned long pfn = page_to_pfn(page);
struct page *p = page;
- int nid = page_to_nid(page);
-
- /*
- * ZONE_MOVABLE will steal present pages from other zones by
- * changing page links so page_zone() is changed. Before that,
- * we need to adjust previous zone's page count first.
- */
- adjust_present_page_count(page, -pageblock_nr_pages);
do {
__ClearPageReserved(p);
set_page_count(p, 0);
-
- /* Steal pages from other zones */
- set_page_links(p, ZONE_MOVABLE, nid, pfn);
- } while (++p, ++pfn, --i);
-
- adjust_present_page_count(page, pageblock_nr_pages);
+ } while (++p, --i);
set_pageblock_migratetype(page, MIGRATE_CMA);
@@ -2889,7 +2867,7 @@ int __isolate_free_page(struct page *page, unsigned int order)
* exists.
*/
watermark = min_wmark_pages(zone) + (1UL << order);
- if (!zone_watermark_ok(zone, 0, watermark, 0, 0))
+ if (!zone_watermark_ok(zone, 0, watermark, 0, ALLOC_CMA))
return 0;
__mod_zone_freepage_state(zone, -(1UL << order), mt);
@@ -3165,6 +3143,12 @@ bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark,
}
+#ifdef CONFIG_CMA
+ /* If allocation can't use CMA areas don't use free CMA pages */
+ if (!(alloc_flags & ALLOC_CMA))
+ free_pages -= zone_page_state(z, NR_FREE_CMA_PAGES);
+#endif
+
/*
* Check watermarks for an order-0 allocation request. If these
* are not met, then a high-order request also cannot go ahead
@@ -3191,8 +3175,10 @@ bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark,
}
#ifdef CONFIG_CMA
- if (!list_empty(&area->free_list[MIGRATE_CMA]))
+ if ((alloc_flags & ALLOC_CMA) &&
+ !list_empty(&area->free_list[MIGRATE_CMA])) {
return true;
+ }
#endif
if (alloc_harder &&
!list_empty(&area->free_list[MIGRATE_HIGHATOMIC]))
@@ -3212,6 +3198,13 @@ static inline bool zone_watermark_fast(struct zone *z, unsigned int order,
unsigned long mark, int classzone_idx, unsigned int alloc_flags)
{
long free_pages = zone_page_state(z, NR_FREE_PAGES);
+ long cma_pages = 0;
+
+#ifdef CONFIG_CMA
+ /* If allocation can't use CMA areas, don't use free CMA pages */
+ if (!(alloc_flags & ALLOC_CMA))
+ cma_pages = zone_page_state(z, NR_FREE_CMA_PAGES);
+#endif
/*
* Fast check for order-0 only. If this fails then the reserves
@@ -3220,7 +3213,7 @@ static inline bool zone_watermark_fast(struct zone *z, unsigned int order,
* the caller is !atomic then it'll uselessly search the free
* list. That corner case is then slower but it is harmless.
*/
- if (!order && free_pages > mark + z->lowmem_reserve[classzone_idx])
+ if (!order && (free_pages - cma_pages) > mark + z->lowmem_reserve[classzone_idx])
return true;
return __zone_watermark_ok(z, order, mark, classzone_idx, alloc_flags,
@@ -3856,6 +3849,10 @@ gfp_to_alloc_flags(gfp_t gfp_mask)
} else if (unlikely(rt_task(current)) && !in_interrupt())
alloc_flags |= ALLOC_HARDER;
+#ifdef CONFIG_CMA
+ if (gfpflags_to_migratetype(gfp_mask) == MIGRATE_MOVABLE)
+ alloc_flags |= ALLOC_CMA;
+#endif
return alloc_flags;
}
@@ -4322,6 +4319,9 @@ static inline bool prepare_alloc_pages(gfp_t gfp_mask, unsigned int order,
if (should_fail_alloc_page(gfp_mask, order))
return false;
+ if (IS_ENABLED(CONFIG_CMA) && ac->migratetype == MIGRATE_MOVABLE)
+ *alloc_flags |= ALLOC_CMA;
+
return true;
}
@@ -6204,7 +6204,6 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat)
{
enum zone_type j;
int nid = pgdat->node_id;
- unsigned long node_end_pfn = 0;
pgdat_resize_init(pgdat);
#ifdef CONFIG_NUMA_BALANCING
@@ -6232,13 +6231,9 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat)
struct zone *zone = pgdat->node_zones + j;
unsigned long size, realsize, freesize, memmap_pages;
unsigned long zone_start_pfn = zone->zone_start_pfn;
- unsigned long movable_size = 0;
size = zone->spanned_pages;
realsize = freesize = zone->present_pages;
- if (zone_end_pfn(zone) > node_end_pfn)
- node_end_pfn = zone_end_pfn(zone);
-
/*
* Adjust freesize so that it accounts for how much memory
@@ -6287,30 +6282,12 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat)
zone_seqlock_init(zone);
zone_pcp_init(zone);
- /*
- * The size of the CMA area is unknown now so we need to
- * prepare the memory for the usemap at maximum.
- */
- if (IS_ENABLED(CONFIG_CMA) && j == ZONE_MOVABLE &&
- pgdat->node_spanned_pages) {
- movable_size = node_end_pfn - pgdat->node_start_pfn;
- }
-
- if (!size && !movable_size)
+ if (!size)
continue;
set_pageblock_order();
- if (movable_size) {
- zone->zone_start_pfn = pgdat->node_start_pfn;
- zone->spanned_pages = movable_size;
- setup_usemap(pgdat, zone,
- pgdat->node_start_pfn, movable_size);
- init_currently_empty_zone(zone,
- pgdat->node_start_pfn, movable_size);
- } else {
- setup_usemap(pgdat, zone, zone_start_pfn, size);
- init_currently_empty_zone(zone, zone_start_pfn, size);
- }
+ setup_usemap(pgdat, zone, zone_start_pfn, size);
+ init_currently_empty_zone(zone, zone_start_pfn, size);
memmap_init(size, nid, j, zone_start_pfn);
}
}
@@ -7621,11 +7598,12 @@ bool has_unmovable_pages(struct zone *zone, struct page *page, int count,
unsigned long pfn, iter, found;
/*
- * For avoiding noise data, lru_add_drain_all() should be called
- * If ZONE_MOVABLE, the zone never contains unmovable pages
+ * TODO we could make this much more efficient by not checking every
+ * page in the range if we know all of them are in ZONE_MOVABLE and
+ * that the movable zone guarantees that pages are migratable, but
+ * the latter is not the case right now, unfortunately. E.g. movablecore
+ * can still lead to having bootmem allocations in zone_movable.
*/
- if (zone_idx(zone) == ZONE_MOVABLE)
- return false;
/*
* CMA allocations (alloc_contig_range) really need to mark isolate
@@ -7646,7 +7624,7 @@ bool has_unmovable_pages(struct zone *zone, struct page *page, int count,
page = pfn_to_page(check);
if (PageReserved(page))
- return true;
+ goto unmovable;
/*
* Hugepages are not in LRU lists, but they're movable.
@@ -7696,9 +7674,12 @@ bool has_unmovable_pages(struct zone *zone, struct page *page, int count,
* page at boot.
*/
if (found > count)
- return true;
+ goto unmovable;
}
return false;
+unmovable:
+ WARN_ON_ONCE(zone_idx(zone) == ZONE_MOVABLE);
+ return true;
}
bool is_pageblock_removable_nolock(struct page *page)
@@ -7951,7 +7932,7 @@ void free_contig_range(unsigned long pfn, unsigned nr_pages)
}
#endif
-#if defined CONFIG_MEMORY_HOTPLUG || defined CONFIG_CMA
+#ifdef CONFIG_MEMORY_HOTPLUG
/*
* The zone indicated has a new number of managed_pages; batch sizes and percpu
 * page high values need to be recalculated.
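
Several page_alloc.c hunks above share one idea: free CMA pages may only count toward a watermark when the request itself is allowed to allocate from MIGRATE_CMA pageblocks (ALLOC_CMA, set for movable allocations). A standalone sketch of that accounting, names invented:

    #include <stdbool.h>

    /* If the caller cannot use CMA pageblocks, the CMA share of the
     * free pages must be ignored, otherwise the zone looks healthier
     * than it really is for unmovable requests. */
    static bool watermark_ok(long nr_free, long nr_free_cma, long mark,
                             bool alloc_cma)
    {
            if (!alloc_cma)
                    nr_free -= nr_free_cma;
            return nr_free > mark;
    }
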
diff --git a/mm/rmap.c b/mm/rmap.c
index 8d5337fed37b..6db729dc4c50 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -942,7 +942,7 @@ static bool page_mkclean_one(struct page *page, struct vm_area_struct *vma,
* downgrading page table protection not changing it to point
* to a new page.
*
- * See Documentation/vm/mmu_notifier.txt
+ * See Documentation/vm/mmu_notifier.rst
*/
if (ret)
(*cleaned)++;
@@ -1599,7 +1599,7 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
* point at new page while a device still is using this
* page.
*
- * See Documentation/vm/mmu_notifier.txt
+ * See Documentation/vm/mmu_notifier.rst
*/
dec_mm_counter(mm, mm_counter_file(page));
}
@@ -1609,7 +1609,7 @@ discard:
* done above for all cases requiring it to happen under page
* table lock before mmu_notifier_invalidate_range_end()
*
- * See Documentation/vm/mmu_notifier.txt
+ * See Documentation/vm/mmu_notifier.rst
*/
page_remove_rmap(subpage, PageHuge(page));
put_page(page);
diff --git a/mm/sparse.c b/mm/sparse.c
index 62eef264a7bd..73dc2fcc0eab 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -629,7 +629,7 @@ void offline_mem_sections(unsigned long start_pfn, unsigned long end_pfn)
unsigned long pfn;
for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
- unsigned long section_nr = pfn_to_section_nr(start_pfn);
+ unsigned long section_nr = pfn_to_section_nr(pfn);
struct mem_section *ms;
/*
diff --git a/mm/swapfile.c b/mm/swapfile.c
index cc2cf04d9018..78a015fcec3b 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -3112,6 +3112,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
unsigned long *frontswap_map = NULL;
struct page *page = NULL;
struct inode *inode = NULL;
+ bool inced_nr_rotate_swap = false;
if (swap_flags & ~SWAP_FLAGS_VALID)
return -EINVAL;
@@ -3215,8 +3216,10 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
cluster = per_cpu_ptr(p->percpu_cluster, cpu);
cluster_set_null(&cluster->index);
}
- } else
+ } else {
atomic_inc(&nr_rotate_swap);
+ inced_nr_rotate_swap = true;
+ }
error = swap_cgroup_swapon(p->type, maxpages);
if (error)
@@ -3307,6 +3310,8 @@ bad_swap:
vfree(swap_map);
kvfree(cluster_info);
kvfree(frontswap_map);
+ if (inced_nr_rotate_swap)
+ atomic_dec(&nr_rotate_swap);
if (swap_file) {
if (inode && S_ISREG(inode->i_mode)) {
inode_unlock(inode);
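
The swapon() change is a textbook error-path fix: record exactly which side effects have already happened, then undo only those at the shared bad_swap label. A standalone sketch with a stand-in failing step:

    #include <stdatomic.h>
    #include <stdbool.h>

    static atomic_int nr_rotate_swap;

    static int later_setup_step(void) { return -1; }   /* stand-in failure */

    static int swapon_like(bool rotational)
    {
            bool inced_nr_rotate_swap = false;
            int error;

            if (rotational) {
                    atomic_fetch_add(&nr_rotate_swap, 1);
                    inced_nr_rotate_swap = true;
            }
            error = later_setup_step();
            if (error)
                    goto bad_swap;
            return 0;
    bad_swap:
            if (inced_nr_rotate_swap)       /* undo only what we did */
                    atomic_fetch_sub(&nr_rotate_swap, 1);
            return error;
    }
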
diff --git a/mm/util.c b/mm/util.c
index 45fc3169e7b0..c2d0a7cdb189 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -621,7 +621,7 @@ EXPORT_SYMBOL_GPL(vm_memory_committed);
* succeed and -ENOMEM implies there is not.
*
* We currently support three overcommit policies, which are set via the
- * vm.overcommit_memory sysctl. See Documentation/vm/overcommit-accounting
+ * vm.overcommit_memory sysctl. See Documentation/vm/overcommit-accounting.rst
*
* Strict overcommit modes added 2002 Feb 26 by Alan Cox.
* Additional code 2002 Jul 20 by Robert Love.
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index ebff729cc956..63a5f502da08 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -2751,25 +2751,14 @@ static const struct seq_operations vmalloc_op = {
.show = s_show,
};
-static int vmalloc_open(struct inode *inode, struct file *file)
+static int __init proc_vmalloc_init(void)
{
if (IS_ENABLED(CONFIG_NUMA))
- return seq_open_private(file, &vmalloc_op,
- nr_node_ids * sizeof(unsigned int));
+ proc_create_seq_private("vmallocinfo", S_IRUSR, NULL,
+ &vmalloc_op,
+ nr_node_ids * sizeof(unsigned int), NULL);
else
- return seq_open(file, &vmalloc_op);
-}
-
-static const struct file_operations proc_vmalloc_operations = {
- .open = vmalloc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release_private,
-};
-
-static int __init proc_vmalloc_init(void)
-{
- proc_create("vmallocinfo", S_IRUSR, NULL, &proc_vmalloc_operations);
+ proc_create_seq("vmallocinfo", S_IRUSR, NULL, &vmalloc_op);
return 0;
}
module_init(proc_vmalloc_init);
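
This vmallocinfo hunk is the template for most of the networking changes below: the open/read/llseek/release file_operations boilerplate disappears and the seq_operations are registered directly. A minimal sketch of the new-style registration (demo names invented; the helper signatures are the ones used throughout this patch):

    #include <linux/module.h>
    #include <linux/proc_fs.h>
    #include <linux/seq_file.h>

    static void *demo_start(struct seq_file *m, loff_t *pos)
    {
            return *pos == 0 ? SEQ_START_TOKEN : NULL;
    }

    static void *demo_next(struct seq_file *m, void *v, loff_t *pos)
    {
            ++*pos;
            return NULL;
    }

    static void demo_stop(struct seq_file *m, void *v)
    {
    }

    static int demo_show(struct seq_file *m, void *v)
    {
            seq_puts(m, "hello from proc_create_seq\n");
            return 0;
    }

    static const struct seq_operations demo_seq_ops = {
            .start = demo_start,
            .next  = demo_next,
            .stop  = demo_stop,
            .show  = demo_show,
    };

    static int __init demo_init(void)
    {
            /* one call replaces the old fops + seq_open() pair */
            if (!proc_create_seq("demo", 0444, NULL, &demo_seq_ops))
                    return -ENOMEM;
            return 0;
    }

    static void __exit demo_exit(void)
    {
            remove_proc_entry("demo", NULL);
    }

    module_init(demo_init);
    module_exit(demo_exit);
    MODULE_LICENSE("GPL");
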
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 9b697323a88c..9270a4370d54 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1418,7 +1418,7 @@ int __isolate_lru_page(struct page *page, isolate_mode_t mode)
return ret;
mapping = page_mapping(page);
- migrate_dirty = mapping && mapping->a_ops->migratepage;
+ migrate_dirty = !mapping || mapping->a_ops->migratepage;
unlock_page(page);
if (!migrate_dirty)
return ret;
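
The vmscan change looks cosmetic but inverts the predicate: a page with no mapping is anonymous and always migratable, while a file page is migratable only if its address space provides ->migratepage. As a sketch, with an invented type:

    #include <stdbool.h>

    struct mapping_like {
            bool has_migratepage;   /* a_ops->migratepage != NULL */
    };

    static bool migrate_dirty(const struct mapping_like *mapping)
    {
            /* NULL mapping => anonymous page => always migratable */
            return !mapping || mapping->has_migratepage;
    }
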
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 536332e988b8..75eda9c2b260 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1161,7 +1161,7 @@ const char * const vmstat_text[] = {
"nr_vmscan_immediate_reclaim",
"nr_dirtied",
"nr_written",
- "nr_indirectly_reclaimable",
+ "", /* nr_indirectly_reclaimable */
/* enum writeback_stat_item counters */
"nr_dirty_threshold",
@@ -1516,18 +1516,6 @@ static const struct seq_operations fragmentation_op = {
.show = frag_show,
};
-static int fragmentation_open(struct inode *inode, struct file *file)
-{
- return seq_open(file, &fragmentation_op);
-}
-
-static const struct file_operations buddyinfo_file_operations = {
- .open = fragmentation_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
static const struct seq_operations pagetypeinfo_op = {
.start = frag_start,
.next = frag_next,
@@ -1535,18 +1523,6 @@ static const struct seq_operations pagetypeinfo_op = {
.show = pagetypeinfo_show,
};
-static int pagetypeinfo_open(struct inode *inode, struct file *file)
-{
- return seq_open(file, &pagetypeinfo_op);
-}
-
-static const struct file_operations pagetypeinfo_file_operations = {
- .open = pagetypeinfo_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
static bool is_zone_first_populated(pg_data_t *pgdat, struct zone *zone)
{
int zid;
@@ -1663,18 +1639,6 @@ static const struct seq_operations zoneinfo_op = {
.show = zoneinfo_show,
};
-static int zoneinfo_open(struct inode *inode, struct file *file)
-{
- return seq_open(file, &zoneinfo_op);
-}
-
-static const struct file_operations zoneinfo_file_operations = {
- .open = zoneinfo_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
enum writeback_stat_item {
NR_DIRTY_THRESHOLD,
NR_DIRTY_BG_THRESHOLD,
@@ -1740,6 +1704,10 @@ static int vmstat_show(struct seq_file *m, void *arg)
unsigned long *l = arg;
unsigned long off = l - (unsigned long *)m->private;
+ /* Skip hidden vmstat items. */
+ if (*vmstat_text[off] == '\0')
+ return 0;
+
seq_puts(m, vmstat_text[off]);
seq_put_decimal_ull(m, " ", *l);
seq_putc(m, '\n');
@@ -1758,18 +1726,6 @@ static const struct seq_operations vmstat_op = {
.stop = vmstat_stop,
.show = vmstat_show,
};
-
-static int vmstat_open(struct inode *inode, struct file *file)
-{
- return seq_open(file, &vmstat_op);
-}
-
-static const struct file_operations vmstat_file_operations = {
- .open = vmstat_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
#endif /* CONFIG_PROC_FS */
#ifdef CONFIG_SMP
@@ -2016,10 +1972,10 @@ void __init init_mm_internals(void)
start_shepherd_timer();
#endif
#ifdef CONFIG_PROC_FS
- proc_create("buddyinfo", 0444, NULL, &buddyinfo_file_operations);
- proc_create("pagetypeinfo", 0444, NULL, &pagetypeinfo_file_operations);
- proc_create("vmstat", 0444, NULL, &vmstat_file_operations);
- proc_create("zoneinfo", 0444, NULL, &zoneinfo_file_operations);
+ proc_create_seq("buddyinfo", 0444, NULL, &fragmentation_op);
+ proc_create_seq("pagetypeinfo", 0444, NULL, &pagetypeinfo_op);
+ proc_create_seq("vmstat", 0444, NULL, &vmstat_op);
+ proc_create_seq("zoneinfo", 0444, NULL, &zoneinfo_op);
#endif
}
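
The nr_indirectly_reclaimable trick above keeps the counter's slot (so the indices of all later counters stay stable) while hiding its line from /proc/vmstat. A standalone sketch of the same skip:

    #include <stdio.h>

    static const char * const names[] = { "nr_dirtied", "", "nr_written" };

    int main(void)
    {
            unsigned long vals[] = { 10, 99, 20 };

            for (unsigned int i = 0; i < 3; i++) {
                    if (*names[i] == '\0')  /* hidden item: keep slot, skip line */
                            continue;
                    printf("%s %lu\n", names[i], vals[i]);
            }
            return 0;
    }
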
diff --git a/mm/z3fold.c b/mm/z3fold.c
index c0bca6153b95..4b366d181f35 100644
--- a/mm/z3fold.c
+++ b/mm/z3fold.c
@@ -144,7 +144,8 @@ enum z3fold_page_flags {
PAGE_HEADLESS = 0,
MIDDLE_CHUNK_MAPPED,
NEEDS_COMPACTING,
- PAGE_STALE
+ PAGE_STALE,
+ UNDER_RECLAIM
};
/*****************
@@ -173,6 +174,7 @@ static struct z3fold_header *init_z3fold_page(struct page *page,
clear_bit(MIDDLE_CHUNK_MAPPED, &page->private);
clear_bit(NEEDS_COMPACTING, &page->private);
clear_bit(PAGE_STALE, &page->private);
+ clear_bit(UNDER_RECLAIM, &page->private);
spin_lock_init(&zhdr->page_lock);
kref_init(&zhdr->refcount);
@@ -756,6 +758,10 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
atomic64_dec(&pool->pages_nr);
return;
}
+ if (test_bit(UNDER_RECLAIM, &page->private)) {
+ z3fold_page_unlock(zhdr);
+ return;
+ }
if (test_and_set_bit(NEEDS_COMPACTING, &page->private)) {
z3fold_page_unlock(zhdr);
return;
@@ -840,6 +846,8 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
kref_get(&zhdr->refcount);
list_del_init(&zhdr->buddy);
zhdr->cpu = -1;
+ set_bit(UNDER_RECLAIM, &page->private);
+ break;
}
list_del_init(&page->lru);
@@ -887,25 +895,35 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
goto next;
}
next:
- spin_lock(&pool->lock);
if (test_bit(PAGE_HEADLESS, &page->private)) {
if (ret == 0) {
- spin_unlock(&pool->lock);
free_z3fold_page(page);
return 0;
}
- } else if (kref_put(&zhdr->refcount, release_z3fold_page)) {
- atomic64_dec(&pool->pages_nr);
+ spin_lock(&pool->lock);
+ list_add(&page->lru, &pool->lru);
+ spin_unlock(&pool->lock);
+ } else {
+ z3fold_page_lock(zhdr);
+ clear_bit(UNDER_RECLAIM, &page->private);
+ if (kref_put(&zhdr->refcount,
+ release_z3fold_page_locked)) {
+ atomic64_dec(&pool->pages_nr);
+ return 0;
+ }
+ /*
+ * If we are here, the page is still not completely
+ * free. Take the global pool lock to be able to add
+ * it back to the lru list.
+ */
+ spin_lock(&pool->lock);
+ list_add(&page->lru, &pool->lru);
spin_unlock(&pool->lock);
- return 0;
+ z3fold_page_unlock(zhdr);
}
- /*
- * Add to the beginning of LRU.
- * Pool lock has to be kept here to ensure the page has
- * not already been released
- */
- list_add(&page->lru, &pool->lru);
+ /* We started off locked so we need to lock the pool back */
+ spin_lock(&pool->lock);
}
spin_unlock(&pool->lock);
return -EAGAIN;
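
The z3fold fix serializes free against reclaim with a per-page bit rather than a lock: free() backs off while UNDER_RECLAIM is set, and the reclaimer, after clearing the bit under the page lock, drops the final reference itself. A minimal standalone sketch of that hand-off:

    #include <stdatomic.h>
    #include <stdbool.h>

    struct zpage {
            atomic_bool under_reclaim;
            atomic_int  refcount;
    };

    /* Returns true when the caller dropped the last reference. */
    static bool zpage_free(struct zpage *p)
    {
            if (atomic_load(&p->under_reclaim))
                    return false;   /* reclaim owns the final put */
            return atomic_fetch_sub(&p->refcount, 1) == 1;
    }
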
diff --git a/net/8021q/vlanproc.c b/net/8021q/vlanproc.c
index a627a5db2125..d36e8c4b7f56 100644
--- a/net/8021q/vlanproc.c
+++ b/net/8021q/vlanproc.c
@@ -73,35 +73,6 @@ static const struct seq_operations vlan_seq_ops = {
.show = vlan_seq_show,
};
-static int vlan_seq_open(struct inode *inode, struct file *file)
-{
- return seq_open_net(inode, file, &vlan_seq_ops,
- sizeof(struct seq_net_private));
-}
-
-static const struct file_operations vlan_fops = {
- .open = vlan_seq_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release_net,
-};
-
-/*
- * /proc/net/vlan/<device> file and inode operations
- */
-
-static int vlandev_seq_open(struct inode *inode, struct file *file)
-{
- return single_open(file, vlandev_seq_show, PDE_DATA(inode));
-}
-
-static const struct file_operations vlandev_fops = {
- .open = vlandev_seq_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
/*
* Proc filesystem directory entries.
*/
@@ -148,8 +119,9 @@ int __net_init vlan_proc_init(struct net *net)
if (!vn->proc_vlan_dir)
goto err;
- vn->proc_vlan_conf = proc_create(name_conf, S_IFREG | 0600,
- vn->proc_vlan_dir, &vlan_fops);
+ vn->proc_vlan_conf = proc_create_net(name_conf, S_IFREG | 0600,
+ vn->proc_vlan_dir, &vlan_seq_ops,
+ sizeof(struct seq_net_private));
if (!vn->proc_vlan_conf)
goto err;
return 0;
@@ -171,9 +143,8 @@ int vlan_proc_add_dev(struct net_device *vlandev)
if (!strcmp(vlandev->name, name_conf))
return -EINVAL;
- vlan->dent =
- proc_create_data(vlandev->name, S_IFREG | 0600,
- vn->proc_vlan_dir, &vlandev_fops, vlandev);
+ vlan->dent = proc_create_single_data(vlandev->name, S_IFREG | 0600,
+ vn->proc_vlan_dir, vlandev_seq_show, vlandev);
if (!vlan->dent)
return -ENOBUFS;
return 0;
diff --git a/net/9p/trans_common.c b/net/9p/trans_common.c
index 38aa6345bdfa..b718db2085b2 100644
--- a/net/9p/trans_common.c
+++ b/net/9p/trans_common.c
@@ -16,7 +16,7 @@
#include <linux/module.h>
/**
- * p9_release_req_pages - Release pages after the transaction.
+ * p9_release_pages - Release pages after the transaction.
*/
void p9_release_pages(struct page **pages, int nr_pages)
{
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index 0cfba919d167..588bf88c3305 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -231,7 +231,7 @@ static void p9_conn_cancel(struct p9_conn *m, int err)
static __poll_t
p9_fd_poll(struct p9_client *client, struct poll_table_struct *pt, int *err)
{
- __poll_t ret, n;
+ __poll_t ret;
struct p9_trans_fd *ts = NULL;
if (client && client->status == Connected)
@@ -243,19 +243,9 @@ p9_fd_poll(struct p9_client *client, struct poll_table_struct *pt, int *err)
return EPOLLERR;
}
- if (!ts->rd->f_op->poll)
- ret = DEFAULT_POLLMASK;
- else
- ret = ts->rd->f_op->poll(ts->rd, pt);
-
- if (ts->rd != ts->wr) {
- if (!ts->wr->f_op->poll)
- n = DEFAULT_POLLMASK;
- else
- n = ts->wr->f_op->poll(ts->wr, pt);
- ret = (ret & ~EPOLLOUT) | (n & ~EPOLLIN);
- }
-
+ ret = vfs_poll(ts->rd, pt);
+ if (ts->rd != ts->wr)
+ ret = (ret & ~EPOLLOUT) | (vfs_poll(ts->wr, pt) & ~EPOLLIN);
return ret;
}
@@ -1092,8 +1082,8 @@ static struct p9_trans_module p9_fd_trans = {
};
/**
- * p9_poll_proc - poll worker thread
- * @a: thread state and arguments
+ * p9_poll_workfn - poll worker thread
+ * @work: work queue
*
* polls all v9fs transports for new events and queues the appropriate
* work to the work queue
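
The p9_fd_poll() rewrite is another instance of the series-wide vfs_poll() conversion. The helper (roughly, as introduced elsewhere in this series) folds in the "no ->poll method means DEFAULT_POLLMASK" fallback that every caller used to open-code:

    static inline __poll_t vfs_poll(struct file *file,
                                    struct poll_table_struct *pt)
    {
            if (unlikely(!file->f_op->poll))
                    return DEFAULT_POLLMASK;
            return file->f_op->poll(file, pt);
    }
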
diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c
index 6d8e3031978f..3d414acb7015 100644
--- a/net/9p/trans_rdma.c
+++ b/net/9p/trans_rdma.c
@@ -68,8 +68,6 @@
* @pd: Protection Domain pointer
* @qp: Queue Pair pointer
* @cq: Completion Queue pointer
- * @dm_mr: DMA Memory Region pointer
- * @lkey: The local access only memory region key
* @timeout: Number of uSecs to wait for connection management events
* @privport: Whether a privileged port may be used
* @port: The port to use
@@ -632,7 +630,7 @@ static int p9_rdma_bind_privport(struct p9_trans_rdma *rdma)
}
/**
- * trans_create_rdma - Transport method for creating atransport instance
+ * rdma_create_trans - Transport method for creating a transport instance
* @client: client instance
* @addr: IP address string
* @args: Mount options string
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index 3aa5a93ad107..4d0372263e5d 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -60,7 +60,6 @@ static atomic_t vp_pinned = ATOMIC_INIT(0);
/**
* struct virtio_chan - per-instance transport information
- * @initialized: whether the channel is initialized
* @inuse: whether the channel is in use
* @lock: protects multiple elements within this structure
* @client: client instance
@@ -385,8 +384,8 @@ static int p9_get_mapped_pages(struct virtio_chan *chan,
 * @uidata: user buffer that should be used for zero copy read
 * @uodata: user buffer that should be used for zero copy write
* @inlen: read buffer size
- * @olen: write buffer size
- * @hdrlen: reader header size, This is the size of response protocol data
+ * @outlen: write buffer size
+ * @in_hdr_len: reader header size. This is the size of the response protocol data
*
*/
static int
diff --git a/net/9p/trans_xen.c b/net/9p/trans_xen.c
index 086a4abdfa7c..0f19960390a6 100644
--- a/net/9p/trans_xen.c
+++ b/net/9p/trans_xen.c
@@ -485,7 +485,7 @@ static int xen_9pfs_front_probe(struct xenbus_device *dev,
static int xen_9pfs_front_resume(struct xenbus_device *dev)
{
- dev_warn(&dev->dev, "suspsend/resume unsupported\n");
+ dev_warn(&dev->dev, "suspend/resume unsupported\n");
return 0;
}
diff --git a/net/appletalk/aarp.c b/net/appletalk/aarp.c
index d4c1021e74e1..49a16cee2aae 100644
--- a/net/appletalk/aarp.c
+++ b/net/appletalk/aarp.c
@@ -907,11 +907,6 @@ void aarp_device_down(struct net_device *dev)
}
#ifdef CONFIG_PROC_FS
-struct aarp_iter_state {
- int bucket;
- struct aarp_entry **table;
-};
-
/*
* Get the aarp entry that is in the chain described
* by the iterator.
@@ -1033,25 +1028,12 @@ static int aarp_seq_show(struct seq_file *seq, void *v)
return 0;
}
-static const struct seq_operations aarp_seq_ops = {
+const struct seq_operations aarp_seq_ops = {
.start = aarp_seq_start,
.next = aarp_seq_next,
.stop = aarp_seq_stop,
.show = aarp_seq_show,
};
-
-static int aarp_seq_open(struct inode *inode, struct file *file)
-{
- return seq_open_private(file, &aarp_seq_ops,
- sizeof(struct aarp_iter_state));
-}
-
-const struct file_operations atalk_seq_arp_fops = {
- .open = aarp_seq_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release_private,
-};
#endif
/* General module cleanup. Called from cleanup_module() in ddp.c. */
diff --git a/net/appletalk/atalk_proc.c b/net/appletalk/atalk_proc.c
index 7214aea14cb3..8006295f8bd7 100644
--- a/net/appletalk/atalk_proc.c
+++ b/net/appletalk/atalk_proc.c
@@ -210,42 +210,6 @@ static const struct seq_operations atalk_seq_socket_ops = {
.show = atalk_seq_socket_show,
};
-static int atalk_seq_interface_open(struct inode *inode, struct file *file)
-{
- return seq_open(file, &atalk_seq_interface_ops);
-}
-
-static int atalk_seq_route_open(struct inode *inode, struct file *file)
-{
- return seq_open(file, &atalk_seq_route_ops);
-}
-
-static int atalk_seq_socket_open(struct inode *inode, struct file *file)
-{
- return seq_open(file, &atalk_seq_socket_ops);
-}
-
-static const struct file_operations atalk_seq_interface_fops = {
- .open = atalk_seq_interface_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
-static const struct file_operations atalk_seq_route_fops = {
- .open = atalk_seq_route_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
-static const struct file_operations atalk_seq_socket_fops = {
- .open = atalk_seq_socket_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
static struct proc_dir_entry *atalk_proc_dir;
int __init atalk_proc_init(void)
@@ -257,22 +221,23 @@ int __init atalk_proc_init(void)
if (!atalk_proc_dir)
goto out;
- p = proc_create("interface", 0444, atalk_proc_dir,
- &atalk_seq_interface_fops);
+ p = proc_create_seq("interface", 0444, atalk_proc_dir,
+ &atalk_seq_interface_ops);
if (!p)
goto out_interface;
- p = proc_create("route", 0444, atalk_proc_dir,
- &atalk_seq_route_fops);
+ p = proc_create_seq("route", 0444, atalk_proc_dir,
+ &atalk_seq_route_ops);
if (!p)
goto out_route;
- p = proc_create("socket", 0444, atalk_proc_dir,
- &atalk_seq_socket_fops);
+ p = proc_create_seq("socket", 0444, atalk_proc_dir,
+ &atalk_seq_socket_ops);
if (!p)
goto out_socket;
- p = proc_create("arp", 0444, atalk_proc_dir, &atalk_seq_arp_fops);
+ p = proc_create_seq_private("arp", 0444, atalk_proc_dir, &aarp_seq_ops,
+ sizeof(struct aarp_iter_state), NULL);
if (!p)
goto out_arp;
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index 9b6bc5abe946..55fdba05d7d9 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -1869,7 +1869,7 @@ static const struct proto_ops atalk_dgram_ops = {
.socketpair = sock_no_socketpair,
.accept = sock_no_accept,
.getname = atalk_getname,
- .poll = datagram_poll,
+ .poll_mask = datagram_poll_mask,
.ioctl = atalk_ioctl,
#ifdef CONFIG_COMPAT
.compat_ioctl = atalk_compat_ioctl,
diff --git a/net/atm/br2684.c b/net/atm/br2684.c
index fd94bea36ee8..36b3adacc0dd 100644
--- a/net/atm/br2684.c
+++ b/net/atm/br2684.c
@@ -818,18 +818,6 @@ static const struct seq_operations br2684_seq_ops = {
.show = br2684_seq_show,
};
-static int br2684_proc_open(struct inode *inode, struct file *file)
-{
- return seq_open(file, &br2684_seq_ops);
-}
-
-static const struct file_operations br2684_proc_ops = {
- .open = br2684_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
extern struct proc_dir_entry *atm_proc_root; /* from proc.c */
#endif /* CONFIG_PROC_FS */
@@ -837,7 +825,7 @@ static int __init br2684_init(void)
{
#ifdef CONFIG_PROC_FS
struct proc_dir_entry *p;
- p = proc_create("br2684", 0, atm_proc_root, &br2684_proc_ops);
+ p = proc_create_seq("br2684", 0, atm_proc_root, &br2684_seq_ops);
if (p == NULL)
return -ENOMEM;
#endif
diff --git a/net/atm/clip.c b/net/atm/clip.c
index f07dbc632222..66caa48a27c2 100644
--- a/net/atm/clip.c
+++ b/net/atm/clip.c
@@ -863,20 +863,6 @@ static const struct seq_operations arp_seq_ops = {
.stop = neigh_seq_stop,
.show = clip_seq_show,
};
-
-static int arp_seq_open(struct inode *inode, struct file *file)
-{
- return seq_open_net(inode, file, &arp_seq_ops,
- sizeof(struct clip_seq_state));
-}
-
-static const struct file_operations arp_seq_fops = {
- .open = arp_seq_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release_net,
- .owner = THIS_MODULE
-};
#endif
static void atm_clip_exit_noproc(void);
@@ -893,7 +879,8 @@ static int __init atm_clip_init(void)
{
struct proc_dir_entry *p;
- p = proc_create("arp", 0444, atm_proc_root, &arp_seq_fops);
+ p = proc_create_net("arp", 0444, atm_proc_root, &arp_seq_ops,
+ sizeof(struct clip_seq_state));
if (!p) {
pr_err("Unable to initialize /proc/net/atm/arp\n");
atm_clip_exit_noproc();
diff --git a/net/atm/common.c b/net/atm/common.c
index fc78a0508ae1..1f2af59935db 100644
--- a/net/atm/common.c
+++ b/net/atm/common.c
@@ -648,16 +648,11 @@ out:
return error;
}
-__poll_t vcc_poll(struct file *file, struct socket *sock, poll_table *wait)
+__poll_t vcc_poll_mask(struct socket *sock, __poll_t events)
{
struct sock *sk = sock->sk;
- struct atm_vcc *vcc;
- __poll_t mask;
-
- sock_poll_wait(file, sk_sleep(sk), wait);
- mask = 0;
-
- vcc = ATM_SD(sock);
+ struct atm_vcc *vcc = ATM_SD(sock);
+ __poll_t mask = 0;
/* exceptional events */
if (sk->sk_err)
diff --git a/net/atm/common.h b/net/atm/common.h
index 5850649068bb..526796ad230f 100644
--- a/net/atm/common.h
+++ b/net/atm/common.h
@@ -17,7 +17,7 @@ int vcc_connect(struct socket *sock, int itf, short vpi, int vci);
int vcc_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
int flags);
int vcc_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len);
-__poll_t vcc_poll(struct file *file, struct socket *sock, poll_table *wait);
+__poll_t vcc_poll_mask(struct socket *sock, __poll_t events);
int vcc_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
int vcc_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
int vcc_setsockopt(struct socket *sock, int level, int optname,
diff --git a/net/atm/lec.c b/net/atm/lec.c
index 01d5d20a6eb1..5a95fcf6f9b6 100644
--- a/net/atm/lec.c
+++ b/net/atm/lec.c
@@ -41,6 +41,9 @@ static unsigned char bridge_ula_lec[] = { 0x01, 0x80, 0xc2, 0x00, 0x00 };
#include <linux/module.h>
#include <linux/init.h>
+/* Hardening for Spectre-v1 */
+#include <linux/nospec.h>
+
#include "lec.h"
#include "lec_arpc.h"
#include "resources.h"
@@ -687,8 +690,10 @@ static int lec_vcc_attach(struct atm_vcc *vcc, void __user *arg)
bytes_left = copy_from_user(&ioc_data, arg, sizeof(struct atmlec_ioc));
if (bytes_left != 0)
pr_info("copy from user failed for %d bytes\n", bytes_left);
- if (ioc_data.dev_num < 0 || ioc_data.dev_num >= MAX_LEC_ITF ||
- !dev_lec[ioc_data.dev_num])
+ if (ioc_data.dev_num < 0 || ioc_data.dev_num >= MAX_LEC_ITF)
+ return -EINVAL;
+ ioc_data.dev_num = array_index_nospec(ioc_data.dev_num, MAX_LEC_ITF);
+ if (!dev_lec[ioc_data.dev_num])
return -EINVAL;
vpriv = kmalloc(sizeof(struct lec_vcc_priv), GFP_KERNEL);
if (!vpriv)
@@ -985,18 +990,6 @@ static const struct seq_operations lec_seq_ops = {
.stop = lec_seq_stop,
.show = lec_seq_show,
};
-
-static int lec_seq_open(struct inode *inode, struct file *file)
-{
- return seq_open_private(file, &lec_seq_ops, sizeof(struct lec_state));
-}
-
-static const struct file_operations lec_seq_fops = {
- .open = lec_seq_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release_private,
-};
#endif
static int lane_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
@@ -1042,7 +1035,8 @@ static int __init lane_module_init(void)
#ifdef CONFIG_PROC_FS
struct proc_dir_entry *p;
- p = proc_create("lec", 0444, atm_proc_root, &lec_seq_fops);
+ p = proc_create_seq_private("lec", 0444, atm_proc_root, &lec_seq_ops,
+ sizeof(struct lec_state), NULL);
if (!p) {
pr_err("Unable to initialize /proc/net/atm/lec\n");
return -ENOMEM;
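
The lec.c change is the standard Spectre-v1 hardening recipe: bounds-check the untrusted index first, then clamp it with array_index_nospec() before it reaches the array, so a mispredicted branch cannot speculatively read out of bounds. A sketch with invented names:

    #include <linux/nospec.h>

    #define MAX_ITEMS 48

    static void *items[MAX_ITEMS];

    static void *lookup(int idx)
    {
            if (idx < 0 || idx >= MAX_ITEMS)
                    return NULL;
            /* clamp under speculation as well as architecturally */
            idx = array_index_nospec(idx, MAX_ITEMS);
            return items[idx];
    }
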
diff --git a/net/atm/proc.c b/net/atm/proc.c
index 55410c00c7e2..0b0495a41bbe 100644
--- a/net/atm/proc.c
+++ b/net/atm/proc.c
@@ -68,7 +68,6 @@ static void atm_dev_info(struct seq_file *seq, const struct atm_dev *dev)
struct vcc_state {
int bucket;
struct sock *sk;
- int family;
};
static inline int compare_family(struct sock *sk, int family)
@@ -106,23 +105,13 @@ out:
return (l < 0);
}
-static inline void *vcc_walk(struct vcc_state *state, loff_t l)
+static inline void *vcc_walk(struct seq_file *seq, loff_t l)
{
- return __vcc_walk(&state->sk, state->family, &state->bucket, l) ?
- state : NULL;
-}
-
-static int __vcc_seq_open(struct inode *inode, struct file *file,
- int family, const struct seq_operations *ops)
-{
- struct vcc_state *state;
-
- state = __seq_open_private(file, ops, sizeof(*state));
- if (state == NULL)
- return -ENOMEM;
+ struct vcc_state *state = seq->private;
+ int family = (uintptr_t)(PDE_DATA(file_inode(seq->file)));
- state->family = family;
- return 0;
+ return __vcc_walk(&state->sk, family, &state->bucket, l) ?
+ state : NULL;
}
static void *vcc_seq_start(struct seq_file *seq, loff_t *pos)
@@ -133,7 +122,7 @@ static void *vcc_seq_start(struct seq_file *seq, loff_t *pos)
read_lock(&vcc_sklist_lock);
state->sk = SEQ_START_TOKEN;
- return left ? vcc_walk(state, left) : SEQ_START_TOKEN;
+ return left ? vcc_walk(seq, left) : SEQ_START_TOKEN;
}
static void vcc_seq_stop(struct seq_file *seq, void *v)
@@ -144,9 +133,7 @@ static void vcc_seq_stop(struct seq_file *seq, void *v)
static void *vcc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
- struct vcc_state *state = seq->private;
-
- v = vcc_walk(state, 1);
+ v = vcc_walk(seq, 1);
*pos += !!PTR_ERR(v);
return v;
}
@@ -257,18 +244,6 @@ static const struct seq_operations atm_dev_seq_ops = {
.show = atm_dev_seq_show,
};
-static int atm_dev_seq_open(struct inode *inode, struct file *file)
-{
- return seq_open(file, &atm_dev_seq_ops);
-}
-
-static const struct file_operations devices_seq_fops = {
- .open = atm_dev_seq_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
static int pvc_seq_show(struct seq_file *seq, void *v)
{
static char atm_pvc_banner[] =
@@ -292,18 +267,6 @@ static const struct seq_operations pvc_seq_ops = {
.show = pvc_seq_show,
};
-static int pvc_seq_open(struct inode *inode, struct file *file)
-{
- return __vcc_seq_open(inode, file, PF_ATMPVC, &pvc_seq_ops);
-}
-
-static const struct file_operations pvc_seq_fops = {
- .open = pvc_seq_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release_private,
-};
-
static int vcc_seq_show(struct seq_file *seq, void *v)
{
if (v == SEQ_START_TOKEN) {
@@ -326,18 +289,6 @@ static const struct seq_operations vcc_seq_ops = {
.show = vcc_seq_show,
};
-static int vcc_seq_open(struct inode *inode, struct file *file)
-{
- return __vcc_seq_open(inode, file, 0, &vcc_seq_ops);
-}
-
-static const struct file_operations vcc_seq_fops = {
- .open = vcc_seq_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release_private,
-};
-
static int svc_seq_show(struct seq_file *seq, void *v)
{
static const char atm_svc_banner[] =
@@ -361,18 +312,6 @@ static const struct seq_operations svc_seq_ops = {
.show = svc_seq_show,
};
-static int svc_seq_open(struct inode *inode, struct file *file)
-{
- return __vcc_seq_open(inode, file, PF_ATMSVC, &svc_seq_ops);
-}
-
-static const struct file_operations svc_seq_fops = {
- .open = svc_seq_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release_private,
-};
-
static ssize_t proc_dev_atm_read(struct file *file, char __user *buf,
size_t count, loff_t *pos)
{
@@ -440,58 +379,22 @@ void atm_proc_dev_deregister(struct atm_dev *dev)
kfree(dev->proc_name);
}
-static struct atm_proc_entry {
- char *name;
- const struct file_operations *proc_fops;
- struct proc_dir_entry *dirent;
-} atm_proc_ents[] = {
- { .name = "devices", .proc_fops = &devices_seq_fops },
- { .name = "pvc", .proc_fops = &pvc_seq_fops },
- { .name = "svc", .proc_fops = &svc_seq_fops },
- { .name = "vc", .proc_fops = &vcc_seq_fops },
- { .name = NULL, .proc_fops = NULL }
-};
-
-static void atm_proc_dirs_remove(void)
-{
- static struct atm_proc_entry *e;
-
- for (e = atm_proc_ents; e->name; e++) {
- if (e->dirent)
- remove_proc_entry(e->name, atm_proc_root);
- }
- remove_proc_entry("atm", init_net.proc_net);
-}
-
int __init atm_proc_init(void)
{
- static struct atm_proc_entry *e;
- int ret;
-
atm_proc_root = proc_net_mkdir(&init_net, "atm", init_net.proc_net);
if (!atm_proc_root)
- goto err_out;
- for (e = atm_proc_ents; e->name; e++) {
- struct proc_dir_entry *dirent;
-
- dirent = proc_create(e->name, 0444,
- atm_proc_root, e->proc_fops);
- if (!dirent)
- goto err_out_remove;
- e->dirent = dirent;
- }
- ret = 0;
-out:
- return ret;
-
-err_out_remove:
- atm_proc_dirs_remove();
-err_out:
- ret = -ENOMEM;
- goto out;
+ return -ENOMEM;
+ proc_create_seq("devices", 0444, atm_proc_root, &atm_dev_seq_ops);
+ proc_create_seq_private("pvc", 0444, atm_proc_root, &pvc_seq_ops,
+ sizeof(struct vcc_state), (void *)(uintptr_t)PF_ATMPVC);
+ proc_create_seq_private("svc", 0444, atm_proc_root, &svc_seq_ops,
+ sizeof(struct vcc_state), (void *)(uintptr_t)PF_ATMSVC);
+ proc_create_seq_private("vc", 0444, atm_proc_root, &vcc_seq_ops,
+ sizeof(struct vcc_state), NULL);
+ return 0;
}
void atm_proc_exit(void)
{
- atm_proc_dirs_remove();
+ remove_proc_subtree("atm", init_net.proc_net);
}
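
Note how the rewritten atm_proc_init() threads the per-file address family through the proc private-data pointer instead of storing it in per-open state; vcc_walk() decodes it again from the inode. A sketch of the decode side, helper name invented:

    static int vcc_seq_family(struct seq_file *seq)
    {
            /* registered as (void *)(uintptr_t)PF_ATMPVC etc. above */
            return (uintptr_t)PDE_DATA(file_inode(seq->file));
    }
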
diff --git a/net/atm/pvc.c b/net/atm/pvc.c
index 2cb10af16afc..9f75092fe778 100644
--- a/net/atm/pvc.c
+++ b/net/atm/pvc.c
@@ -113,7 +113,7 @@ static const struct proto_ops pvc_proto_ops = {
.socketpair = sock_no_socketpair,
.accept = sock_no_accept,
.getname = pvc_getname,
- .poll = vcc_poll,
+ .poll_mask = vcc_poll_mask,
.ioctl = vcc_ioctl,
#ifdef CONFIG_COMPAT
.compat_ioctl = vcc_compat_ioctl,
diff --git a/net/atm/svc.c b/net/atm/svc.c
index 2f91b766ac42..53f4ad7087b1 100644
--- a/net/atm/svc.c
+++ b/net/atm/svc.c
@@ -636,7 +636,7 @@ static const struct proto_ops svc_proto_ops = {
.socketpair = sock_no_socketpair,
.accept = svc_accept,
.getname = svc_getname,
- .poll = vcc_poll,
+ .poll_mask = vcc_poll_mask,
.ioctl = svc_ioctl,
#ifdef CONFIG_COMPAT
.compat_ioctl = svc_compat_ioctl,
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index 2b41366fcad2..d1d2442ce573 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -1924,19 +1924,6 @@ static const struct seq_operations ax25_info_seqops = {
.stop = ax25_info_stop,
.show = ax25_info_show,
};
-
-static int ax25_info_open(struct inode *inode, struct file *file)
-{
- return seq_open(file, &ax25_info_seqops);
-}
-
-static const struct file_operations ax25_info_fops = {
- .open = ax25_info_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
#endif
static const struct net_proto_family ax25_family_ops = {
@@ -1954,7 +1941,7 @@ static const struct proto_ops ax25_proto_ops = {
.socketpair = sock_no_socketpair,
.accept = ax25_accept,
.getname = ax25_getname,
- .poll = datagram_poll,
+ .poll_mask = datagram_poll_mask,
.ioctl = ax25_ioctl,
.listen = ax25_listen,
.shutdown = ax25_shutdown,
@@ -1989,10 +1976,10 @@ static int __init ax25_init(void)
dev_add_pack(&ax25_packet_type);
register_netdevice_notifier(&ax25_dev_notifier);
- proc_create("ax25_route", 0444, init_net.proc_net,
- &ax25_route_fops);
- proc_create("ax25", 0444, init_net.proc_net, &ax25_info_fops);
- proc_create("ax25_calls", 0444, init_net.proc_net, &ax25_uid_fops);
+ proc_create_seq("ax25_route", 0444, init_net.proc_net, &ax25_rt_seqops);
+ proc_create_seq("ax25", 0444, init_net.proc_net, &ax25_info_seqops);
+ proc_create_seq("ax25_calls", 0444, init_net.proc_net,
+ &ax25_uid_seqops);
out:
return rc;
}
diff --git a/net/ax25/ax25_route.c b/net/ax25/ax25_route.c
index 525558972fd9..a0eff323af12 100644
--- a/net/ax25/ax25_route.c
+++ b/net/ax25/ax25_route.c
@@ -323,25 +323,12 @@ static int ax25_rt_seq_show(struct seq_file *seq, void *v)
return 0;
}
-static const struct seq_operations ax25_rt_seqops = {
+const struct seq_operations ax25_rt_seqops = {
.start = ax25_rt_seq_start,
.next = ax25_rt_seq_next,
.stop = ax25_rt_seq_stop,
.show = ax25_rt_seq_show,
};
-
-static int ax25_rt_info_open(struct inode *inode, struct file *file)
-{
- return seq_open(file, &ax25_rt_seqops);
-}
-
-const struct file_operations ax25_route_fops = {
- .open = ax25_rt_info_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
#endif
/*
diff --git a/net/ax25/ax25_uid.c b/net/ax25/ax25_uid.c
index 4ebe91ba317a..99d02e390e43 100644
--- a/net/ax25/ax25_uid.c
+++ b/net/ax25/ax25_uid.c
@@ -181,25 +181,12 @@ static int ax25_uid_seq_show(struct seq_file *seq, void *v)
return 0;
}
-static const struct seq_operations ax25_uid_seqops = {
+const struct seq_operations ax25_uid_seqops = {
.start = ax25_uid_seq_start,
.next = ax25_uid_seq_next,
.stop = ax25_uid_seq_stop,
.show = ax25_uid_seq_show,
};
-
-static int ax25_uid_info_open(struct inode *inode, struct file *file)
-{
- return seq_open(file, &ax25_uid_seqops);
-}
-
-const struct file_operations ax25_uid_fops = {
- .open = ax25_uid_info_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
#endif
/*
diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c
index a11d3d89f012..a35f597e8c8b 100644
--- a/net/batman-adv/multicast.c
+++ b/net/batman-adv/multicast.c
@@ -1536,7 +1536,7 @@ out:
if (!ret && primary_if)
*primary_if = hard_iface;
- else
+ else if (hard_iface)
batadv_hardif_put(hard_iface);
return ret;
diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index 0225616d5771..3986551397ca 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -862,7 +862,7 @@ batadv_tt_prepare_tvlv_global_data(struct batadv_orig_node *orig_node,
struct batadv_orig_node_vlan *vlan;
u8 *tt_change_ptr;
- rcu_read_lock();
+ spin_lock_bh(&orig_node->vlan_list_lock);
hlist_for_each_entry_rcu(vlan, &orig_node->vlan_list, list) {
num_vlan++;
num_entries += atomic_read(&vlan->tt.num_entries);
@@ -900,7 +900,7 @@ batadv_tt_prepare_tvlv_global_data(struct batadv_orig_node *orig_node,
*tt_change = (struct batadv_tvlv_tt_change *)tt_change_ptr;
out:
- rcu_read_unlock();
+ spin_unlock_bh(&orig_node->vlan_list_lock);
return tvlv_len;
}
@@ -931,15 +931,20 @@ batadv_tt_prepare_tvlv_local_data(struct batadv_priv *bat_priv,
struct batadv_tvlv_tt_vlan_data *tt_vlan;
struct batadv_softif_vlan *vlan;
u16 num_vlan = 0;
- u16 num_entries = 0;
+ u16 vlan_entries = 0;
+ u16 total_entries = 0;
u16 tvlv_len;
u8 *tt_change_ptr;
int change_offset;
- rcu_read_lock();
+ spin_lock_bh(&bat_priv->softif_vlan_list_lock);
hlist_for_each_entry_rcu(vlan, &bat_priv->softif_vlan_list, list) {
+ vlan_entries = atomic_read(&vlan->tt.num_entries);
+ if (vlan_entries < 1)
+ continue;
+
num_vlan++;
- num_entries += atomic_read(&vlan->tt.num_entries);
+ total_entries += vlan_entries;
}
change_offset = sizeof(**tt_data);
@@ -947,7 +952,7 @@ batadv_tt_prepare_tvlv_local_data(struct batadv_priv *bat_priv,
/* if tt_len is negative, allocate the space needed by the full table */
if (*tt_len < 0)
- *tt_len = batadv_tt_len(num_entries);
+ *tt_len = batadv_tt_len(total_entries);
tvlv_len = *tt_len;
tvlv_len += change_offset;
@@ -964,6 +969,10 @@ batadv_tt_prepare_tvlv_local_data(struct batadv_priv *bat_priv,
tt_vlan = (struct batadv_tvlv_tt_vlan_data *)(*tt_data + 1);
hlist_for_each_entry_rcu(vlan, &bat_priv->softif_vlan_list, list) {
+ vlan_entries = atomic_read(&vlan->tt.num_entries);
+ if (vlan_entries < 1)
+ continue;
+
tt_vlan->vid = htons(vlan->vid);
tt_vlan->crc = htonl(vlan->tt.crc);
@@ -974,7 +983,7 @@ batadv_tt_prepare_tvlv_local_data(struct batadv_priv *bat_priv,
*tt_change = (struct batadv_tvlv_tt_change *)tt_change_ptr;
out:
- rcu_read_unlock();
+ spin_unlock_bh(&bat_priv->softif_vlan_list_lock);
return tvlv_len;
}
@@ -1538,6 +1547,8 @@ batadv_tt_global_orig_entry_find(const struct batadv_tt_global_entry *entry,
* handled by a given originator
* @entry: the TT global entry to check
* @orig_node: the originator to search in the list
+ * @flags: a pointer to store TT flags for the given @entry received
+ * from @orig_node
*
* find out if an orig_node is already in the list of a tt_global_entry.
*
@@ -1545,7 +1556,8 @@ batadv_tt_global_orig_entry_find(const struct batadv_tt_global_entry *entry,
*/
static bool
batadv_tt_global_entry_has_orig(const struct batadv_tt_global_entry *entry,
- const struct batadv_orig_node *orig_node)
+ const struct batadv_orig_node *orig_node,
+ u8 *flags)
{
struct batadv_tt_orig_list_entry *orig_entry;
bool found = false;
@@ -1553,6 +1565,10 @@ batadv_tt_global_entry_has_orig(const struct batadv_tt_global_entry *entry,
orig_entry = batadv_tt_global_orig_entry_find(entry, orig_node);
if (orig_entry) {
found = true;
+
+ if (flags)
+ *flags = orig_entry->flags;
+
batadv_tt_orig_list_entry_put(orig_entry);
}
@@ -1731,7 +1747,7 @@ static bool batadv_tt_global_add(struct batadv_priv *bat_priv,
if (!(common->flags & BATADV_TT_CLIENT_TEMP))
goto out;
if (batadv_tt_global_entry_has_orig(tt_global_entry,
- orig_node))
+ orig_node, NULL))
goto out_remove;
batadv_tt_global_del_orig_list(tt_global_entry);
goto add_orig_entry;
@@ -2880,23 +2896,46 @@ unlock:
}
/**
- * batadv_tt_local_valid() - verify that given tt entry is a valid one
+ * batadv_tt_local_valid() - verify local tt entry and get flags
* @entry_ptr: to be checked local tt entry
* @data_ptr: not used but definition required to satisfy the callback prototype
+ * @flags: a pointer to store TT flags for this client
+ *
+ * Checks the validity of the given local TT entry. If it is valid, the
+ * provided flags pointer is updated.
*
 * Return: true if the entry is valid, false otherwise.
*/
-static bool batadv_tt_local_valid(const void *entry_ptr, const void *data_ptr)
+static bool batadv_tt_local_valid(const void *entry_ptr,
+ const void *data_ptr,
+ u8 *flags)
{
const struct batadv_tt_common_entry *tt_common_entry = entry_ptr;
if (tt_common_entry->flags & BATADV_TT_CLIENT_NEW)
return false;
+
+ if (flags)
+ *flags = tt_common_entry->flags;
+
return true;
}
+/**
+ * batadv_tt_global_valid() - verify global tt entry and get flags
+ * @entry_ptr: to be checked global tt entry
+ * @data_ptr: an orig_node object (may be NULL)
+ * @flags: a pointer to store TT flags for this client
+ *
+ * Checks the validity of the given global TT entry. If it is valid, the
+ * provided flags pointer is updated with either the common (summed) TT flags
+ * if data_ptr is NULL, or the specific per-originator TT flags otherwise.
+ *
+ * Return: true if the entry is valid, false otherwise.
+ */
static bool batadv_tt_global_valid(const void *entry_ptr,
- const void *data_ptr)
+ const void *data_ptr,
+ u8 *flags)
{
const struct batadv_tt_common_entry *tt_common_entry = entry_ptr;
const struct batadv_tt_global_entry *tt_global_entry;
@@ -2910,7 +2949,8 @@ static bool batadv_tt_global_valid(const void *entry_ptr,
struct batadv_tt_global_entry,
common);
- return batadv_tt_global_entry_has_orig(tt_global_entry, orig_node);
+ return batadv_tt_global_entry_has_orig(tt_global_entry, orig_node,
+ flags);
}
/**
@@ -2920,25 +2960,34 @@ static bool batadv_tt_global_valid(const void *entry_ptr,
* @hash: hash table containing the tt entries
* @tt_len: expected tvlv tt data buffer length in number of bytes
* @tvlv_buff: pointer to the buffer to fill with the TT data
- * @valid_cb: function to filter tt change entries
+ * @valid_cb: function to filter tt change entries and to return TT flags
* @cb_data: data passed to the filter function as argument
+ *
+ * Fills the tvlv buffer with the tt entries from the specified hash. If
+ * valid_cb is not provided then this becomes a no-op.
*/
static void batadv_tt_tvlv_generate(struct batadv_priv *bat_priv,
struct batadv_hashtable *hash,
void *tvlv_buff, u16 tt_len,
bool (*valid_cb)(const void *,
- const void *),
+ const void *,
+ u8 *flags),
void *cb_data)
{
struct batadv_tt_common_entry *tt_common_entry;
struct batadv_tvlv_tt_change *tt_change;
struct hlist_head *head;
u16 tt_tot, tt_num_entries = 0;
+ u8 flags;
+ bool ret;
u32 i;
tt_tot = batadv_tt_entries(tt_len);
tt_change = (struct batadv_tvlv_tt_change *)tvlv_buff;
+ if (!valid_cb)
+ return;
+
rcu_read_lock();
for (i = 0; i < hash->size; i++) {
head = &hash->table[i];
@@ -2948,11 +2997,12 @@ static void batadv_tt_tvlv_generate(struct batadv_priv *bat_priv,
if (tt_tot == tt_num_entries)
break;
- if ((valid_cb) && (!valid_cb(tt_common_entry, cb_data)))
+ ret = valid_cb(tt_common_entry, cb_data, &flags);
+ if (!ret)
continue;
ether_addr_copy(tt_change->addr, tt_common_entry->addr);
- tt_change->flags = tt_common_entry->flags;
+ tt_change->flags = flags;
tt_change->vid = htons(tt_common_entry->vid);
memset(tt_change->reserved, 0,
sizeof(tt_change->reserved));
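
The batman-adv rework changes the filter callback's contract: it now both validates an entry and reports, through an out parameter, the flags to advertise for it, so batadv_tt_tvlv_generate() no longer copies the raw entry flags. A standalone sketch of that callback shape, names invented:

    #include <stdbool.h>
    #include <stdint.h>

    typedef bool (*valid_cb_t)(const void *entry, const void *data,
                               uint8_t *flags);

    static void fill_changes(const void **entries, int n, uint8_t *out,
                             valid_cb_t valid_cb, void *data)
    {
            uint8_t flags;
            int i;

            if (!valid_cb)          /* no filter: nothing to emit */
                    return;
            for (i = 0; i < n; i++) {
                    if (!valid_cb(entries[i], data, &flags))
                            continue;
                    *out++ = flags; /* summed or per-originator flags */
            }
    }
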
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
index 84d92a077834..510ab4f55df5 100644
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -437,16 +437,13 @@ static inline __poll_t bt_accept_poll(struct sock *parent)
return 0;
}
-__poll_t bt_sock_poll(struct file *file, struct socket *sock,
- poll_table *wait)
+__poll_t bt_sock_poll_mask(struct socket *sock, __poll_t events)
{
struct sock *sk = sock->sk;
__poll_t mask = 0;
BT_DBG("sock %p, sk %p", sock, sk);
- poll_wait(file, sk_sleep(sk), wait);
-
if (sk->sk_state == BT_LISTEN)
return bt_accept_poll(sk);
@@ -478,7 +475,7 @@ __poll_t bt_sock_poll(struct file *file, struct socket *sock,
return mask;
}
-EXPORT_SYMBOL(bt_sock_poll);
+EXPORT_SYMBOL(bt_sock_poll_mask);
int bt_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
@@ -605,15 +602,10 @@ int bt_sock_wait_ready(struct sock *sk, unsigned long flags)
EXPORT_SYMBOL(bt_sock_wait_ready);
#ifdef CONFIG_PROC_FS
-struct bt_seq_state {
- struct bt_sock_list *l;
-};
-
static void *bt_seq_start(struct seq_file *seq, loff_t *pos)
__acquires(seq->private->l->lock)
{
- struct bt_seq_state *s = seq->private;
- struct bt_sock_list *l = s->l;
+ struct bt_sock_list *l = PDE_DATA(file_inode(seq->file));
read_lock(&l->lock);
return seq_hlist_start_head(&l->head, *pos);
@@ -621,8 +613,7 @@ static void *bt_seq_start(struct seq_file *seq, loff_t *pos)
static void *bt_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
- struct bt_seq_state *s = seq->private;
- struct bt_sock_list *l = s->l;
+ struct bt_sock_list *l = PDE_DATA(file_inode(seq->file));
return seq_hlist_next(v, &l->head, pos);
}
@@ -630,16 +621,14 @@ static void *bt_seq_next(struct seq_file *seq, void *v, loff_t *pos)
static void bt_seq_stop(struct seq_file *seq, void *v)
__releases(seq->private->l->lock)
{
- struct bt_seq_state *s = seq->private;
- struct bt_sock_list *l = s->l;
+ struct bt_sock_list *l = PDE_DATA(file_inode(seq->file));
read_unlock(&l->lock);
}
static int bt_seq_show(struct seq_file *seq, void *v)
{
- struct bt_seq_state *s = seq->private;
- struct bt_sock_list *l = s->l;
+ struct bt_sock_list *l = PDE_DATA(file_inode(seq->file));
if (v == SEQ_START_TOKEN) {
seq_puts(seq ,"sk RefCnt Rmem Wmem User Inode Parent");
@@ -681,35 +670,13 @@ static const struct seq_operations bt_seq_ops = {
.show = bt_seq_show,
};
-static int bt_seq_open(struct inode *inode, struct file *file)
-{
- struct bt_sock_list *sk_list;
- struct bt_seq_state *s;
-
- sk_list = PDE_DATA(inode);
- s = __seq_open_private(file, &bt_seq_ops,
- sizeof(struct bt_seq_state));
- if (!s)
- return -ENOMEM;
-
- s->l = sk_list;
- return 0;
-}
-
-static const struct file_operations bt_fops = {
- .open = bt_seq_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release_private
-};
-
int bt_procfs_init(struct net *net, const char *name,
struct bt_sock_list *sk_list,
int (* seq_show)(struct seq_file *, void *))
{
sk_list->custom_seq_show = seq_show;
- if (!proc_create_data(name, 0, net->proc_net, &bt_fops, sk_list))
+ if (!proc_create_seq_data(name, 0, net->proc_net, &bt_seq_ops, sk_list))
return -ENOMEM;
return 0;
}
diff --git a/net/bluetooth/bnep/sock.c b/net/bluetooth/bnep/sock.c
index b5116fa9835e..00deacdcb51c 100644
--- a/net/bluetooth/bnep/sock.c
+++ b/net/bluetooth/bnep/sock.c
@@ -175,7 +175,6 @@ static const struct proto_ops bnep_sock_ops = {
.getname = sock_no_getname,
.sendmsg = sock_no_sendmsg,
.recvmsg = sock_no_recvmsg,
- .poll = sock_no_poll,
.listen = sock_no_listen,
.shutdown = sock_no_shutdown,
.setsockopt = sock_no_setsockopt,
diff --git a/net/bluetooth/cmtp/capi.c b/net/bluetooth/cmtp/capi.c
index 426a92f02db4..eb41556002e3 100644
--- a/net/bluetooth/cmtp/capi.c
+++ b/net/bluetooth/cmtp/capi.c
@@ -521,18 +521,6 @@ static int cmtp_proc_show(struct seq_file *m, void *v)
return 0;
}
-static int cmtp_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, cmtp_proc_show, PDE_DATA(inode));
-}
-
-static const struct file_operations cmtp_proc_fops = {
- .open = cmtp_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
int cmtp_attach_device(struct cmtp_session *session)
{
unsigned char buf[4];
@@ -571,7 +559,7 @@ int cmtp_attach_device(struct cmtp_session *session)
session->ctrl.send_message = cmtp_send_message;
session->ctrl.procinfo = cmtp_procinfo;
- session->ctrl.proc_fops = &cmtp_proc_fops;
+ session->ctrl.proc_show = cmtp_proc_show;
if (attach_capi_ctr(&session->ctrl) < 0) {
BT_ERR("Can't attach new controller");
diff --git a/net/bluetooth/cmtp/sock.c b/net/bluetooth/cmtp/sock.c
index ce86a7bae844..e08f28fadd65 100644
--- a/net/bluetooth/cmtp/sock.c
+++ b/net/bluetooth/cmtp/sock.c
@@ -178,7 +178,6 @@ static const struct proto_ops cmtp_sock_ops = {
.getname = sock_no_getname,
.sendmsg = sock_no_sendmsg,
.recvmsg = sock_no_recvmsg,
- .poll = sock_no_poll,
.listen = sock_no_listen,
.shutdown = sock_no_shutdown,
.setsockopt = sock_no_setsockopt,
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index 1506e1632394..d6c099861538 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -1975,7 +1975,7 @@ static const struct proto_ops hci_sock_ops = {
.sendmsg = hci_sock_sendmsg,
.recvmsg = hci_sock_recvmsg,
.ioctl = hci_sock_ioctl,
- .poll = datagram_poll,
+ .poll_mask = datagram_poll_mask,
.listen = sock_no_listen,
.shutdown = sock_no_shutdown,
.setsockopt = hci_sock_setsockopt,
diff --git a/net/bluetooth/hidp/sock.c b/net/bluetooth/hidp/sock.c
index 008ba439bd62..1eaac01f85de 100644
--- a/net/bluetooth/hidp/sock.c
+++ b/net/bluetooth/hidp/sock.c
@@ -208,7 +208,6 @@ static const struct proto_ops hidp_sock_ops = {
.getname = sock_no_getname,
.sendmsg = sock_no_sendmsg,
.recvmsg = sock_no_recvmsg,
- .poll = sock_no_poll,
.listen = sock_no_listen,
.shutdown = sock_no_shutdown,
.setsockopt = sock_no_setsockopt,
diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c
index 686bdc6b35b0..742a190034e6 100644
--- a/net/bluetooth/l2cap_sock.c
+++ b/net/bluetooth/l2cap_sock.c
@@ -1653,7 +1653,7 @@ static const struct proto_ops l2cap_sock_ops = {
.getname = l2cap_sock_getname,
.sendmsg = l2cap_sock_sendmsg,
.recvmsg = l2cap_sock_recvmsg,
- .poll = bt_sock_poll,
+ .poll_mask = bt_sock_poll_mask,
.ioctl = bt_sock_ioctl,
.mmap = sock_no_mmap,
.socketpair = sock_no_socketpair,
diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c
index d606e9212291..1cf57622473a 100644
--- a/net/bluetooth/rfcomm/sock.c
+++ b/net/bluetooth/rfcomm/sock.c
@@ -1049,7 +1049,7 @@ static const struct proto_ops rfcomm_sock_ops = {
.setsockopt = rfcomm_sock_setsockopt,
.getsockopt = rfcomm_sock_getsockopt,
.ioctl = rfcomm_sock_ioctl,
- .poll = bt_sock_poll,
+ .poll_mask = bt_sock_poll_mask,
.socketpair = sock_no_socketpair,
.mmap = sock_no_mmap
};
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index 413b8ee49fec..d60dbc61d170 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -1197,7 +1197,7 @@ static const struct proto_ops sco_sock_ops = {
.getname = sco_sock_getname,
.sendmsg = sco_sock_sendmsg,
.recvmsg = sco_sock_recvmsg,
- .poll = bt_sock_poll,
+ .poll_mask = bt_sock_poll_mask,
.ioctl = bt_sock_ioctl,
.mmap = sock_no_mmap,
.socketpair = sock_no_socketpair,
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index 82c1a6f430b3..5bb6681fa91e 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -518,8 +518,8 @@ int br_add_if(struct net_bridge *br, struct net_device *dev,
return -ELOOP;
}
- /* Device is already being bridged */
- if (br_port_exists(dev))
+ /* Device has master upper dev */
+ if (netdev_master_upper_dev_get(dev))
return -EBUSY;
/* No bridging devices that dislike that (e.g. wireless) */
diff --git a/net/bridge/netfilter/ebt_stp.c b/net/bridge/netfilter/ebt_stp.c
index 47ba98db145d..46c1fe7637ea 100644
--- a/net/bridge/netfilter/ebt_stp.c
+++ b/net/bridge/netfilter/ebt_stp.c
@@ -161,8 +161,8 @@ static int ebt_stp_mt_check(const struct xt_mtchk_param *par)
/* Make sure the match only receives stp frames */
if (!par->nft_compat &&
(!ether_addr_equal(e->destmac, eth_stp_addr) ||
- !is_broadcast_ether_addr(e->destmsk) ||
- !(e->bitmask & EBT_DESTMAC)))
+ !(e->bitmask & EBT_DESTMAC) ||
+ !is_broadcast_ether_addr(e->destmsk)))
return -EINVAL;
return 0;
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 28a4c3490359..6ba639f6c51d 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -1954,7 +1954,8 @@ static int compat_mtw_from_user(struct compat_ebt_entry_mwt *mwt,
int off, pad = 0;
unsigned int size_kern, match_size = mwt->match_size;
- strlcpy(name, mwt->u.name, sizeof(name));
+ if (strscpy(name, mwt->u.name, sizeof(name)) < 0)
+ return -EINVAL;
if (state->buf_kern_start)
dst = state->buf_kern_start + state->buf_kern_offset;
diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c
index a6fb1b3bcad9..c7991867d622 100644
--- a/net/caif/caif_socket.c
+++ b/net/caif/caif_socket.c
@@ -934,15 +934,11 @@ static int caif_release(struct socket *sock)
}
/* Copied from af_unix.c:unix_poll(), added CAIF tx_flow handling */
-static __poll_t caif_poll(struct file *file,
- struct socket *sock, poll_table *wait)
+static __poll_t caif_poll_mask(struct socket *sock, __poll_t events)
{
struct sock *sk = sock->sk;
- __poll_t mask;
struct caifsock *cf_sk = container_of(sk, struct caifsock, sk);
-
- sock_poll_wait(file, sk_sleep(sk), wait);
- mask = 0;
+ __poll_t mask = 0;
/* exceptional events? */
if (sk->sk_err)
@@ -976,7 +972,7 @@ static const struct proto_ops caif_seqpacket_ops = {
.socketpair = sock_no_socketpair,
.accept = sock_no_accept,
.getname = sock_no_getname,
- .poll = caif_poll,
+ .poll_mask = caif_poll_mask,
.ioctl = sock_no_ioctl,
.listen = sock_no_listen,
.shutdown = sock_no_shutdown,
@@ -997,7 +993,7 @@ static const struct proto_ops caif_stream_ops = {
.socketpair = sock_no_socketpair,
.accept = sock_no_accept,
.getname = sock_no_getname,
- .poll = caif_poll,
+ .poll_mask = caif_poll_mask,
.ioctl = sock_no_ioctl,
.listen = sock_no_listen,
.shutdown = sock_no_shutdown,
diff --git a/net/can/bcm.c b/net/can/bcm.c
index ac5e5e34fee3..97fedff3f0c4 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -239,18 +239,6 @@ static int bcm_proc_show(struct seq_file *m, void *v)
seq_putc(m, '\n');
return 0;
}
-
-static int bcm_proc_open(struct inode *inode, struct file *file)
-{
- return single_open_net(inode, file, bcm_proc_show);
-}
-
-static const struct file_operations bcm_proc_fops = {
- .open = bcm_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
#endif /* CONFIG_PROC_FS */
/*
@@ -1606,9 +1594,9 @@ static int bcm_connect(struct socket *sock, struct sockaddr *uaddr, int len,
if (net->can.bcmproc_dir) {
/* unique socket address as filename */
sprintf(bo->procname, "%lu", sock_i_ino(sk));
- bo->bcm_proc_read = proc_create_data(bo->procname, 0644,
+ bo->bcm_proc_read = proc_create_net_single(bo->procname, 0644,
net->can.bcmproc_dir,
- &bcm_proc_fops, sk);
+ bcm_proc_show, sk);
if (!bo->bcm_proc_read) {
ret = -ENOMEM;
goto fail;
@@ -1669,7 +1657,7 @@ static const struct proto_ops bcm_ops = {
.socketpair = sock_no_socketpair,
.accept = sock_no_accept,
.getname = sock_no_getname,
- .poll = datagram_poll,
+ .poll_mask = datagram_poll_mask,
.ioctl = can_ioctl, /* use can_ioctl() from af_can.c */
.listen = sock_no_listen,
.shutdown = sock_no_shutdown,
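proc_create_net_single() folds the open/read/llseek/release boilerplate that every single_open_net() user used to carry into procfs itself, which is why each conversion in this series deletes a *_proc_open() wrapper plus a file_operations table and keeps only the show routine. A hedged sketch of the resulting shape (names are illustrative):

static int foo_proc_show(struct seq_file *m, void *v)
{
	seq_printf(m, "hello\n");
	return 0;
}

	/* one call replaces foo_proc_open() + foo_proc_fops entirely;
	 * the last argument is stored in the proc entry's data */
	proc_create_net_single("foo", 0444, parent_dir, foo_proc_show, NULL);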
diff --git a/net/can/proc.c b/net/can/proc.c
index fdf704e9bb8c..70fea17bb04c 100644
--- a/net/can/proc.c
+++ b/net/can/proc.c
@@ -270,18 +270,6 @@ static int can_stats_proc_show(struct seq_file *m, void *v)
return 0;
}
-static int can_stats_proc_open(struct inode *inode, struct file *file)
-{
- return single_open_net(inode, file, can_stats_proc_show);
-}
-
-static const struct file_operations can_stats_proc_fops = {
- .open = can_stats_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
static int can_reset_stats_proc_show(struct seq_file *m, void *v)
{
struct net *net = m->private;
@@ -303,36 +291,12 @@ static int can_reset_stats_proc_show(struct seq_file *m, void *v)
return 0;
}
-static int can_reset_stats_proc_open(struct inode *inode, struct file *file)
-{
- return single_open_net(inode, file, can_reset_stats_proc_show);
-}
-
-static const struct file_operations can_reset_stats_proc_fops = {
- .open = can_reset_stats_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
static int can_version_proc_show(struct seq_file *m, void *v)
{
seq_printf(m, "%s\n", CAN_VERSION_STRING);
return 0;
}
-static int can_version_proc_open(struct inode *inode, struct file *file)
-{
- return single_open_net(inode, file, can_version_proc_show);
-}
-
-static const struct file_operations can_version_proc_fops = {
- .open = can_version_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
static inline void can_rcvlist_proc_show_one(struct seq_file *m, int idx,
struct net_device *dev,
struct can_dev_rcv_lists *d)
@@ -373,18 +337,6 @@ static int can_rcvlist_proc_show(struct seq_file *m, void *v)
return 0;
}
-static int can_rcvlist_proc_open(struct inode *inode, struct file *file)
-{
- return single_open_net(inode, file, can_rcvlist_proc_show);
-}
-
-static const struct file_operations can_rcvlist_proc_fops = {
- .open = can_rcvlist_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
static inline void can_rcvlist_proc_show_array(struct seq_file *m,
struct net_device *dev,
struct hlist_head *rcv_array,
@@ -440,19 +392,6 @@ static int can_rcvlist_sff_proc_show(struct seq_file *m, void *v)
return 0;
}
-static int can_rcvlist_sff_proc_open(struct inode *inode, struct file *file)
-{
- return single_open_net(inode, file, can_rcvlist_sff_proc_show);
-}
-
-static const struct file_operations can_rcvlist_sff_proc_fops = {
- .open = can_rcvlist_sff_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
-
static int can_rcvlist_eff_proc_show(struct seq_file *m, void *v)
{
struct net_device *dev;
@@ -483,18 +422,6 @@ static int can_rcvlist_eff_proc_show(struct seq_file *m, void *v)
return 0;
}
-static int can_rcvlist_eff_proc_open(struct inode *inode, struct file *file)
-{
- return single_open_net(inode, file, can_rcvlist_eff_proc_show);
-}
-
-static const struct file_operations can_rcvlist_eff_proc_fops = {
- .open = can_rcvlist_eff_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
/*
* can_init_proc - create main CAN proc directory and procfs entries
*/
@@ -510,37 +437,29 @@ void can_init_proc(struct net *net)
}
/* own procfs entries from the AF_CAN core */
- net->can.pde_version = proc_create(CAN_PROC_VERSION, 0644,
- net->can.proc_dir,
- &can_version_proc_fops);
- net->can.pde_stats = proc_create(CAN_PROC_STATS, 0644,
- net->can.proc_dir,
- &can_stats_proc_fops);
- net->can.pde_reset_stats = proc_create(CAN_PROC_RESET_STATS, 0644,
- net->can.proc_dir,
- &can_reset_stats_proc_fops);
- net->can.pde_rcvlist_err = proc_create_data(CAN_PROC_RCVLIST_ERR, 0644,
- net->can.proc_dir,
- &can_rcvlist_proc_fops,
- (void *)RX_ERR);
- net->can.pde_rcvlist_all = proc_create_data(CAN_PROC_RCVLIST_ALL, 0644,
- net->can.proc_dir,
- &can_rcvlist_proc_fops,
- (void *)RX_ALL);
- net->can.pde_rcvlist_fil = proc_create_data(CAN_PROC_RCVLIST_FIL, 0644,
- net->can.proc_dir,
- &can_rcvlist_proc_fops,
- (void *)RX_FIL);
- net->can.pde_rcvlist_inv = proc_create_data(CAN_PROC_RCVLIST_INV, 0644,
- net->can.proc_dir,
- &can_rcvlist_proc_fops,
- (void *)RX_INV);
- net->can.pde_rcvlist_eff = proc_create(CAN_PROC_RCVLIST_EFF, 0644,
- net->can.proc_dir,
- &can_rcvlist_eff_proc_fops);
- net->can.pde_rcvlist_sff = proc_create(CAN_PROC_RCVLIST_SFF, 0644,
- net->can.proc_dir,
- &can_rcvlist_sff_proc_fops);
+ net->can.pde_version = proc_create_net_single(CAN_PROC_VERSION, 0644,
+ net->can.proc_dir, can_version_proc_show, NULL);
+ net->can.pde_stats = proc_create_net_single(CAN_PROC_STATS, 0644,
+ net->can.proc_dir, can_stats_proc_show, NULL);
+ net->can.pde_reset_stats = proc_create_net_single(CAN_PROC_RESET_STATS,
+ 0644, net->can.proc_dir, can_reset_stats_proc_show,
+ NULL);
+ net->can.pde_rcvlist_err = proc_create_net_single(CAN_PROC_RCVLIST_ERR,
+ 0644, net->can.proc_dir, can_rcvlist_proc_show,
+ (void *)RX_ERR);
+ net->can.pde_rcvlist_all = proc_create_net_single(CAN_PROC_RCVLIST_ALL,
+ 0644, net->can.proc_dir, can_rcvlist_proc_show,
+ (void *)RX_ALL);
+ net->can.pde_rcvlist_fil = proc_create_net_single(CAN_PROC_RCVLIST_FIL,
+ 0644, net->can.proc_dir, can_rcvlist_proc_show,
+ (void *)RX_FIL);
+ net->can.pde_rcvlist_inv = proc_create_net_single(CAN_PROC_RCVLIST_INV,
+ 0644, net->can.proc_dir, can_rcvlist_proc_show,
+ (void *)RX_INV);
+ net->can.pde_rcvlist_eff = proc_create_net_single(CAN_PROC_RCVLIST_EFF,
+ 0644, net->can.proc_dir, can_rcvlist_eff_proc_show, NULL);
+ net->can.pde_rcvlist_sff = proc_create_net_single(CAN_PROC_RCVLIST_SFF,
+ 0644, net->can.proc_dir, can_rcvlist_sff_proc_show, NULL);
}
/*
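Where the old proc_create_data() callers stashed a per-entry cookie (RX_ERR, RX_ALL, ...) for the open routine to copy around, the proc_create_net_single() form keeps it in the proc_dir_entry and the show callback fetches it directly. A sketch of the retrieval side, assuming the converted can_rcvlist_proc_show() follows the same pattern used elsewhere in this series:

static int can_rcvlist_proc_show(struct seq_file *m, void *v)
{
	/* recover the (void *)RX_ERR / RX_ALL / ... cookie that was
	 * passed as the last argument of proc_create_net_single() */
	int idx = (int)(long)PDE_DATA(file_inode(m->file));

	seq_printf(m, "rcvlist index: %d\n", idx);
	return 0;
}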
diff --git a/net/can/raw.c b/net/can/raw.c
index 1051eee82581..fd7e2f49ea6a 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -843,7 +843,7 @@ static const struct proto_ops raw_ops = {
.socketpair = sock_no_socketpair,
.accept = sock_no_accept,
.getname = raw_getname,
- .poll = datagram_poll,
+ .poll_mask = datagram_poll_mask,
.ioctl = can_ioctl, /* use can_ioctl() from af_can.c */
.listen = sock_no_listen,
.shutdown = sock_no_shutdown,
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index ea2a6c9fb7ce..d2667e5dddc3 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -157,10 +157,12 @@ static void ceph_osd_data_bio_init(struct ceph_osd_data *osd_data,
#endif /* CONFIG_BLOCK */
static void ceph_osd_data_bvecs_init(struct ceph_osd_data *osd_data,
- struct ceph_bvec_iter *bvec_pos)
+ struct ceph_bvec_iter *bvec_pos,
+ u32 num_bvecs)
{
osd_data->type = CEPH_OSD_DATA_TYPE_BVECS;
osd_data->bvec_pos = *bvec_pos;
+ osd_data->num_bvecs = num_bvecs;
}
#define osd_req_op_data(oreq, whch, typ, fld) \
@@ -237,6 +239,22 @@ void osd_req_op_extent_osd_data_bio(struct ceph_osd_request *osd_req,
EXPORT_SYMBOL(osd_req_op_extent_osd_data_bio);
#endif /* CONFIG_BLOCK */
+void osd_req_op_extent_osd_data_bvecs(struct ceph_osd_request *osd_req,
+ unsigned int which,
+ struct bio_vec *bvecs, u32 num_bvecs,
+ u32 bytes)
+{
+ struct ceph_osd_data *osd_data;
+ struct ceph_bvec_iter it = {
+ .bvecs = bvecs,
+ .iter = { .bi_size = bytes },
+ };
+
+ osd_data = osd_req_op_data(osd_req, which, extent, osd_data);
+ ceph_osd_data_bvecs_init(osd_data, &it, num_bvecs);
+}
+EXPORT_SYMBOL(osd_req_op_extent_osd_data_bvecs);
+
void osd_req_op_extent_osd_data_bvec_pos(struct ceph_osd_request *osd_req,
unsigned int which,
struct ceph_bvec_iter *bvec_pos)
@@ -244,7 +262,7 @@ void osd_req_op_extent_osd_data_bvec_pos(struct ceph_osd_request *osd_req,
struct ceph_osd_data *osd_data;
osd_data = osd_req_op_data(osd_req, which, extent, osd_data);
- ceph_osd_data_bvecs_init(osd_data, bvec_pos);
+ ceph_osd_data_bvecs_init(osd_data, bvec_pos, 0);
}
EXPORT_SYMBOL(osd_req_op_extent_osd_data_bvec_pos);
@@ -287,7 +305,8 @@ EXPORT_SYMBOL(osd_req_op_cls_request_data_pages);
void osd_req_op_cls_request_data_bvecs(struct ceph_osd_request *osd_req,
unsigned int which,
- struct bio_vec *bvecs, u32 bytes)
+ struct bio_vec *bvecs, u32 num_bvecs,
+ u32 bytes)
{
struct ceph_osd_data *osd_data;
struct ceph_bvec_iter it = {
@@ -296,7 +315,7 @@ void osd_req_op_cls_request_data_bvecs(struct ceph_osd_request *osd_req,
};
osd_data = osd_req_op_data(osd_req, which, cls, request_data);
- ceph_osd_data_bvecs_init(osd_data, &it);
+ ceph_osd_data_bvecs_init(osd_data, &it, num_bvecs);
osd_req->r_ops[which].cls.indata_len += bytes;
osd_req->r_ops[which].indata_len += bytes;
}
diff --git a/net/compat.c b/net/compat.c
index 5ae7437d3853..7242cce5631b 100644
--- a/net/compat.c
+++ b/net/compat.c
@@ -377,7 +377,8 @@ static int compat_sock_setsockopt(struct socket *sock, int level, int optname,
optname == SO_ATTACH_REUSEPORT_CBPF)
return do_set_attach_filter(sock, level, optname,
optval, optlen);
- if (optname == SO_RCVTIMEO || optname == SO_SNDTIMEO)
+ if (!COMPAT_USE_64BIT_TIME &&
+ (optname == SO_RCVTIMEO || optname == SO_SNDTIMEO))
return do_set_sock_timeout(sock, level, optname, optval, optlen);
return sock_setsockopt(sock, level, optname, optval, optlen);
@@ -448,7 +449,8 @@ static int do_get_sock_timeout(struct socket *sock, int level, int optname,
static int compat_sock_getsockopt(struct socket *sock, int level, int optname,
char __user *optval, int __user *optlen)
{
- if (optname == SO_RCVTIMEO || optname == SO_SNDTIMEO)
+ if (!COMPAT_USE_64BIT_TIME &&
+ (optname == SO_RCVTIMEO || optname == SO_SNDTIMEO))
return do_get_sock_timeout(sock, level, optname, optval, optlen);
return sock_getsockopt(sock, level, optname, optval, optlen);
}
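The COMPAT_USE_64BIT_TIME guard matters for the x32 ABI: an x32 process enters through the compat syscall path, but its struct timeval already uses 64-bit fields, so routing it through the 32-bit conversion in do_set_sock_timeout() would misread the buffer. Illustrative layouts (an assumption spelled out for clarity, mirroring the uapi definitions rather than quoting them):

	/* ia32 compat layout - what do_set_sock_timeout() converts from */
	struct compat_timeval_sketch {
		s32 tv_sec;
		s32 tv_usec;
	};

	/* x32 layout - identical to the native 64-bit one, so when
	 * COMPAT_USE_64BIT_TIME is true the generic sock_setsockopt()
	 * path can consume the user buffer unmodified */
	struct timeval_sketch {
		s64 tv_sec;
		s64 tv_usec;
	};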
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 9938952c5c78..f19bf3dc2bd6 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -819,9 +819,8 @@ EXPORT_SYMBOL(skb_copy_and_csum_datagram_msg);
/**
- *	datagram_poll - generic datagram poll
+ *	datagram_poll_mask - generic datagram poll
- *	@file: file struct
 *	@sock: socket
- *	@wait: poll table
+ *	@events: events to wait for
*
* Datagram poll: Again totally generic. This also handles
* sequenced packet sockets providing the socket receive queue
@@ -831,14 +830,10 @@ EXPORT_SYMBOL(skb_copy_and_csum_datagram_msg);
* and you use a different write policy from sock_writeable()
* then please supply your own write_space callback.
*/
-__poll_t datagram_poll(struct file *file, struct socket *sock,
- poll_table *wait)
+__poll_t datagram_poll_mask(struct socket *sock, __poll_t events)
{
struct sock *sk = sock->sk;
- __poll_t mask;
-
- sock_poll_wait(file, sk_sleep(sk), wait);
- mask = 0;
+ __poll_t mask = 0;
/* exceptional events? */
if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
@@ -871,4 +866,4 @@ __poll_t datagram_poll(struct file *file, struct socket *sock,
return mask;
}
-EXPORT_SYMBOL(datagram_poll);
+EXPORT_SYMBOL(datagram_poll_mask);
diff --git a/net/core/dev.c b/net/core/dev.c
index af0558b00c6c..983b277a1229 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2124,7 +2124,7 @@ static bool remove_xps_queue_cpu(struct net_device *dev,
int i, j;
for (i = count, j = offset; i--; j++) {
- if (!remove_xps_queue(dev_maps, cpu, j))
+ if (!remove_xps_queue(dev_maps, tci, j))
break;
}
@@ -2884,11 +2884,7 @@ void netdev_rx_csum_fault(struct net_device *dev)
EXPORT_SYMBOL(netdev_rx_csum_fault);
#endif
-/* Actually, we should eliminate this check as soon as we know, that:
- * 1. IOMMU is present and allows to map all the memory.
- * 2. No high memory really exists on this machine.
- */
-
+/* XXX: check that highmem exists at all on the given machine. */
static int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
{
#ifdef CONFIG_HIGHMEM
@@ -2902,20 +2898,6 @@ static int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
return 1;
}
}
-
- if (PCI_DMA_BUS_IS_PHYS) {
- struct device *pdev = dev->dev.parent;
-
- if (!pdev)
- return 0;
- for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
- skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
- dma_addr_t addr = page_to_phys(skb_frag_page(frag));
-
- if (!pdev->dma_mask || addr + PAGE_SIZE - 1 > *pdev->dma_mask)
- return 1;
- }
- }
#endif
return 0;
}
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 03416e6dd5d7..ba02f0dfe85c 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -1032,6 +1032,11 @@ static noinline_for_stack int ethtool_get_rxnfc(struct net_device *dev,
info_size = sizeof(info);
if (copy_from_user(&info, useraddr, info_size))
return -EFAULT;
+ /* Since malicious users may modify the original data,
+ * we need to check whether FLOW_RSS is still requested.
+ */
+ if (!(info.flow_type & FLOW_RSS))
+ return -EINVAL;
}
if (info.cmd == ETHTOOL_GRXCLSRLALL) {
diff --git a/net/core/filter.c b/net/core/filter.c
index d31aff93270d..201ff36b17a8 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -481,11 +481,18 @@ do_pass:
#define BPF_EMIT_JMP \
do { \
+ const s32 off_min = S16_MIN, off_max = S16_MAX; \
+ s32 off; \
+ \
if (target >= len || target < 0) \
goto err; \
- insn->off = addrs ? addrs[target] - addrs[i] - 1 : 0; \
+ off = addrs ? addrs[target] - addrs[i] - 1 : 0; \
/* Adjust pc relative offset for 2nd or 3rd insn. */ \
- insn->off -= insn - tmp_insns; \
+ off -= insn - tmp_insns; \
+ /* Reject anything not fitting into insn->off. */ \
+ if (off < off_min || off > off_max) \
+ goto err; \
+ insn->off = off; \
} while (0)
case BPF_JMP | BPF_JA:
@@ -3240,6 +3247,7 @@ BPF_CALL_4(bpf_skb_set_tunnel_key, struct sk_buff *, skb,
skb_dst_set(skb, (struct dst_entry *) md);
info = &md->u.tun_info;
+ memset(info, 0, sizeof(*info));
info->mode = IP_TUNNEL_INFO_TX;
info->key.tun_flags = TUNNEL_KEY | TUNNEL_CSUM | TUNNEL_NOCACHE;
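The BPF_EMIT_JMP hardening above exists because insn->off is a signed 16-bit field, while classic-BPF to eBPF translation can stretch jump displacements (each cBPF instruction may expand to several eBPF instructions). Without the range check the assignment would silently truncate; with it the program is rejected. A toy illustration of the hazard (values invented):

	/* a displacement that survived the cBPF checks but cannot be
	 * encoded in the 16-bit eBPF offset field */
	s32 off = 40000;
	s16 stored = (s16)off;	/* would silently wrap to -25536 */

	if (off < S16_MIN || off > S16_MAX)
		return -EINVAL;	/* what the macro now does instead */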
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index ce519861be59..1fb43bff417d 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -59,7 +59,7 @@ static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
struct net_device *dev);
#ifdef CONFIG_PROC_FS
-static const struct file_operations neigh_stat_seq_fops;
+static const struct seq_operations neigh_stat_seq_ops;
#endif
/*
@@ -1558,8 +1558,8 @@ void neigh_table_init(int index, struct neigh_table *tbl)
panic("cannot create neighbour cache statistics");
#ifdef CONFIG_PROC_FS
- if (!proc_create_data(tbl->id, 0, init_net.proc_net_stat,
- &neigh_stat_seq_fops, tbl))
+ if (!proc_create_seq_data(tbl->id, 0, init_net.proc_net_stat,
+ &neigh_stat_seq_ops, tbl))
panic("cannot create neighbour proc dir entry");
#endif
@@ -2786,7 +2786,7 @@ EXPORT_SYMBOL(neigh_seq_stop);
static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
{
- struct neigh_table *tbl = seq->private;
+ struct neigh_table *tbl = PDE_DATA(file_inode(seq->file));
int cpu;
if (*pos == 0)
@@ -2803,7 +2803,7 @@ static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
- struct neigh_table *tbl = seq->private;
+ struct neigh_table *tbl = PDE_DATA(file_inode(seq->file));
int cpu;
for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
@@ -2822,7 +2822,7 @@ static void neigh_stat_seq_stop(struct seq_file *seq, void *v)
static int neigh_stat_seq_show(struct seq_file *seq, void *v)
{
- struct neigh_table *tbl = seq->private;
+ struct neigh_table *tbl = PDE_DATA(file_inode(seq->file));
struct neigh_statistics *st = v;
if (v == SEQ_START_TOKEN) {
@@ -2861,25 +2861,6 @@ static const struct seq_operations neigh_stat_seq_ops = {
.stop = neigh_stat_seq_stop,
.show = neigh_stat_seq_show,
};
-
-static int neigh_stat_seq_open(struct inode *inode, struct file *file)
-{
- int ret = seq_open(file, &neigh_stat_seq_ops);
-
- if (!ret) {
- struct seq_file *sf = file->private_data;
- sf->private = PDE_DATA(inode);
- }
- return ret;
-};
-
-static const struct file_operations neigh_stat_seq_fops = {
- .open = neigh_stat_seq_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
#endif /* CONFIG_PROC_FS */
static inline size_t neigh_nlmsg_size(void)
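This conversion also changes how the neighbour table reaches its seq callbacks: instead of an open routine copying PDE_DATA(inode) into seq->private, the pointer stays in the proc entry and each callback pulls it with PDE_DATA(file_inode(seq->file)). A sketch of the pairing, with illustrative output:

static int neigh_stat_seq_show_sketch(struct seq_file *seq, void *v)
{
	/* Registered via
	 *   proc_create_seq_data(tbl->id, 0, parent, &ops, tbl);
	 * so the table pointer is recovered from the proc entry here,
	 * with no open-time copy into seq->private. */
	struct neigh_table *tbl = PDE_DATA(file_inode(seq->file));

	seq_printf(seq, "table: %s\n", tbl->id);
	return 0;
}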
diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c
index 9737302907b1..63881f72ef71 100644
--- a/net/core/net-procfs.c
+++ b/net/core/net-procfs.c
@@ -175,19 +175,6 @@ static const struct seq_operations dev_seq_ops = {
.show = dev_seq_show,
};
-static int dev_seq_open(struct inode *inode, struct file *file)
-{
- return seq_open_net(inode, file, &dev_seq_ops,
- sizeof(struct seq_net_private));
-}
-
-static const struct file_operations dev_seq_fops = {
- .open = dev_seq_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release_net,
-};
-
static const struct seq_operations softnet_seq_ops = {
.start = softnet_seq_start,
.next = softnet_seq_next,
@@ -195,18 +182,6 @@ static const struct seq_operations softnet_seq_ops = {
.show = softnet_seq_show,
};
-static int softnet_seq_open(struct inode *inode, struct file *file)
-{
- return seq_open(file, &softnet_seq_ops);
-}
-
-static const struct file_operations softnet_seq_fops = {
- .open = softnet_seq_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
static void *ptype_get_idx(loff_t pos)
{
struct packet_type *pt = NULL;
@@ -297,30 +272,18 @@ static const struct seq_operations ptype_seq_ops = {
.show = ptype_seq_show,
};
-static int ptype_seq_open(struct inode *inode, struct file *file)
-{
- return seq_open_net(inode, file, &ptype_seq_ops,
- sizeof(struct seq_net_private));
-}
-
-static const struct file_operations ptype_seq_fops = {
- .open = ptype_seq_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release_net,
-};
-
-
static int __net_init dev_proc_net_init(struct net *net)
{
int rc = -ENOMEM;
- if (!proc_create("dev", 0444, net->proc_net, &dev_seq_fops))
+ if (!proc_create_net("dev", 0444, net->proc_net, &dev_seq_ops,
+ sizeof(struct seq_net_private)))
goto out;
- if (!proc_create("softnet_stat", 0444, net->proc_net,
- &softnet_seq_fops))
+ if (!proc_create_seq("softnet_stat", 0444, net->proc_net,
+ &softnet_seq_ops))
goto out_dev;
- if (!proc_create("ptype", 0444, net->proc_net, &ptype_seq_fops))
+ if (!proc_create_net("ptype", 0444, net->proc_net, &ptype_seq_ops,
+ sizeof(struct seq_net_private)))
goto out_softnet;
if (wext_proc_init(net))
@@ -377,22 +340,10 @@ static const struct seq_operations dev_mc_seq_ops = {
.show = dev_mc_seq_show,
};
-static int dev_mc_seq_open(struct inode *inode, struct file *file)
-{
- return seq_open_net(inode, file, &dev_mc_seq_ops,
- sizeof(struct seq_net_private));
-}
-
-static const struct file_operations dev_mc_seq_fops = {
- .open = dev_mc_seq_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release_net,
-};
-
static int __net_init dev_mc_net_init(struct net *net)
{
- if (!proc_create("dev_mcast", 0, net->proc_net, &dev_mc_seq_fops))
+ if (!proc_create_net("dev_mcast", 0, net->proc_net, &dev_mc_seq_ops,
+ sizeof(struct seq_net_private)))
return -ENOMEM;
return 0;
}
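Note the two helpers used side by side above: proc_create_net() allocates per-open state beginning with struct seq_net_private so the iterator can recover its network namespace, whereas proc_create_seq() is for namespace-agnostic tables like softnet_stat. A hedged sketch of the namespaced case:

static int dev_seq_show_sketch(struct seq_file *seq, void *v)
{
	/* valid because the entry was created with proc_create_net()
	 * and a state size of sizeof(struct seq_net_private) */
	struct net *net = seq_file_net(seq);

	seq_printf(seq, "netns inum: %u\n", net->ns.inum);
	return 0;
}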
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index c476f0794132..bb7e80f4ced3 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -1214,9 +1214,6 @@ static ssize_t xps_cpus_show(struct netdev_queue *queue,
cpumask_var_t mask;
unsigned long index;
- if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
- return -ENOMEM;
-
index = get_netdev_queue_index(queue);
if (dev->num_tc) {
@@ -1226,6 +1223,9 @@ static ssize_t xps_cpus_show(struct netdev_queue *queue,
return -EINVAL;
}
+ if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
+ return -ENOMEM;
+
rcu_read_lock();
dev_maps = rcu_dereference(dev->xps_maps);
if (dev_maps) {
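The xps_cpus_show() reorder is a small but classic leak fix: the cpumask was allocated before the queue index was validated, so the early -EINVAL return leaked it whenever CONFIG_CPUMASK_OFFSTACK makes zalloc_cpumask_var() a real allocation. The general shape of the bug and the fix (index_is_bad is an illustrative stand-in for the tc range check above):

	/* buggy ordering: allocate, then validate */
	if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
		return -ENOMEM;
	if (index_is_bad)
		return -EINVAL;		/* mask leaks here */

	/* fixed ordering: validate first, allocate last */
	if (index_is_bad)
		return -EINVAL;
	if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
		return -ENOMEM;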
diff --git a/net/core/sock.c b/net/core/sock.c
index 6444525f610c..2aed99a541d5 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1606,7 +1606,7 @@ static void __sk_free(struct sock *sk)
if (likely(sk->sk_net_refcnt))
sock_inuse_add(sock_net(sk), -1);
- if (unlikely(sock_diag_has_destroy_listeners(sk) && sk->sk_net_refcnt))
+ if (unlikely(sk->sk_net_refcnt && sock_diag_has_destroy_listeners(sk)))
sock_diag_broadcast_destroy(sk);
else
sk_destruct(sk);
@@ -2567,12 +2567,6 @@ int sock_no_getname(struct socket *sock, struct sockaddr *saddr,
}
EXPORT_SYMBOL(sock_no_getname);
-__poll_t sock_no_poll(struct file *file, struct socket *sock, poll_table *pt)
-{
- return 0;
-}
-EXPORT_SYMBOL(sock_no_poll);
-
int sock_no_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
return -EOPNOTSUPP;
@@ -3439,22 +3433,10 @@ static const struct seq_operations proto_seq_ops = {
.show = proto_seq_show,
};
-static int proto_seq_open(struct inode *inode, struct file *file)
-{
- return seq_open_net(inode, file, &proto_seq_ops,
- sizeof(struct seq_net_private));
-}
-
-static const struct file_operations proto_seq_fops = {
- .open = proto_seq_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release_net,
-};
-
static __net_init int proto_init_net(struct net *net)
{
- if (!proc_create("protocols", 0444, net->proc_net, &proto_seq_fops))
+ if (!proc_create_net("protocols", 0444, net->proc_net, &proto_seq_ops,
+ sizeof(struct seq_net_private)))
return -ENOMEM;
return 0;
diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c
index 92d016e87816..385f153fe031 100644
--- a/net/dccp/ccids/ccid2.c
+++ b/net/dccp/ccids/ccid2.c
@@ -126,6 +126,16 @@ static void ccid2_change_l_seq_window(struct sock *sk, u64 val)
DCCPF_SEQ_WMAX));
}
+static void dccp_tasklet_schedule(struct sock *sk)
+{
+ struct tasklet_struct *t = &dccp_sk(sk)->dccps_xmitlet;
+
+ if (!test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) {
+ sock_hold(sk);
+ __tasklet_schedule(t);
+ }
+}
+
static void ccid2_hc_tx_rto_expire(struct timer_list *t)
{
struct ccid2_hc_tx_sock *hc = from_timer(hc, t, tx_rtotimer);
@@ -166,7 +176,7 @@ static void ccid2_hc_tx_rto_expire(struct timer_list *t)
/* if we were blocked before, we may now send cwnd=1 packet */
if (sender_was_blocked)
- tasklet_schedule(&dccp_sk(sk)->dccps_xmitlet);
+ dccp_tasklet_schedule(sk);
/* restart backed-off timer */
sk_reset_timer(sk, &hc->tx_rtotimer, jiffies + hc->tx_rto);
out:
@@ -706,7 +716,7 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
done:
/* check if incoming Acks allow pending packets to be sent */
if (sender_was_blocked && !ccid2_cwnd_network_limited(hc))
- tasklet_schedule(&dccp_sk(sk)->dccps_xmitlet);
+ dccp_tasklet_schedule(sk);
dccp_ackvec_parsed_cleanup(&hc->tx_av_chunks);
}
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index f91e3816806b..0ea2ee56ac1b 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -316,8 +316,7 @@ int dccp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
int flags, int *addr_len);
void dccp_shutdown(struct sock *sk, int how);
int inet_dccp_listen(struct socket *sock, int backlog);
-__poll_t dccp_poll(struct file *file, struct socket *sock,
- poll_table *wait);
+__poll_t dccp_poll_mask(struct socket *sock, __poll_t events);
int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len);
void dccp_req_err(struct sock *sk, u64 seq);
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index b08feb219b44..a9e478cd3787 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -984,7 +984,7 @@ static const struct proto_ops inet_dccp_ops = {
.accept = inet_accept,
.getname = inet_getname,
/* FIXME: work on tcp_poll to rename it to inet_csk_poll */
- .poll = dccp_poll,
+ .poll_mask = dccp_poll_mask,
.ioctl = inet_ioctl,
/* FIXME: work on inet_listen to rename it to sock_common_listen */
.listen = inet_dccp_listen,
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 6344f1b18a6a..17fc4e0166ba 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -1070,7 +1070,7 @@ static const struct proto_ops inet6_dccp_ops = {
.socketpair = sock_no_socketpair,
.accept = inet_accept,
.getname = inet6_getname,
- .poll = dccp_poll,
+ .poll_mask = dccp_poll_mask,
.ioctl = inet6_ioctl,
.listen = inet_dccp_listen,
.shutdown = inet_shutdown,
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 84cd4e3fd01b..ca21c1c76da0 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -283,9 +283,7 @@ int dccp_disconnect(struct sock *sk, int flags)
dccp_clear_xmit_timers(sk);
ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
- ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
dp->dccps_hc_rx_ccid = NULL;
- dp->dccps_hc_tx_ccid = NULL;
__skb_queue_purge(&sk->sk_receive_queue);
__skb_queue_purge(&sk->sk_write_queue);
@@ -314,20 +312,11 @@ int dccp_disconnect(struct sock *sk, int flags)
EXPORT_SYMBOL_GPL(dccp_disconnect);
-/*
- * Wait for a DCCP event.
- *
- * Note that we don't need to lock the socket, as the upper poll layers
- * take care of normal races (between the test and the event) and we don't
- * go look at any of the socket buffers directly.
- */
-__poll_t dccp_poll(struct file *file, struct socket *sock,
- poll_table *wait)
+__poll_t dccp_poll_mask(struct socket *sock, __poll_t events)
{
__poll_t mask;
struct sock *sk = sock->sk;
- sock_poll_wait(file, sk_sleep(sk), wait);
if (sk->sk_state == DCCP_LISTEN)
return inet_csk_listen_poll(sk);
@@ -369,7 +358,7 @@ __poll_t dccp_poll(struct file *file, struct socket *sock,
return mask;
}
-EXPORT_SYMBOL_GPL(dccp_poll);
+EXPORT_SYMBOL_GPL(dccp_poll_mask);
int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
{
diff --git a/net/dccp/timer.c b/net/dccp/timer.c
index b50a8732ff43..1501a20a94ca 100644
--- a/net/dccp/timer.c
+++ b/net/dccp/timer.c
@@ -232,6 +232,7 @@ static void dccp_write_xmitlet(unsigned long data)
else
dccp_write_xmit(sk);
bh_unlock_sock(sk);
+ sock_put(sk);
}
static void dccp_write_xmit_timer(struct timer_list *t)
@@ -240,7 +241,6 @@ static void dccp_write_xmit_timer(struct timer_list *t)
struct sock *sk = &dp->dccps_inet_connection.icsk_inet.sk;
dccp_write_xmitlet((unsigned long)sk);
- sock_put(sk);
}
void dccp_init_xmit_timers(struct sock *sk)
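The three DCCP hunks above form one fix: the socket could previously be freed while its xmitlet was still queued. dccp_tasklet_schedule() now takes a reference exactly when it wins the race to set TASKLET_STATE_SCHED, and the matching sock_put() moves into the tasklet body, so every successful schedule is balanced by one drop whether the tasklet was kicked from the RTO timer or from ACK processing. The pairing, reduced to its skeleton (a sketch of the logic above, not new code):

	if (!test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) {
		sock_hold(sk);		/* ref taken iff we queued it */
		__tasklet_schedule(t);
	}

	/* ...later, at the end of dccp_write_xmitlet(): */
	bh_unlock_sock(sk);
	sock_put(sk);			/* the one matching drop */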
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index 32751602767f..9a686d890bfa 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -1207,11 +1207,11 @@ static int dn_getname(struct socket *sock, struct sockaddr *uaddr,int peer)
}
-static __poll_t dn_poll(struct file *file, struct socket *sock, poll_table *wait)
+static __poll_t dn_poll_mask(struct socket *sock, __poll_t events)
{
struct sock *sk = sock->sk;
struct dn_scp *scp = DN_SK(sk);
- __poll_t mask = datagram_poll(file, sock, wait);
+ __poll_t mask = datagram_poll_mask(sock, events);
if (!skb_queue_empty(&scp->other_receive_queue))
mask |= EPOLLRDBAND;
@@ -2314,19 +2314,6 @@ static const struct seq_operations dn_socket_seq_ops = {
.stop = dn_socket_seq_stop,
.show = dn_socket_seq_show,
};
-
-static int dn_socket_seq_open(struct inode *inode, struct file *file)
-{
- return seq_open_private(file, &dn_socket_seq_ops,
- sizeof(struct dn_iter_state));
-}
-
-static const struct file_operations dn_socket_seq_fops = {
- .open = dn_socket_seq_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release_private,
-};
#endif
static const struct net_proto_family dn_family_ops = {
@@ -2344,7 +2331,7 @@ static const struct proto_ops dn_proto_ops = {
.socketpair = sock_no_socketpair,
.accept = dn_accept,
.getname = dn_getname,
- .poll = dn_poll,
+ .poll_mask = dn_poll_mask,
.ioctl = dn_ioctl,
.listen = dn_listen,
.shutdown = dn_shutdown,
@@ -2383,7 +2370,9 @@ static int __init decnet_init(void)
dev_add_pack(&dn_dix_packet_type);
register_netdevice_notifier(&dn_dev_notifier);
- proc_create("decnet", 0444, init_net.proc_net, &dn_socket_seq_fops);
+ proc_create_seq_private("decnet", 0444, init_net.proc_net,
+ &dn_socket_seq_ops, sizeof(struct dn_iter_state),
+ NULL);
dn_register_sysctl();
out:
return rc;
diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c
index c03b046478c3..bfd43e8f2c06 100644
--- a/net/decnet/dn_dev.c
+++ b/net/decnet/dn_dev.c
@@ -1382,19 +1382,6 @@ static const struct seq_operations dn_dev_seq_ops = {
.stop = dn_dev_seq_stop,
.show = dn_dev_seq_show,
};
-
-static int dn_dev_seq_open(struct inode *inode, struct file *file)
-{
- return seq_open(file, &dn_dev_seq_ops);
-}
-
-static const struct file_operations dn_dev_seq_fops = {
- .open = dn_dev_seq_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
#endif /* CONFIG_PROC_FS */
static int addr[2];
@@ -1424,7 +1411,7 @@ void __init dn_dev_init(void)
rtnl_register_module(THIS_MODULE, PF_DECnet, RTM_GETADDR,
NULL, dn_nl_dump_ifaddr, 0);
- proc_create("decnet_dev", 0444, init_net.proc_net, &dn_dev_seq_fops);
+ proc_create_seq("decnet_dev", 0444, init_net.proc_net, &dn_dev_seq_ops);
#ifdef CONFIG_SYSCTL
{
diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c
index 13156165afa3..94b306f6d551 100644
--- a/net/decnet/dn_neigh.c
+++ b/net/decnet/dn_neigh.c
@@ -589,27 +589,13 @@ static const struct seq_operations dn_neigh_seq_ops = {
.stop = neigh_seq_stop,
.show = dn_neigh_seq_show,
};
-
-static int dn_neigh_seq_open(struct inode *inode, struct file *file)
-{
- return seq_open_net(inode, file, &dn_neigh_seq_ops,
- sizeof(struct neigh_seq_state));
-}
-
-static const struct file_operations dn_neigh_seq_fops = {
- .open = dn_neigh_seq_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release_net,
-};
-
#endif
void __init dn_neigh_init(void)
{
neigh_table_init(NEIGH_DN_TABLE, &dn_neigh_table);
- proc_create("decnet_neigh", 0444, init_net.proc_net,
- &dn_neigh_seq_fops);
+ proc_create_net("decnet_neigh", 0444, init_net.proc_net,
+ &dn_neigh_seq_ops, sizeof(struct neigh_seq_state));
}
void __exit dn_neigh_cleanup(void)
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index eca0cc6b761f..e74765024d88 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -1852,20 +1852,6 @@ static const struct seq_operations dn_rt_cache_seq_ops = {
.stop = dn_rt_cache_seq_stop,
.show = dn_rt_cache_seq_show,
};
-
-static int dn_rt_cache_seq_open(struct inode *inode, struct file *file)
-{
- return seq_open_private(file, &dn_rt_cache_seq_ops,
- sizeof(struct dn_rt_cache_iter_state));
-}
-
-static const struct file_operations dn_rt_cache_seq_fops = {
- .open = dn_rt_cache_seq_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release_private,
-};
-
#endif /* CONFIG_PROC_FS */
void __init dn_route_init(void)
@@ -1918,8 +1904,9 @@ void __init dn_route_init(void)
dn_dst_ops.gc_thresh = (dn_rt_hash_mask + 1);
- proc_create("decnet_cache", 0444, init_net.proc_net,
- &dn_rt_cache_seq_fops);
+ proc_create_seq_private("decnet_cache", 0444, init_net.proc_net,
+ &dn_rt_cache_seq_ops,
+ sizeof(struct dn_rt_cache_iter_state), NULL);
#ifdef CONFIG_DECNET_ROUTER
rtnl_register_module(THIS_MODULE, PF_DECnet, RTM_GETROUTE,
diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c
index adf50fbc4c13..47725250b4ca 100644
--- a/net/dsa/dsa2.c
+++ b/net/dsa/dsa2.c
@@ -258,11 +258,13 @@ static void dsa_tree_teardown_default_cpu(struct dsa_switch_tree *dst)
static int dsa_port_setup(struct dsa_port *dp)
{
struct dsa_switch *ds = dp->ds;
- int err;
+ int err = 0;
memset(&dp->devlink_port, 0, sizeof(dp->devlink_port));
- err = devlink_port_register(ds->devlink, &dp->devlink_port, dp->index);
+ if (dp->type != DSA_PORT_TYPE_UNUSED)
+ err = devlink_port_register(ds->devlink, &dp->devlink_port,
+ dp->index);
if (err)
return err;
@@ -293,7 +295,8 @@ static int dsa_port_setup(struct dsa_port *dp)
static void dsa_port_teardown(struct dsa_port *dp)
{
- devlink_port_unregister(&dp->devlink_port);
+ if (dp->type != DSA_PORT_TYPE_UNUSED)
+ devlink_port_unregister(&dp->devlink_port);
switch (dp->type) {
case DSA_PORT_TYPE_UNUSED:
diff --git a/net/ieee802154/6lowpan/6lowpan_i.h b/net/ieee802154/6lowpan/6lowpan_i.h
index b8d95cb71c25..44a7e16bf3b5 100644
--- a/net/ieee802154/6lowpan/6lowpan_i.h
+++ b/net/ieee802154/6lowpan/6lowpan_i.h
@@ -20,8 +20,8 @@ typedef unsigned __bitwise lowpan_rx_result;
struct frag_lowpan_compare_key {
u16 tag;
u16 d_size;
- const struct ieee802154_addr src;
- const struct ieee802154_addr dst;
+ struct ieee802154_addr src;
+ struct ieee802154_addr dst;
};
/* Equivalent of ipv4 struct ipq
diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c
index 1790b65944b3..2cc224106b69 100644
--- a/net/ieee802154/6lowpan/reassembly.c
+++ b/net/ieee802154/6lowpan/reassembly.c
@@ -75,14 +75,14 @@ fq_find(struct net *net, const struct lowpan_802154_cb *cb,
{
struct netns_ieee802154_lowpan *ieee802154_lowpan =
net_ieee802154_lowpan(net);
- struct frag_lowpan_compare_key key = {
- .tag = cb->d_tag,
- .d_size = cb->d_size,
- .src = *src,
- .dst = *dst,
- };
+ struct frag_lowpan_compare_key key = {};
struct inet_frag_queue *q;
+ key.tag = cb->d_tag;
+ key.d_size = cb->d_size;
+ key.src = *src;
+ key.dst = *dst;
+
q = inet_frag_find(&ieee802154_lowpan->frags, &key);
if (!q)
return NULL;
@@ -372,7 +372,7 @@ int lowpan_frag_rcv(struct sk_buff *skb, u8 frag_type)
struct lowpan_frag_queue *fq;
struct net *net = dev_net(skb->dev);
struct lowpan_802154_cb *cb = lowpan_802154_cb(skb);
- struct ieee802154_hdr hdr;
+ struct ieee802154_hdr hdr = {};
int err;
if (ieee802154_hdr_peek_addrs(skb, &hdr) < 0)
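The reassembly fix works because inet_frag_find() hashes struct frag_lowpan_compare_key as raw bytes: every byte of the key, padding included, must be deterministic. Dropping the const qualifiers allows plain member stores after an empty initializer (which GCC in practice compiles to a full-object clear), and zeroing hdr ensures the address structs copied into the key carry no stale padding of their own. The defensive pattern, in general form (a sketch of what the hunks above establish):

	struct frag_lowpan_compare_key key = {};  /* clears the whole object */

	/* member stores after the zeroing initializer keep padding
	 * deterministic, which matters because the rhashtable hashes
	 * sizeof(key) raw bytes */
	key.tag = cb->d_tag;
	key.d_size = cb->d_size;
	key.src = *src;
	key.dst = *dst;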
diff --git a/net/ieee802154/socket.c b/net/ieee802154/socket.c
index a60658c85a9a..a0768d2759b8 100644
--- a/net/ieee802154/socket.c
+++ b/net/ieee802154/socket.c
@@ -423,7 +423,7 @@ static const struct proto_ops ieee802154_raw_ops = {
.socketpair = sock_no_socketpair,
.accept = sock_no_accept,
.getname = sock_no_getname,
- .poll = datagram_poll,
+ .poll_mask = datagram_poll_mask,
.ioctl = ieee802154_sock_ioctl,
.listen = sock_no_listen,
.shutdown = sock_no_shutdown,
@@ -969,7 +969,7 @@ static const struct proto_ops ieee802154_dgram_ops = {
.socketpair = sock_no_socketpair,
.accept = sock_no_accept,
.getname = sock_no_getname,
- .poll = datagram_poll,
+ .poll_mask = datagram_poll_mask,
.ioctl = ieee802154_sock_ioctl,
.listen = sock_no_listen,
.shutdown = sock_no_shutdown,
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index eaed0367e669..8a59428e63ab 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -986,7 +986,7 @@ const struct proto_ops inet_stream_ops = {
.socketpair = sock_no_socketpair,
.accept = inet_accept,
.getname = inet_getname,
- .poll = tcp_poll,
+ .poll_mask = tcp_poll_mask,
.ioctl = inet_ioctl,
.listen = inet_listen,
.shutdown = inet_shutdown,
@@ -1018,7 +1018,7 @@ const struct proto_ops inet_dgram_ops = {
.socketpair = sock_no_socketpair,
.accept = sock_no_accept,
.getname = inet_getname,
- .poll = udp_poll,
+ .poll_mask = udp_poll_mask,
.ioctl = inet_ioctl,
.listen = sock_no_listen,
.shutdown = inet_shutdown,
@@ -1039,7 +1039,7 @@ EXPORT_SYMBOL(inet_dgram_ops);
/*
* For SOCK_RAW sockets; should be the same as inet_dgram_ops but without
- * udp_poll
+ * udp_poll_mask
*/
static const struct proto_ops inet_sockraw_ops = {
.family = PF_INET,
@@ -1050,7 +1050,7 @@ static const struct proto_ops inet_sockraw_ops = {
.socketpair = sock_no_socketpair,
.accept = sock_no_accept,
.getname = inet_getname,
- .poll = datagram_poll,
+ .poll_mask = datagram_poll_mask,
.ioctl = inet_ioctl,
.listen = sock_no_listen,
.shutdown = inet_shutdown,
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index bf6c2d4d4fdc..e90c89ef8c08 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -1418,23 +1418,12 @@ static const struct seq_operations arp_seq_ops = {
.show = arp_seq_show,
};
-static int arp_seq_open(struct inode *inode, struct file *file)
-{
- return seq_open_net(inode, file, &arp_seq_ops,
- sizeof(struct neigh_seq_state));
-}
-
-static const struct file_operations arp_seq_fops = {
- .open = arp_seq_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release_net,
-};
-
+/* ------------------------------------------------------------------------ */
static int __net_init arp_net_init(struct net *net)
{
- if (!proc_create("arp", 0444, net->proc_net, &arp_seq_fops))
+ if (!proc_create_net("arp", 0444, net->proc_net, &arp_seq_ops,
+ sizeof(struct neigh_seq_state)))
return -ENOMEM;
return 0;
}
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index f05afaf3235c..e66172aaf241 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -326,10 +326,11 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
u8 tos, int oif, struct net_device *dev,
int rpf, struct in_device *idev, u32 *itag)
{
+ struct net *net = dev_net(dev);
+ struct flow_keys flkeys;
int ret, no_addr;
struct fib_result res;
struct flowi4 fl4;
- struct net *net = dev_net(dev);
bool dev_match;
fl4.flowi4_oif = 0;
@@ -347,6 +348,11 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
no_addr = idev->ifa_list == NULL;
fl4.flowi4_mark = IN_DEV_SRC_VMARK(idev) ? skb->mark : 0;
+ if (!fib4_rules_early_flow_dissect(net, skb, &fl4, &flkeys)) {
+ fl4.flowi4_proto = 0;
+ fl4.fl4_sport = 0;
+ fl4.fl4_dport = 0;
+ }
trace_fib_validate_source(dev, &fl4);
@@ -643,6 +649,7 @@ const struct nla_policy rtm_ipv4_policy[RTA_MAX + 1] = {
[RTA_ENCAP] = { .type = NLA_NESTED },
[RTA_UID] = { .type = NLA_U32 },
[RTA_MARK] = { .type = NLA_U32 },
+ [RTA_TABLE] = { .type = NLA_U32 },
};
static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 3dcffd3ce98c..99c23a0cb8ca 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -2348,18 +2348,6 @@ static int fib_triestat_seq_show(struct seq_file *seq, void *v)
return 0;
}
-static int fib_triestat_seq_open(struct inode *inode, struct file *file)
-{
- return single_open_net(inode, file, fib_triestat_seq_show);
-}
-
-static const struct file_operations fib_triestat_fops = {
- .open = fib_triestat_seq_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release_net,
-};
-
static struct key_vector *fib_trie_get_idx(struct seq_file *seq, loff_t pos)
{
struct fib_trie_iter *iter = seq->private;
@@ -2533,19 +2521,6 @@ static const struct seq_operations fib_trie_seq_ops = {
.show = fib_trie_seq_show,
};
-static int fib_trie_seq_open(struct inode *inode, struct file *file)
-{
- return seq_open_net(inode, file, &fib_trie_seq_ops,
- sizeof(struct fib_trie_iter));
-}
-
-static const struct file_operations fib_trie_fops = {
- .open = fib_trie_seq_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release_net,
-};
-
struct fib_route_iter {
struct seq_net_private p;
struct fib_table *main_tb;
@@ -2726,29 +2701,18 @@ static const struct seq_operations fib_route_seq_ops = {
.show = fib_route_seq_show,
};
-static int fib_route_seq_open(struct inode *inode, struct file *file)
-{
- return seq_open_net(inode, file, &fib_route_seq_ops,
- sizeof(struct fib_route_iter));
-}
-
-static const struct file_operations fib_route_fops = {
- .open = fib_route_seq_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release_net,
-};
-
int __net_init fib_proc_init(struct net *net)
{
- if (!proc_create("fib_trie", 0444, net->proc_net, &fib_trie_fops))
+ if (!proc_create_net("fib_trie", 0444, net->proc_net, &fib_trie_seq_ops,
+ sizeof(struct fib_trie_iter)))
goto out1;
- if (!proc_create("fib_triestat", 0444, net->proc_net,
- &fib_triestat_fops))
+ if (!proc_create_net_single("fib_triestat", 0444, net->proc_net,
+ fib_triestat_seq_show, NULL))
goto out2;
- if (!proc_create("route", 0444, net->proc_net, &fib_route_fops))
+ if (!proc_create_net("route", 0444, net->proc_net, &fib_route_seq_ops,
+ sizeof(struct fib_route_iter)))
goto out3;
return 0;
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index b26a81a7de42..85b617b655bc 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -2829,19 +2829,6 @@ static const struct seq_operations igmp_mc_seq_ops = {
.show = igmp_mc_seq_show,
};
-static int igmp_mc_seq_open(struct inode *inode, struct file *file)
-{
- return seq_open_net(inode, file, &igmp_mc_seq_ops,
- sizeof(struct igmp_mc_iter_state));
-}
-
-static const struct file_operations igmp_mc_seq_fops = {
- .open = igmp_mc_seq_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release_net,
-};
-
struct igmp_mcf_iter_state {
struct seq_net_private p;
struct net_device *dev;
@@ -2975,29 +2962,17 @@ static const struct seq_operations igmp_mcf_seq_ops = {
.show = igmp_mcf_seq_show,
};
-static int igmp_mcf_seq_open(struct inode *inode, struct file *file)
-{
- return seq_open_net(inode, file, &igmp_mcf_seq_ops,
- sizeof(struct igmp_mcf_iter_state));
-}
-
-static const struct file_operations igmp_mcf_seq_fops = {
- .open = igmp_mcf_seq_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release_net,
-};
-
static int __net_init igmp_net_init(struct net *net)
{
struct proc_dir_entry *pde;
int err;
- pde = proc_create("igmp", 0444, net->proc_net, &igmp_mc_seq_fops);
+ pde = proc_create_net("igmp", 0444, net->proc_net, &igmp_mc_seq_ops,
+ sizeof(struct igmp_mc_iter_state));
if (!pde)
goto out_igmp;
- pde = proc_create("mcfilter", 0444, net->proc_net,
- &igmp_mcf_seq_fops);
+ pde = proc_create_net("mcfilter", 0444, net->proc_net,
+ &igmp_mcf_seq_ops, sizeof(struct igmp_mcf_iter_state));
if (!pde)
goto out_mcfilter;
err = inet_ctl_sock_create(&net->ipv4.mc_autojoin_sk, AF_INET,
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 9c169bb2444d..f200b304f76c 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -722,10 +722,12 @@ static netdev_tx_t erspan_xmit(struct sk_buff *skb,
erspan_build_header(skb, ntohl(tunnel->parms.o_key),
tunnel->index,
truncate, true);
- else
+ else if (tunnel->erspan_ver == 2)
erspan_build_header_v2(skb, ntohl(tunnel->parms.o_key),
tunnel->dir, tunnel->hwid,
truncate, true);
+ else
+ goto free_skb;
tunnel->parms.o_flags &= ~TUNNEL_KEY;
__gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_ERSPAN));
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 83c73bab2c3d..d54abc097800 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -1045,7 +1045,8 @@ alloc_new_skb:
if (copy > length)
copy = length;
- if (!(rt->dst.dev->features&NETIF_F_SG)) {
+ if (!(rt->dst.dev->features&NETIF_F_SG) &&
+ skb_tailroom(skb) >= copy) {
unsigned int off;
off = skb->len;
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 5ad2d8ed3a3f..57bbb060faaf 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -505,8 +505,6 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
int err;
int copied;
- WARN_ON_ONCE(sk->sk_family == AF_INET6);
-
err = -EAGAIN;
skb = sock_dequeue_err_skb(sk);
if (!skb)
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index 6b0e362cc99b..38d906baf1df 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -328,7 +328,7 @@ static int ip_tunnel_bind_dev(struct net_device *dev)
if (tdev) {
hlen = tdev->hard_header_len + tdev->needed_headroom;
- mtu = tdev->mtu;
+ mtu = min(tdev->mtu, IP_MAX_MTU);
}
dev->needed_headroom = t_hlen + hlen;
@@ -362,7 +362,7 @@ static struct ip_tunnel *ip_tunnel_create(struct net *net,
nt = netdev_priv(dev);
t_hlen = nt->hlen + sizeof(struct iphdr);
dev->min_mtu = ETH_MIN_MTU;
- dev->max_mtu = 0xFFF8 - dev->hard_header_len - t_hlen;
+ dev->max_mtu = IP_MAX_MTU - dev->hard_header_len - t_hlen;
ip_tunnel_add(itn, nt);
return nt;
@@ -930,7 +930,7 @@ int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
int t_hlen = tunnel->hlen + sizeof(struct iphdr);
- int max_mtu = 0xFFF8 - dev->hard_header_len - t_hlen;
+ int max_mtu = IP_MAX_MTU - dev->hard_header_len - t_hlen;
if (new_mtu < ETH_MIN_MTU)
return -EINVAL;
@@ -1107,7 +1107,7 @@ int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
mtu = ip_tunnel_bind_dev(dev);
if (tb[IFLA_MTU]) {
- unsigned int max = 0xfff8 - dev->hard_header_len - nt->hlen;
+ unsigned int max = IP_MAX_MTU - dev->hard_header_len - nt->hlen;
mtu = clamp(dev->mtu, (unsigned int)ETH_MIN_MTU,
(unsigned int)(max - sizeof(struct iphdr)));
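The 0xFFF8 → IP_MAX_MTU substitutions replace a magic constant that quietly under-counted: IP_MAX_MTU is 0xFFFF, the 16-bit IPv4 total-length ceiling, and the headroom for the device's hard header and the tunnel header is then subtracted explicitly, so the arithmetic documents itself. A worked instance under assumed values:

	/* assumed: no extra tunnel encap (tunnel->hlen == 0) and
	 * hard_header_len == 0, as for a plain ipip-style device */
	int t_hlen  = 0 + sizeof(struct iphdr);		/* 20 */
	int max_mtu = IP_MAX_MTU - 0 - t_hlen;		/* 65535 - 20 = 65515 */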
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index 43f620feb1c4..bbcbcc113d19 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -1282,18 +1282,6 @@ static int pnp_seq_show(struct seq_file *seq, void *v)
&ic_servaddr);
return 0;
}
-
-static int pnp_seq_open(struct inode *indoe, struct file *file)
-{
- return single_open(file, pnp_seq_show, NULL);
-}
-
-static const struct file_operations pnp_seq_fops = {
- .open = pnp_seq_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
#endif /* CONFIG_PROC_FS */
/*
@@ -1369,7 +1357,7 @@ static int __init ip_auto_config(void)
unsigned int i;
#ifdef CONFIG_PROC_FS
- proc_create("pnp", 0444, init_net.proc_net, &pnp_seq_fops);
+ proc_create_single("pnp", 0444, init_net.proc_net, pnp_seq_show);
#endif /* CONFIG_PROC_FS */
if (!ic_enable)
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 2fb4de3f7f66..37c4f885ff7b 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -2828,19 +2828,6 @@ static const struct seq_operations ipmr_vif_seq_ops = {
.show = ipmr_vif_seq_show,
};
-static int ipmr_vif_open(struct inode *inode, struct file *file)
-{
- return seq_open_net(inode, file, &ipmr_vif_seq_ops,
- sizeof(struct mr_vif_iter));
-}
-
-static const struct file_operations ipmr_vif_fops = {
- .open = ipmr_vif_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release_net,
-};
-
static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
{
struct net *net = seq_file_net(seq);
@@ -2900,19 +2887,6 @@ static const struct seq_operations ipmr_mfc_seq_ops = {
.stop = mr_mfc_seq_stop,
.show = ipmr_mfc_seq_show,
};
-
-static int ipmr_mfc_open(struct inode *inode, struct file *file)
-{
- return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
- sizeof(struct mr_mfc_iter));
-}
-
-static const struct file_operations ipmr_mfc_fops = {
- .open = ipmr_mfc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release_net,
-};
#endif
#ifdef CONFIG_IP_PIMSM_V2
@@ -2977,9 +2951,11 @@ static int __net_init ipmr_net_init(struct net *net)
#ifdef CONFIG_PROC_FS
err = -ENOMEM;
- if (!proc_create("ip_mr_vif", 0, net->proc_net, &ipmr_vif_fops))
+ if (!proc_create_net("ip_mr_vif", 0, net->proc_net, &ipmr_vif_seq_ops,
+ sizeof(struct mr_vif_iter)))
goto proc_vif_fail;
- if (!proc_create("ip_mr_cache", 0, net->proc_net, &ipmr_mfc_fops))
+ if (!proc_create_net("ip_mr_cache", 0, net->proc_net, &ipmr_mfc_seq_ops,
+ sizeof(struct mr_mfc_iter)))
goto proc_cache_fail;
#endif
return 0;
diff --git a/net/ipv4/ipmr_base.c b/net/ipv4/ipmr_base.c
index 4fe97723b53f..30221701614c 100644
--- a/net/ipv4/ipmr_base.c
+++ b/net/ipv4/ipmr_base.c
@@ -43,7 +43,10 @@ mr_table_alloc(struct net *net, u32 id,
write_pnet(&mrt->net, net);
mrt->ops = *ops;
- rhltable_init(&mrt->mfc_hash, mrt->ops.rht_params);
+ if (rhltable_init(&mrt->mfc_hash, mrt->ops.rht_params)) {
+ kfree(mrt);
+ return NULL;
+ }
INIT_LIST_HEAD(&mrt->mfc_cache_list);
INIT_LIST_HEAD(&mrt->mfc_unres_queue);
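mr_table_alloc() previously ignored rhltable_init()'s return value; if the hash-table allocation failed, later inserts would operate on an unallocated table. The fix frees the half-built mrt and returns NULL, which callers already treat as allocation failure. The shape of the pattern (sketch of the code above):

	mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
	if (!mrt)
		return NULL;

	if (rhltable_init(&mrt->mfc_hash, mrt->ops.rht_params)) {
		kfree(mrt);	/* undo partial construction before bailing */
		return NULL;
	}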
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 44b308d93ec2..e85f35b89c49 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -34,6 +34,7 @@
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
MODULE_DESCRIPTION("IPv4 packet filter");
+MODULE_ALIAS("ipt_icmp");
void *ipt_alloc_initial_table(const struct xt_table *info)
{
diff --git a/net/ipv4/netfilter/ipt_rpfilter.c b/net/ipv4/netfilter/ipt_rpfilter.c
index fd01f13c896a..12843c9ef142 100644
--- a/net/ipv4/netfilter/ipt_rpfilter.c
+++ b/net/ipv4/netfilter/ipt_rpfilter.c
@@ -89,10 +89,10 @@ static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par)
return true ^ invert;
}
+ memset(&flow, 0, sizeof(flow));
flow.flowi4_iif = LOOPBACK_IFINDEX;
flow.daddr = iph->saddr;
flow.saddr = rpfilter_get_saddr(iph->daddr);
- flow.flowi4_oif = 0;
flow.flowi4_mark = info->flags & XT_RPFILTER_VALID_MARK ? skb->mark : 0;
flow.flowi4_tos = RT_TOS(iph->tos);
flow.flowi4_scope = RT_SCOPE_UNIVERSE;
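rpfilter_mt() builds a struct flowi4 on the stack and previously initialized only selected members, leaving the rest (a set that has grown over time) as stack garbage feeding the FIB lookup. Zeroing the whole structure once, then setting only the meaningful fields, is the robust form; sketched:

	struct flowi4 flow;

	memset(&flow, 0, sizeof(flow));		/* no field left undefined */
	flow.flowi4_iif = LOOPBACK_IFINDEX;
	flow.daddr = iph->saddr;		/* reverse-path: swap src/dst */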
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index 05e47d777009..2ed64bca54e3 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -775,8 +775,10 @@ static int ping_v4_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
ipc.addr = faddr = daddr;
if (ipc.opt && ipc.opt->opt.srr) {
- if (!daddr)
- return -EINVAL;
+ if (!daddr) {
+ err = -EINVAL;
+ goto out_free;
+ }
faddr = ipc.opt->opt.faddr;
}
tos = get_rttos(&ipc, inet);
@@ -842,6 +844,7 @@ back_from_confirm:
out:
ip_rt_put(rt);
+out_free:
if (free)
kfree(ipc.opt);
if (!err) {
@@ -1147,58 +1150,24 @@ static int ping_v4_seq_show(struct seq_file *seq, void *v)
return 0;
}
-static int ping_seq_open(struct inode *inode, struct file *file)
-{
- struct ping_seq_afinfo *afinfo = PDE_DATA(inode);
- return seq_open_net(inode, file, &afinfo->seq_ops,
- sizeof(struct ping_iter_state));
-}
-
-const struct file_operations ping_seq_fops = {
- .open = ping_seq_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release_net,
-};
-EXPORT_SYMBOL_GPL(ping_seq_fops);
-
-static struct ping_seq_afinfo ping_v4_seq_afinfo = {
- .name = "icmp",
- .family = AF_INET,
- .seq_fops = &ping_seq_fops,
- .seq_ops = {
- .start = ping_v4_seq_start,
- .show = ping_v4_seq_show,
- .next = ping_seq_next,
- .stop = ping_seq_stop,
- },
+static const struct seq_operations ping_v4_seq_ops = {
+ .start = ping_v4_seq_start,
+ .show = ping_v4_seq_show,
+ .next = ping_seq_next,
+ .stop = ping_seq_stop,
};
-int ping_proc_register(struct net *net, struct ping_seq_afinfo *afinfo)
+static int __net_init ping_v4_proc_init_net(struct net *net)
{
- struct proc_dir_entry *p;
- p = proc_create_data(afinfo->name, 0444, net->proc_net,
- afinfo->seq_fops, afinfo);
- if (!p)
+ if (!proc_create_net("icmp", 0444, net->proc_net, &ping_v4_seq_ops,
+ sizeof(struct ping_iter_state)))
return -ENOMEM;
return 0;
}
-EXPORT_SYMBOL_GPL(ping_proc_register);
-
-void ping_proc_unregister(struct net *net, struct ping_seq_afinfo *afinfo)
-{
- remove_proc_entry(afinfo->name, net->proc_net);
-}
-EXPORT_SYMBOL_GPL(ping_proc_unregister);
-
-static int __net_init ping_v4_proc_init_net(struct net *net)
-{
- return ping_proc_register(net, &ping_v4_seq_afinfo);
-}
static void __net_exit ping_v4_proc_exit_net(struct net *net)
{
- ping_proc_unregister(net, &ping_v4_seq_afinfo);
+ remove_proc_entry("icmp", net->proc_net);
}
static struct pernet_operations ping_v4_net_ops = {
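The ping proc conversion also retires the ping_seq_afinfo indirection: registration and teardown reduce to one proc_create_net() and one remove_proc_entry() per namespace, wired through pernet_operations. A minimal sketch of that lifecycle (illustrative names):

static int __net_init foo_proc_init_net(struct net *net)
{
	if (!proc_create_net("foo", 0444, net->proc_net, &foo_seq_ops,
			sizeof(struct foo_iter_state)))
		return -ENOMEM;
	return 0;
}

static void __net_exit foo_proc_exit_net(struct net *net)
{
	remove_proc_entry("foo", net->proc_net);
}

static struct pernet_operations foo_net_ops = {
	.init = foo_proc_init_net,
	.exit = foo_proc_exit_net,
};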
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index a058de677e94..573e43c8ed87 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -77,18 +77,6 @@ static int sockstat_seq_show(struct seq_file *seq, void *v)
return 0;
}
-static int sockstat_seq_open(struct inode *inode, struct file *file)
-{
- return single_open_net(inode, file, sockstat_seq_show);
-}
-
-static const struct file_operations sockstat_seq_fops = {
- .open = sockstat_seq_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release_net,
-};
-
/* snmp items */
static const struct snmp_mib snmp4_ipstats_list[] = {
SNMP_MIB_ITEM("InReceives", IPSTATS_MIB_INPKTS),
@@ -460,20 +448,6 @@ static int snmp_seq_show(struct seq_file *seq, void *v)
return 0;
}
-static int snmp_seq_open(struct inode *inode, struct file *file)
-{
- return single_open_net(inode, file, snmp_seq_show);
-}
-
-static const struct file_operations snmp_seq_fops = {
- .open = snmp_seq_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release_net,
-};
-
-
-
/*
* Output /proc/net/netstat
*/
@@ -507,26 +481,16 @@ static int netstat_seq_show(struct seq_file *seq, void *v)
return 0;
}
-static int netstat_seq_open(struct inode *inode, struct file *file)
-{
- return single_open_net(inode, file, netstat_seq_show);
-}
-
-static const struct file_operations netstat_seq_fops = {
- .open = netstat_seq_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release_net,
-};
-
static __net_init int ip_proc_init_net(struct net *net)
{
- if (!proc_create("sockstat", 0444, net->proc_net,
- &sockstat_seq_fops))
+ if (!proc_create_net_single("sockstat", 0444, net->proc_net,
+ sockstat_seq_show, NULL))
goto out_sockstat;
- if (!proc_create("netstat", 0444, net->proc_net, &netstat_seq_fops))
+ if (!proc_create_net_single("netstat", 0444, net->proc_net,
+ netstat_seq_show, NULL))
goto out_netstat;
- if (!proc_create("snmp", 0444, net->proc_net, &snmp_seq_fops))
+ if (!proc_create_net_single("snmp", 0444, net->proc_net, snmp_seq_show,
+ NULL))
goto out_snmp;
return 0;
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 1b4d3355624a..abb3c9490c55 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -1003,11 +1003,12 @@ struct proto raw_prot = {
static struct sock *raw_get_first(struct seq_file *seq)
{
struct sock *sk;
+ struct raw_hashinfo *h = PDE_DATA(file_inode(seq->file));
struct raw_iter_state *state = raw_seq_private(seq);
for (state->bucket = 0; state->bucket < RAW_HTABLE_SIZE;
++state->bucket) {
- sk_for_each(sk, &state->h->ht[state->bucket])
+ sk_for_each(sk, &h->ht[state->bucket])
if (sock_net(sk) == seq_file_net(seq))
goto found;
}
@@ -1018,6 +1019,7 @@ found:
static struct sock *raw_get_next(struct seq_file *seq, struct sock *sk)
{
+ struct raw_hashinfo *h = PDE_DATA(file_inode(seq->file));
struct raw_iter_state *state = raw_seq_private(seq);
do {
@@ -1027,7 +1029,7 @@ try_again:
} while (sk && sock_net(sk) != seq_file_net(seq));
if (!sk && ++state->bucket < RAW_HTABLE_SIZE) {
- sk = sk_head(&state->h->ht[state->bucket]);
+ sk = sk_head(&h->ht[state->bucket]);
goto try_again;
}
return sk;
@@ -1045,9 +1047,9 @@ static struct sock *raw_get_idx(struct seq_file *seq, loff_t pos)
void *raw_seq_start(struct seq_file *seq, loff_t *pos)
{
- struct raw_iter_state *state = raw_seq_private(seq);
+ struct raw_hashinfo *h = PDE_DATA(file_inode(seq->file));
- read_lock(&state->h->lock);
+ read_lock(&h->lock);
return *pos ? raw_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
}
EXPORT_SYMBOL_GPL(raw_seq_start);
@@ -1067,9 +1069,9 @@ EXPORT_SYMBOL_GPL(raw_seq_next);
void raw_seq_stop(struct seq_file *seq, void *v)
{
- struct raw_iter_state *state = raw_seq_private(seq);
+ struct raw_hashinfo *h = PDE_DATA(file_inode(seq->file));
- read_unlock(&state->h->lock);
+ read_unlock(&h->lock);
}
EXPORT_SYMBOL_GPL(raw_seq_stop);
@@ -1110,37 +1112,10 @@ static const struct seq_operations raw_seq_ops = {
.show = raw_seq_show,
};
-int raw_seq_open(struct inode *ino, struct file *file,
- struct raw_hashinfo *h, const struct seq_operations *ops)
-{
- int err;
- struct raw_iter_state *i;
-
- err = seq_open_net(ino, file, ops, sizeof(struct raw_iter_state));
- if (err < 0)
- return err;
-
- i = raw_seq_private((struct seq_file *)file->private_data);
- i->h = h;
- return 0;
-}
-EXPORT_SYMBOL_GPL(raw_seq_open);
-
-static int raw_v4_seq_open(struct inode *inode, struct file *file)
-{
- return raw_seq_open(inode, file, &raw_v4_hashinfo, &raw_seq_ops);
-}
-
-static const struct file_operations raw_seq_fops = {
- .open = raw_v4_seq_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release_net,
-};
-
static __net_init int raw_init_net(struct net *net)
{
- if (!proc_create("raw", 0444, net->proc_net, &raw_seq_fops))
+ if (!proc_create_net_data("raw", 0444, net->proc_net, &raw_seq_ops,
+ sizeof(struct raw_iter_state), &raw_v4_hashinfo))
return -ENOMEM;
return 0;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index ccb25d80f679..75fb8864be67 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -360,18 +360,6 @@ static int rt_acct_proc_show(struct seq_file *m, void *v)
kfree(dst);
return 0;
}
-
-static int rt_acct_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, rt_acct_proc_show, NULL);
-}
-
-static const struct file_operations rt_acct_proc_fops = {
- .open = rt_acct_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
#endif
static int __net_init ip_rt_do_proc_init(struct net *net)
@@ -389,7 +377,8 @@ static int __net_init ip_rt_do_proc_init(struct net *net)
goto err2;
#ifdef CONFIG_IP_ROUTE_CLASSID
- pde = proc_create("rt_acct", 0, net->proc_net, &rt_acct_proc_fops);
+ pde = proc_create_single("rt_acct", 0, net->proc_net,
+ rt_acct_proc_show);
if (!pde)
goto err3;
#endif
@@ -709,7 +698,7 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
fnhe->fnhe_gw = gw;
fnhe->fnhe_pmtu = pmtu;
fnhe->fnhe_mtu_locked = lock;
- fnhe->fnhe_expires = expires;
+ fnhe->fnhe_expires = max(1UL, expires);
/* Exception created; mark the cached routes for the nexthop
* stale, so anyone caching it rechecks if this exception
@@ -1297,6 +1286,36 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst)
return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
}
+static void ip_del_fnhe(struct fib_nh *nh, __be32 daddr)
+{
+ struct fnhe_hash_bucket *hash;
+ struct fib_nh_exception *fnhe, __rcu **fnhe_p;
+ u32 hval = fnhe_hashfun(daddr);
+
+ spin_lock_bh(&fnhe_lock);
+
+ hash = rcu_dereference_protected(nh->nh_exceptions,
+ lockdep_is_held(&fnhe_lock));
+ hash += hval;
+
+ fnhe_p = &hash->chain;
+ fnhe = rcu_dereference_protected(*fnhe_p, lockdep_is_held(&fnhe_lock));
+ while (fnhe) {
+ if (fnhe->fnhe_daddr == daddr) {
+ rcu_assign_pointer(*fnhe_p, rcu_dereference_protected(
+ fnhe->fnhe_next, lockdep_is_held(&fnhe_lock)));
+ fnhe_flush_routes(fnhe);
+ kfree_rcu(fnhe, rcu);
+ break;
+ }
+ fnhe_p = &fnhe->fnhe_next;
+ fnhe = rcu_dereference_protected(fnhe->fnhe_next,
+ lockdep_is_held(&fnhe_lock));
+ }
+
+ spin_unlock_bh(&fnhe_lock);
+}
+
static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr)
{
struct fnhe_hash_bucket *hash = rcu_dereference(nh->nh_exceptions);
@@ -1310,8 +1329,14 @@ static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr)
for (fnhe = rcu_dereference(hash[hval].chain); fnhe;
fnhe = rcu_dereference(fnhe->fnhe_next)) {
- if (fnhe->fnhe_daddr == daddr)
+ if (fnhe->fnhe_daddr == daddr) {
+ if (fnhe->fnhe_expires &&
+ time_after(jiffies, fnhe->fnhe_expires)) {
+ ip_del_fnhe(nh, daddr);
+ break;
+ }
return fnhe;
+ }
}
return NULL;
}
@@ -1339,6 +1364,7 @@ static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe,
fnhe->fnhe_gw = 0;
fnhe->fnhe_pmtu = 0;
fnhe->fnhe_expires = 0;
+ fnhe->fnhe_mtu_locked = false;
fnhe_flush_routes(fnhe);
orig = NULL;
}
@@ -1636,36 +1662,6 @@ static void ip_handle_martian_source(struct net_device *dev,
#endif
}
-static void ip_del_fnhe(struct fib_nh *nh, __be32 daddr)
-{
- struct fnhe_hash_bucket *hash;
- struct fib_nh_exception *fnhe, __rcu **fnhe_p;
- u32 hval = fnhe_hashfun(daddr);
-
- spin_lock_bh(&fnhe_lock);
-
- hash = rcu_dereference_protected(nh->nh_exceptions,
- lockdep_is_held(&fnhe_lock));
- hash += hval;
-
- fnhe_p = &hash->chain;
- fnhe = rcu_dereference_protected(*fnhe_p, lockdep_is_held(&fnhe_lock));
- while (fnhe) {
- if (fnhe->fnhe_daddr == daddr) {
- rcu_assign_pointer(*fnhe_p, rcu_dereference_protected(
- fnhe->fnhe_next, lockdep_is_held(&fnhe_lock)));
- fnhe_flush_routes(fnhe);
- kfree_rcu(fnhe, rcu);
- break;
- }
- fnhe_p = &fnhe->fnhe_next;
- fnhe = rcu_dereference_protected(fnhe->fnhe_next,
- lockdep_is_held(&fnhe_lock));
- }
-
- spin_unlock_bh(&fnhe_lock);
-}
-
/* called in rcu_read_lock() section */
static int __mkroute_input(struct sk_buff *skb,
const struct fib_result *res,
@@ -1719,20 +1715,10 @@ static int __mkroute_input(struct sk_buff *skb,
fnhe = find_exception(&FIB_RES_NH(*res), daddr);
if (do_cache) {
- if (fnhe) {
+ if (fnhe)
rth = rcu_dereference(fnhe->fnhe_rth_input);
- if (rth && rth->dst.expires &&
- time_after(jiffies, rth->dst.expires)) {
- ip_del_fnhe(&FIB_RES_NH(*res), daddr);
- fnhe = NULL;
- } else {
- goto rt_cache;
- }
- }
-
- rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input);
-
-rt_cache:
+ else
+ rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input);
if (rt_cache_valid(rth)) {
skb_dst_set_noref(skb, &rth->dst);
goto out;
@@ -1964,8 +1950,13 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
fl4.saddr = saddr;
fl4.flowi4_uid = sock_net_uid(net, NULL);
- if (fib4_rules_early_flow_dissect(net, skb, &fl4, &_flkeys))
+ if (fib4_rules_early_flow_dissect(net, skb, &fl4, &_flkeys)) {
flkeys = &_flkeys;
+ } else {
+ fl4.flowi4_proto = 0;
+ fl4.fl4_sport = 0;
+ fl4.fl4_dport = 0;
+ }
err = fib_lookup(net, &fl4, res, 0);
if (err != 0) {
@@ -2216,39 +2207,31 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
* the loopback interface and the IP_PKTINFO ipi_ifindex will
* be set to the loopback interface as well.
*/
- fi = NULL;
+ do_cache = false;
}
fnhe = NULL;
do_cache &= fi != NULL;
- if (do_cache) {
+ if (fi) {
struct rtable __rcu **prth;
struct fib_nh *nh = &FIB_RES_NH(*res);
fnhe = find_exception(nh, fl4->daddr);
+ if (!do_cache)
+ goto add;
if (fnhe) {
prth = &fnhe->fnhe_rth_output;
- rth = rcu_dereference(*prth);
- if (rth && rth->dst.expires &&
- time_after(jiffies, rth->dst.expires)) {
- ip_del_fnhe(nh, fl4->daddr);
- fnhe = NULL;
- } else {
- goto rt_cache;
+ } else {
+ if (unlikely(fl4->flowi4_flags &
+ FLOWI_FLAG_KNOWN_NH &&
+ !(nh->nh_gw &&
+ nh->nh_scope == RT_SCOPE_LINK))) {
+ do_cache = false;
+ goto add;
}
+ prth = raw_cpu_ptr(nh->nh_pcpu_rth_output);
}
-
- if (unlikely(fl4->flowi4_flags &
- FLOWI_FLAG_KNOWN_NH &&
- !(nh->nh_gw &&
- nh->nh_scope == RT_SCOPE_LINK))) {
- do_cache = false;
- goto add;
- }
- prth = raw_cpu_ptr(nh->nh_pcpu_rth_output);
rth = rcu_dereference(*prth);
-
-rt_cache:
if (rt_cache_valid(rth) && dst_hold_safe(&rth->dst))
return rth;
}
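
Two details in the route.c hunks are worth spelling out. fnhe_expires == 0 is the "no expiry" sentinel, which is why update_or_create_fnhe() now stores max(1UL, expires): a computed expiry that happens to land on 0 must not read as "never expires". And the expiry test that moved into find_exception() uses time_after(), which stays correct across jiffies wraparound. A small userspace re-implementation (the macro body matches include/linux/jiffies.h; the rest is illustrative and assumes a 64-bit unsigned long):

    #include <stdio.h>

    #define time_after(a, b)        ((long)((b) - (a)) < 0)

    int main(void)
    {
            unsigned long jiffies = 0xfffffffffffffff0UL; /* about to wrap */
            unsigned long expires = jiffies + 0x20;       /* wraps to 0x10 */

            /* A naive "jiffies > expires" would already claim expiry
             * here; the signed-difference form does not. */
            printf("expired before wrap? %d\n", time_after(jiffies, expires));

            jiffies += 0x40;                              /* past the wrap */
            printf("expired after wrap?  %d\n", time_after(jiffies, expires));
            return 0;
    }
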
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 9ce1c726185e..dec47e6789e7 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -494,32 +494,21 @@ static inline bool tcp_stream_is_readable(const struct tcp_sock *tp,
}
/*
- * Wait for a TCP event.
- *
- * Note that we don't need to lock the socket, as the upper poll layers
- * take care of normal races (between the test and the event) and we don't
- * go look at any of the socket buffers directly.
+ * Socket is not locked. We are protected from async events by poll logic and
+ * correct handling of state changes made by other threads is impossible in
+ * any case.
*/
-__poll_t tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
+__poll_t tcp_poll_mask(struct socket *sock, __poll_t events)
{
- __poll_t mask;
struct sock *sk = sock->sk;
const struct tcp_sock *tp = tcp_sk(sk);
+ __poll_t mask = 0;
int state;
- sock_poll_wait(file, sk_sleep(sk), wait);
-
state = inet_sk_state_load(sk);
if (state == TCP_LISTEN)
return inet_csk_listen_poll(sk);
- /* Socket is not locked. We are protected from async events
- * by poll logic and correct handling of state changes
- * made by other threads is impossible in any case.
- */
-
- mask = 0;
-
/*
* EPOLLHUP is certainly not done right. But poll() doesn't
* have a notion of HUP in just one direction, and for a
@@ -600,7 +589,7 @@ __poll_t tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
return mask;
}
-EXPORT_SYMBOL(tcp_poll);
+EXPORT_SYMBOL(tcp_poll_mask);
int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
{
@@ -697,7 +686,7 @@ static bool tcp_should_autocork(struct sock *sk, struct sk_buff *skb,
{
return skb->len < size_goal &&
sock_net(sk)->ipv4.sysctl_tcp_autocorking &&
- skb != tcp_write_queue_head(sk) &&
+ !tcp_rtx_queue_empty(sk) &&
refcount_read(&sk->sk_wmem_alloc) > skb->truesize;
}
@@ -1204,7 +1193,8 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
uarg->zerocopy = 0;
}
- if (unlikely(flags & MSG_FASTOPEN || inet_sk(sk)->defer_connect)) {
+ if (unlikely(flags & MSG_FASTOPEN || inet_sk(sk)->defer_connect) &&
+ !tp->repair) {
err = tcp_sendmsg_fastopen(sk, msg, &copied_syn, size);
if (err == -EINPROGRESS && copied_syn > 0)
goto out;
@@ -2673,7 +2663,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
case TCP_REPAIR_QUEUE:
if (!tp->repair)
err = -EPERM;
- else if (val < TCP_QUEUES_NR)
+ else if ((unsigned int)val < TCP_QUEUES_NR)
tp->repair_queue = val;
else
err = -EINVAL;
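
The TCP_REPAIR_QUEUE hunk closes a signed-comparison hole: a negative val passed the old "val < TCP_QUEUES_NR" test and was later used as a queue selector. Casting to unsigned rejects negative values and the upper range in a single comparison, a common kernel idiom for bounds checks. A standalone illustration (TCP_QUEUES_NR is 3 in the kernel's enum):

    #include <stdio.h>

    #define TCP_QUEUES_NR 3

    int main(void)
    {
            int val = -1;

            printf("signed:   %d\n", val < TCP_QUEUES_NR);
            printf("unsigned: %d\n", (unsigned int)val < TCP_QUEUES_NR);
            return 0;   /* prints 1 (accepted, the bug) then 0 (rejected) */
    }
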
diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c
index 158d105e76da..58e2f479ffb4 100644
--- a/net/ipv4/tcp_bbr.c
+++ b/net/ipv4/tcp_bbr.c
@@ -806,7 +806,9 @@ static void bbr_update_min_rtt(struct sock *sk, const struct rate_sample *rs)
}
}
}
- bbr->idle_restart = 0;
+ /* Restart after idle ends only once we process a new S/ACK for data */
+ if (rs->delivered > 0)
+ bbr->idle_restart = 0;
}
static void bbr_update_model(struct sock *sk, const struct rate_sample *rs)
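
In bbr_update_min_rtt() above, any ACK used to clear idle_restart, including ACKs that deliver no new data (duplicate ACKs, or ACKs of packets sent before the idle period). Gating the reset on rs->delivered > 0 keeps the restart-from-idle handling armed until the peer has actually S/ACKed fresh data. Restated with the intent as comments:

    /* rs->delivered counts packets newly (S)ACKed by this ACK; an ACK
     * that advances nothing must not end the restart-from-idle phase. */
    if (rs->delivered > 0)
            bbr->idle_restart = 0;
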
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index f70586b50838..2c970626b398 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1961,6 +1961,7 @@ EXPORT_SYMBOL(tcp_v4_destroy_sock);
*/
static void *listening_get_next(struct seq_file *seq, void *cur)
{
+ struct tcp_seq_afinfo *afinfo = PDE_DATA(file_inode(seq->file));
struct tcp_iter_state *st = seq->private;
struct net *net = seq_file_net(seq);
struct inet_listen_hashbucket *ilb;
@@ -1983,7 +1984,7 @@ get_sk:
sk_for_each_from(sk) {
if (!net_eq(sock_net(sk), net))
continue;
- if (sk->sk_family == st->family)
+ if (sk->sk_family == afinfo->family)
return sk;
}
spin_unlock(&ilb->lock);
@@ -2020,6 +2021,7 @@ static inline bool empty_bucket(const struct tcp_iter_state *st)
*/
static void *established_get_first(struct seq_file *seq)
{
+ struct tcp_seq_afinfo *afinfo = PDE_DATA(file_inode(seq->file));
struct tcp_iter_state *st = seq->private;
struct net *net = seq_file_net(seq);
void *rc = NULL;
@@ -2036,7 +2038,7 @@ static void *established_get_first(struct seq_file *seq)
spin_lock_bh(lock);
sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
- if (sk->sk_family != st->family ||
+ if (sk->sk_family != afinfo->family ||
!net_eq(sock_net(sk), net)) {
continue;
}
@@ -2051,6 +2053,7 @@ out:
static void *established_get_next(struct seq_file *seq, void *cur)
{
+ struct tcp_seq_afinfo *afinfo = PDE_DATA(file_inode(seq->file));
struct sock *sk = cur;
struct hlist_nulls_node *node;
struct tcp_iter_state *st = seq->private;
@@ -2062,7 +2065,8 @@ static void *established_get_next(struct seq_file *seq, void *cur)
sk = sk_nulls_next(sk);
sk_nulls_for_each_from(sk, node) {
- if (sk->sk_family == st->family && net_eq(sock_net(sk), net))
+ if (sk->sk_family == afinfo->family &&
+ net_eq(sock_net(sk), net))
return sk;
}
@@ -2135,7 +2139,7 @@ static void *tcp_seek_last_pos(struct seq_file *seq)
return rc;
}
-static void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
+void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
{
struct tcp_iter_state *st = seq->private;
void *rc;
@@ -2156,8 +2160,9 @@ out:
st->last_pos = *pos;
return rc;
}
+EXPORT_SYMBOL(tcp_seq_start);
-static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
struct tcp_iter_state *st = seq->private;
void *rc = NULL;
@@ -2186,8 +2191,9 @@ out:
st->last_pos = *pos;
return rc;
}
+EXPORT_SYMBOL(tcp_seq_next);
-static void tcp_seq_stop(struct seq_file *seq, void *v)
+void tcp_seq_stop(struct seq_file *seq, void *v)
{
struct tcp_iter_state *st = seq->private;
@@ -2202,47 +2208,7 @@ static void tcp_seq_stop(struct seq_file *seq, void *v)
break;
}
}
-
-int tcp_seq_open(struct inode *inode, struct file *file)
-{
- struct tcp_seq_afinfo *afinfo = PDE_DATA(inode);
- struct tcp_iter_state *s;
- int err;
-
- err = seq_open_net(inode, file, &afinfo->seq_ops,
- sizeof(struct tcp_iter_state));
- if (err < 0)
- return err;
-
- s = ((struct seq_file *)file->private_data)->private;
- s->family = afinfo->family;
- s->last_pos = 0;
- return 0;
-}
-EXPORT_SYMBOL(tcp_seq_open);
-
-int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo)
-{
- int rc = 0;
- struct proc_dir_entry *p;
-
- afinfo->seq_ops.start = tcp_seq_start;
- afinfo->seq_ops.next = tcp_seq_next;
- afinfo->seq_ops.stop = tcp_seq_stop;
-
- p = proc_create_data(afinfo->name, 0444, net->proc_net,
- afinfo->seq_fops, afinfo);
- if (!p)
- rc = -ENOMEM;
- return rc;
-}
-EXPORT_SYMBOL(tcp_proc_register);
-
-void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo)
-{
- remove_proc_entry(afinfo->name, net->proc_net);
-}
-EXPORT_SYMBOL(tcp_proc_unregister);
+EXPORT_SYMBOL(tcp_seq_stop);
static void get_openreq4(const struct request_sock *req,
struct seq_file *f, int i)
@@ -2377,30 +2343,28 @@ out:
return 0;
}
-static const struct file_operations tcp_afinfo_seq_fops = {
- .open = tcp_seq_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release_net
+static const struct seq_operations tcp4_seq_ops = {
+ .show = tcp4_seq_show,
+ .start = tcp_seq_start,
+ .next = tcp_seq_next,
+ .stop = tcp_seq_stop,
};
static struct tcp_seq_afinfo tcp4_seq_afinfo = {
- .name = "tcp",
.family = AF_INET,
- .seq_fops = &tcp_afinfo_seq_fops,
- .seq_ops = {
- .show = tcp4_seq_show,
- },
};
static int __net_init tcp4_proc_init_net(struct net *net)
{
- return tcp_proc_register(net, &tcp4_seq_afinfo);
+ if (!proc_create_net_data("tcp", 0444, net->proc_net, &tcp4_seq_ops,
+ sizeof(struct tcp_iter_state), &tcp4_seq_afinfo))
+ return -ENOMEM;
+ return 0;
}
static void __net_exit tcp4_proc_exit_net(struct net *net)
{
- tcp_proc_unregister(net, &tcp4_seq_afinfo);
+ remove_proc_entry("tcp", net->proc_net);
}
static struct pernet_operations tcp4_net_ops = {
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 383cac0ff0ec..d07e34f8e309 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2833,8 +2833,10 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
return -EBUSY;
if (before(TCP_SKB_CB(skb)->seq, tp->snd_una)) {
- if (before(TCP_SKB_CB(skb)->end_seq, tp->snd_una))
- BUG();
+ if (unlikely(before(TCP_SKB_CB(skb)->end_seq, tp->snd_una))) {
+ WARN_ON_ONCE(1);
+ return -EINVAL;
+ }
if (tcp_trim_head(sk, skb, tp->snd_una - TCP_SKB_CB(skb)->seq))
return -ENOMEM;
}
@@ -3342,6 +3344,7 @@ static void tcp_connect_init(struct sock *sk)
sock_reset_flag(sk, SOCK_DONE);
tp->snd_wnd = 0;
tcp_init_wl(tp, 0);
+ tcp_write_queue_purge(sk);
tp->snd_una = tp->write_seq;
tp->snd_sml = tp->write_seq;
tp->snd_up = tp->write_seq;
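
Two hardening changes in tcp_output.c: __tcp_retransmit_skb() now survives an inconsistent sequence-number state with a one-shot warning and an error return rather than a BUG() panic, and tcp_connect_init() purges the write queue before resetting sequence state, so a reconnect on a socket that still holds queued data (for example after a failed fast open attempt) cannot leave skbs whose sequence numbers no longer match the reset write_seq. The replacement shape for the former:

    /* Before: BUG() took the machine down on a corrupt skb state.
     * After: warn once, fail only this retransmit attempt. */
    if (unlikely(before(TCP_SKB_CB(skb)->end_seq, tp->snd_una))) {
            WARN_ON_ONCE(1);
            return -EINVAL;
    }
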
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 24b5c59b1c53..675433eb53a8 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -401,9 +401,9 @@ static int compute_score(struct sock *sk, struct net *net,
bool dev_match = (sk->sk_bound_dev_if == dif ||
sk->sk_bound_dev_if == sdif);
- if (exact_dif && !dev_match)
+ if (!dev_match)
return -1;
- if (sk->sk_bound_dev_if && dev_match)
+ if (sk->sk_bound_dev_if)
score += 4;
}
@@ -952,8 +952,10 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
sock_tx_timestamp(sk, ipc.sockc.tsflags, &ipc.tx_flags);
if (ipc.opt && ipc.opt->opt.srr) {
- if (!daddr)
- return -EINVAL;
+ if (!daddr) {
+ err = -EINVAL;
+ goto out_free;
+ }
faddr = ipc.opt->opt.faddr;
connected = 0;
}
@@ -1074,6 +1076,7 @@ do_append_data:
out:
ip_rt_put(rt);
+out_free:
if (free)
kfree(ipc.opt);
if (!err)
@@ -2498,7 +2501,7 @@ int compat_udp_getsockopt(struct sock *sk, int level, int optname,
* udp_poll - wait for a UDP event.
* @file - file struct
* @sock - socket
- * @wait - poll table
+ * @events - events to wait for
*
* This is same as datagram poll, except for the special case of
* blocking sockets. If application is using a blocking fd
@@ -2507,23 +2510,23 @@ int compat_udp_getsockopt(struct sock *sk, int level, int optname,
* but then block when reading it. Add special case code
* to work around these arguably broken applications.
*/
-__poll_t udp_poll(struct file *file, struct socket *sock, poll_table *wait)
+__poll_t udp_poll_mask(struct socket *sock, __poll_t events)
{
- __poll_t mask = datagram_poll(file, sock, wait);
+ __poll_t mask = datagram_poll_mask(sock, events);
struct sock *sk = sock->sk;
if (!skb_queue_empty(&udp_sk(sk)->reader_queue))
mask |= EPOLLIN | EPOLLRDNORM;
/* Check for false positives due to checksum errors */
- if ((mask & EPOLLRDNORM) && !(file->f_flags & O_NONBLOCK) &&
+ if ((mask & EPOLLRDNORM) && !(sock->file->f_flags & O_NONBLOCK) &&
!(sk->sk_shutdown & RCV_SHUTDOWN) && first_packet_length(sk) == -1)
mask &= ~(EPOLLIN | EPOLLRDNORM);
return mask;
}
-EXPORT_SYMBOL(udp_poll);
+EXPORT_SYMBOL(udp_poll_mask);
int udp_abort(struct sock *sk, int err)
{
@@ -2579,12 +2582,13 @@ EXPORT_SYMBOL(udp_prot);
static struct sock *udp_get_first(struct seq_file *seq, int start)
{
struct sock *sk;
+ struct udp_seq_afinfo *afinfo = PDE_DATA(file_inode(seq->file));
struct udp_iter_state *state = seq->private;
struct net *net = seq_file_net(seq);
- for (state->bucket = start; state->bucket <= state->udp_table->mask;
+ for (state->bucket = start; state->bucket <= afinfo->udp_table->mask;
++state->bucket) {
- struct udp_hslot *hslot = &state->udp_table->hash[state->bucket];
+ struct udp_hslot *hslot = &afinfo->udp_table->hash[state->bucket];
if (hlist_empty(&hslot->head))
continue;
@@ -2593,7 +2597,7 @@ static struct sock *udp_get_first(struct seq_file *seq, int start)
sk_for_each(sk, &hslot->head) {
if (!net_eq(sock_net(sk), net))
continue;
- if (sk->sk_family == state->family)
+ if (sk->sk_family == afinfo->family)
goto found;
}
spin_unlock_bh(&hslot->lock);
@@ -2605,16 +2609,17 @@ found:
static struct sock *udp_get_next(struct seq_file *seq, struct sock *sk)
{
+ struct udp_seq_afinfo *afinfo = PDE_DATA(file_inode(seq->file));
struct udp_iter_state *state = seq->private;
struct net *net = seq_file_net(seq);
do {
sk = sk_next(sk);
- } while (sk && (!net_eq(sock_net(sk), net) || sk->sk_family != state->family));
+ } while (sk && (!net_eq(sock_net(sk), net) || sk->sk_family != afinfo->family));
if (!sk) {
- if (state->bucket <= state->udp_table->mask)
- spin_unlock_bh(&state->udp_table->hash[state->bucket].lock);
+ if (state->bucket <= afinfo->udp_table->mask)
+ spin_unlock_bh(&afinfo->udp_table->hash[state->bucket].lock);
return udp_get_first(seq, state->bucket + 1);
}
return sk;
@@ -2630,15 +2635,16 @@ static struct sock *udp_get_idx(struct seq_file *seq, loff_t pos)
return pos ? NULL : sk;
}
-static void *udp_seq_start(struct seq_file *seq, loff_t *pos)
+void *udp_seq_start(struct seq_file *seq, loff_t *pos)
{
struct udp_iter_state *state = seq->private;
state->bucket = MAX_UDP_PORTS;
return *pos ? udp_get_idx(seq, *pos-1) : SEQ_START_TOKEN;
}
+EXPORT_SYMBOL(udp_seq_start);
-static void *udp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+void *udp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
struct sock *sk;
@@ -2650,56 +2656,17 @@ static void *udp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
++*pos;
return sk;
}
+EXPORT_SYMBOL(udp_seq_next);
-static void udp_seq_stop(struct seq_file *seq, void *v)
+void udp_seq_stop(struct seq_file *seq, void *v)
{
+ struct udp_seq_afinfo *afinfo = PDE_DATA(file_inode(seq->file));
struct udp_iter_state *state = seq->private;
- if (state->bucket <= state->udp_table->mask)
- spin_unlock_bh(&state->udp_table->hash[state->bucket].lock);
+ if (state->bucket <= afinfo->udp_table->mask)
+ spin_unlock_bh(&afinfo->udp_table->hash[state->bucket].lock);
}
-
-int udp_seq_open(struct inode *inode, struct file *file)
-{
- struct udp_seq_afinfo *afinfo = PDE_DATA(inode);
- struct udp_iter_state *s;
- int err;
-
- err = seq_open_net(inode, file, &afinfo->seq_ops,
- sizeof(struct udp_iter_state));
- if (err < 0)
- return err;
-
- s = ((struct seq_file *)file->private_data)->private;
- s->family = afinfo->family;
- s->udp_table = afinfo->udp_table;
- return err;
-}
-EXPORT_SYMBOL(udp_seq_open);
-
-/* ------------------------------------------------------------------------ */
-int udp_proc_register(struct net *net, struct udp_seq_afinfo *afinfo)
-{
- struct proc_dir_entry *p;
- int rc = 0;
-
- afinfo->seq_ops.start = udp_seq_start;
- afinfo->seq_ops.next = udp_seq_next;
- afinfo->seq_ops.stop = udp_seq_stop;
-
- p = proc_create_data(afinfo->name, 0444, net->proc_net,
- afinfo->seq_fops, afinfo);
- if (!p)
- rc = -ENOMEM;
- return rc;
-}
-EXPORT_SYMBOL(udp_proc_register);
-
-void udp_proc_unregister(struct net *net, struct udp_seq_afinfo *afinfo)
-{
- remove_proc_entry(afinfo->name, net->proc_net);
-}
-EXPORT_SYMBOL(udp_proc_unregister);
+EXPORT_SYMBOL(udp_seq_stop);
/* ------------------------------------------------------------------------ */
static void udp4_format_sock(struct sock *sp, struct seq_file *f,
@@ -2739,32 +2706,30 @@ int udp4_seq_show(struct seq_file *seq, void *v)
return 0;
}
-static const struct file_operations udp_afinfo_seq_fops = {
- .open = udp_seq_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release_net
+const struct seq_operations udp_seq_ops = {
+ .start = udp_seq_start,
+ .next = udp_seq_next,
+ .stop = udp_seq_stop,
+ .show = udp4_seq_show,
};
+EXPORT_SYMBOL(udp_seq_ops);
-/* ------------------------------------------------------------------------ */
static struct udp_seq_afinfo udp4_seq_afinfo = {
- .name = "udp",
.family = AF_INET,
.udp_table = &udp_table,
- .seq_fops = &udp_afinfo_seq_fops,
- .seq_ops = {
- .show = udp4_seq_show,
- },
};
static int __net_init udp4_proc_init_net(struct net *net)
{
- return udp_proc_register(net, &udp4_seq_afinfo);
+ if (!proc_create_net_data("udp", 0444, net->proc_net, &udp_seq_ops,
+ sizeof(struct udp_iter_state), &udp4_seq_afinfo))
+ return -ENOMEM;
+ return 0;
}
static void __net_exit udp4_proc_exit_net(struct net *net)
{
- udp_proc_unregister(net, &udp4_seq_afinfo);
+ remove_proc_entry("udp", net->proc_net);
}
static struct pernet_operations udp4_net_ops = {
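
The udp_sendmsg() hunks fix a memory leak: with a source-routing option but no destination address, the early return -EINVAL skipped the kfree() of the copied option block at the out: label. Routing the error through the new out_free label frees it on every path. A self-contained userspace sketch of the same goto-cleanup shape (the names are illustrative, not kernel API):

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    static int send_with_opts(const char *opts, int have_daddr)
    {
            char *copy = opts ? strdup(opts) : NULL;
            int err = 0;

            if (copy && !have_daddr) {
                    err = -22;              /* -EINVAL */
                    goto out_free;          /* was: return -EINVAL (leak) */
            }
            /* ... build and transmit the datagram ... */
    out_free:
            free(copy);                     /* runs on success and error */
            return err;
    }

    int main(void)
    {
            printf("%d\n", send_with_opts("srr", 0)); /* -22, nothing leaked */
            return 0;
    }
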
diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c
index f96614e9b9a5..8545457752fb 100644
--- a/net/ipv4/udplite.c
+++ b/net/ipv4/udplite.c
@@ -14,6 +14,7 @@
#define pr_fmt(fmt) "UDPLite: " fmt
#include <linux/export.h>
+#include <linux/proc_fs.h>
#include "udp_impl.h"
struct udp_table udplite_table __read_mostly;
@@ -73,32 +74,22 @@ static struct inet_protosw udplite4_protosw = {
};
#ifdef CONFIG_PROC_FS
-
-static const struct file_operations udplite_afinfo_seq_fops = {
- .open = udp_seq_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release_net
-};
-
static struct udp_seq_afinfo udplite4_seq_afinfo = {
- .name = "udplite",
.family = AF_INET,
.udp_table = &udplite_table,
- .seq_fops = &udplite_afinfo_seq_fops,
- .seq_ops = {
- .show = udp4_seq_show,
- },
};
static int __net_init udplite4_proc_init_net(struct net *net)
{
- return udp_proc_register(net, &udplite4_seq_afinfo);
+ if (!proc_create_net_data("udplite", 0444, net->proc_net, &udp_seq_ops,
+ sizeof(struct udp_iter_state), &udplite4_seq_afinfo))
+ return -ENOMEM;
+ return 0;
}
static void __net_exit udplite4_proc_exit_net(struct net *net)
{
- udp_proc_unregister(net, &udplite4_seq_afinfo);
+ remove_proc_entry("udplite", net->proc_net);
}
static struct pernet_operations udplite4_net_ops = {
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index 6794ddf0547c..11e4e80cf7e9 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -34,16 +34,15 @@ config IPV6_ROUTE_INFO
bool "IPv6: Route Information (RFC 4191) support"
depends on IPV6_ROUTER_PREF
---help---
- This is experimental support of Route Information.
+ Support of Route Information.
If unsure, say N.
config IPV6_OPTIMISTIC_DAD
bool "IPv6: Enable RFC 4429 Optimistic DAD"
---help---
- This is experimental support for optimistic Duplicate
- Address Detection. It allows for autoconfigured addresses
- to be used more quickly.
+ Support for optimistic Duplicate Address Detection. It allows for
+ autoconfigured addresses to be used more quickly.
If unsure, say N.
@@ -280,7 +279,7 @@ config IPV6_MROUTE
depends on IPV6
select IP_MROUTE_COMMON
---help---
- Experimental support for IPv6 multicast forwarding.
+ Support for IPv6 multicast forwarding.
If unsure, say N.
config IPV6_MROUTE_MULTIPLE_TABLES
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 78cef00c9596..1b5ea3379d9b 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -4254,22 +4254,10 @@ static const struct seq_operations if6_seq_ops = {
.stop = if6_seq_stop,
};
-static int if6_seq_open(struct inode *inode, struct file *file)
-{
- return seq_open_net(inode, file, &if6_seq_ops,
- sizeof(struct if6_iter_state));
-}
-
-static const struct file_operations if6_fops = {
- .open = if6_seq_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release_net,
-};
-
static int __net_init if6_proc_net_init(struct net *net)
{
- if (!proc_create("if_inet6", 0444, net->proc_net, &if6_fops))
+ if (!proc_create_net("if_inet6", 0444, net->proc_net, &if6_seq_ops,
+ sizeof(struct if6_iter_state)))
return -ENOMEM;
return 0;
}
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 8da0b513f188..d443c18b45fe 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -571,7 +571,7 @@ const struct proto_ops inet6_stream_ops = {
.socketpair = sock_no_socketpair, /* a do nothing */
.accept = inet_accept, /* ok */
.getname = inet6_getname,
- .poll = tcp_poll, /* ok */
+ .poll_mask = tcp_poll_mask, /* ok */
.ioctl = inet6_ioctl, /* must change */
.listen = inet_listen, /* ok */
.shutdown = inet_shutdown, /* ok */
@@ -601,7 +601,7 @@ const struct proto_ops inet6_dgram_ops = {
.socketpair = sock_no_socketpair, /* a do nothing */
.accept = sock_no_accept, /* a do nothing */
.getname = inet6_getname,
- .poll = udp_poll, /* ok */
+ .poll_mask = udp_poll_mask, /* ok */
.ioctl = inet6_ioctl, /* must change */
.listen = sock_no_listen, /* ok */
.shutdown = inet_shutdown, /* ok */
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index bbcabbba9bd8..ebeaf47d5c8d 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -529,22 +529,10 @@ static const struct seq_operations ac6_seq_ops = {
.show = ac6_seq_show,
};
-static int ac6_seq_open(struct inode *inode, struct file *file)
-{
- return seq_open_net(inode, file, &ac6_seq_ops,
- sizeof(struct ac6_iter_state));
-}
-
-static const struct file_operations ac6_seq_fops = {
- .open = ac6_seq_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release_net,
-};
-
int __net_init ac6_proc_init(struct net *net)
{
- if (!proc_create("anycast6", 0444, net->proc_net, &ac6_seq_fops))
+ if (!proc_create_net("anycast6", 0444, net->proc_net, &ac6_seq_ops,
+ sizeof(struct ac6_iter_state)))
return -ENOMEM;
return 0;
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index deab2db6692e..01372dd74e38 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -2209,15 +2209,6 @@ void fib6_gc_cleanup(void)
}
#ifdef CONFIG_PROC_FS
-
-struct ipv6_route_iter {
- struct seq_net_private p;
- struct fib6_walker w;
- loff_t skip;
- struct fib6_table *tbl;
- int sernum;
-};
-
static int ipv6_route_seq_show(struct seq_file *seq, void *v)
{
struct rt6_info *rt = v;
@@ -2383,17 +2374,10 @@ static void ipv6_route_seq_stop(struct seq_file *seq, void *v)
rcu_read_unlock_bh();
}
-static const struct seq_operations ipv6_route_seq_ops = {
+const struct seq_operations ipv6_route_seq_ops = {
.start = ipv6_route_seq_start,
.next = ipv6_route_seq_next,
.stop = ipv6_route_seq_stop,
.show = ipv6_route_seq_show
};
-
-int ipv6_route_open(struct inode *inode, struct file *file)
-{
- return seq_open_net(inode, file, &ipv6_route_seq_ops,
- sizeof(struct ipv6_route_iter));
-}
-
#endif /* CONFIG_PROC_FS */
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index c05c4e82a7ca..3eee7637bdfe 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -754,6 +754,10 @@ static struct ip6_flowlabel *ip6fl_get_idx(struct seq_file *seq, loff_t pos)
static void *ip6fl_seq_start(struct seq_file *seq, loff_t *pos)
__acquires(RCU)
{
+ struct ip6fl_iter_state *state = ip6fl_seq_private(seq);
+
+ state->pid_ns = proc_pid_ns(file_inode(seq->file));
+
rcu_read_lock_bh();
return *pos ? ip6fl_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
}
@@ -808,44 +812,10 @@ static const struct seq_operations ip6fl_seq_ops = {
.show = ip6fl_seq_show,
};
-static int ip6fl_seq_open(struct inode *inode, struct file *file)
-{
- struct seq_file *seq;
- struct ip6fl_iter_state *state;
- int err;
-
- err = seq_open_net(inode, file, &ip6fl_seq_ops,
- sizeof(struct ip6fl_iter_state));
-
- if (!err) {
- seq = file->private_data;
- state = ip6fl_seq_private(seq);
- rcu_read_lock();
- state->pid_ns = get_pid_ns(task_active_pid_ns(current));
- rcu_read_unlock();
- }
- return err;
-}
-
-static int ip6fl_seq_release(struct inode *inode, struct file *file)
-{
- struct seq_file *seq = file->private_data;
- struct ip6fl_iter_state *state = ip6fl_seq_private(seq);
- put_pid_ns(state->pid_ns);
- return seq_release_net(inode, file);
-}
-
-static const struct file_operations ip6fl_seq_fops = {
- .open = ip6fl_seq_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = ip6fl_seq_release,
-};
-
static int __net_init ip6_flowlabel_proc_init(struct net *net)
{
- if (!proc_create("ip6_flowlabel", 0444, net->proc_net,
- &ip6fl_seq_fops))
+ if (!proc_create_net("ip6_flowlabel", 0444, net->proc_net,
+ &ip6fl_seq_ops, sizeof(struct ip6fl_iter_state)))
return -ENOMEM;
return 0;
}
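
The flowlabel conversion removes the get_pid_ns()/put_pid_ns() pair that the custom ->open() and ->release() existed for: the namespace is now derived in ->start() from the procfs mount backing the file, via proc_pid_ns(), so no reference needs to be taken and the generic proc_create_net() release path suffices. The lookup, annotated:

    static void *ip6fl_seq_start(struct seq_file *seq, loff_t *pos)
            __acquires(RCU)
    {
            struct ip6fl_iter_state *state = ip6fl_seq_private(seq);

            /* The pid namespace belongs to the proc superblock this
             * file was opened from; no get/put is needed. */
            state->pid_ns = proc_pid_ns(file_inode(seq->file));

            rcu_read_lock_bh();
            return *pos ? ip6fl_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
    }
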
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 69727bc168cb..458de353f5d9 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -71,6 +71,7 @@ struct ip6gre_net {
struct ip6_tnl __rcu *tunnels[4][IP6_GRE_HASH_SIZE];
struct ip6_tnl __rcu *collect_md_tun;
+ struct ip6_tnl __rcu *collect_md_tun_erspan;
struct net_device *fb_tunnel_dev;
};
@@ -81,6 +82,7 @@ static int ip6gre_tunnel_init(struct net_device *dev);
static void ip6gre_tunnel_setup(struct net_device *dev);
static void ip6gre_tunnel_link(struct ip6gre_net *ign, struct ip6_tnl *t);
static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu);
+static void ip6erspan_tnl_link_config(struct ip6_tnl *t, int set_mtu);
/* Tunnel hash table */
@@ -232,7 +234,12 @@ static struct ip6_tnl *ip6gre_tunnel_lookup(struct net_device *dev,
if (cand)
return cand;
- t = rcu_dereference(ign->collect_md_tun);
+ if (gre_proto == htons(ETH_P_ERSPAN) ||
+ gre_proto == htons(ETH_P_ERSPAN2))
+ t = rcu_dereference(ign->collect_md_tun_erspan);
+ else
+ t = rcu_dereference(ign->collect_md_tun);
+
if (t && t->dev->flags & IFF_UP)
return t;
@@ -261,6 +268,31 @@ static struct ip6_tnl __rcu **__ip6gre_bucket(struct ip6gre_net *ign,
return &ign->tunnels[prio][h];
}
+static void ip6gre_tunnel_link_md(struct ip6gre_net *ign, struct ip6_tnl *t)
+{
+ if (t->parms.collect_md)
+ rcu_assign_pointer(ign->collect_md_tun, t);
+}
+
+static void ip6erspan_tunnel_link_md(struct ip6gre_net *ign, struct ip6_tnl *t)
+{
+ if (t->parms.collect_md)
+ rcu_assign_pointer(ign->collect_md_tun_erspan, t);
+}
+
+static void ip6gre_tunnel_unlink_md(struct ip6gre_net *ign, struct ip6_tnl *t)
+{
+ if (t->parms.collect_md)
+ rcu_assign_pointer(ign->collect_md_tun, NULL);
+}
+
+static void ip6erspan_tunnel_unlink_md(struct ip6gre_net *ign,
+ struct ip6_tnl *t)
+{
+ if (t->parms.collect_md)
+ rcu_assign_pointer(ign->collect_md_tun_erspan, NULL);
+}
+
static inline struct ip6_tnl __rcu **ip6gre_bucket(struct ip6gre_net *ign,
const struct ip6_tnl *t)
{
@@ -271,9 +303,6 @@ static void ip6gre_tunnel_link(struct ip6gre_net *ign, struct ip6_tnl *t)
{
struct ip6_tnl __rcu **tp = ip6gre_bucket(ign, t);
- if (t->parms.collect_md)
- rcu_assign_pointer(ign->collect_md_tun, t);
-
rcu_assign_pointer(t->next, rtnl_dereference(*tp));
rcu_assign_pointer(*tp, t);
}
@@ -283,9 +312,6 @@ static void ip6gre_tunnel_unlink(struct ip6gre_net *ign, struct ip6_tnl *t)
struct ip6_tnl __rcu **tp;
struct ip6_tnl *iter;
- if (t->parms.collect_md)
- rcu_assign_pointer(ign->collect_md_tun, NULL);
-
for (tp = ip6gre_bucket(ign, t);
(iter = rtnl_dereference(*tp)) != NULL;
tp = &iter->next) {
@@ -374,11 +400,23 @@ failed_free:
return NULL;
}
+static void ip6erspan_tunnel_uninit(struct net_device *dev)
+{
+ struct ip6_tnl *t = netdev_priv(dev);
+ struct ip6gre_net *ign = net_generic(t->net, ip6gre_net_id);
+
+ ip6erspan_tunnel_unlink_md(ign, t);
+ ip6gre_tunnel_unlink(ign, t);
+ dst_cache_reset(&t->dst_cache);
+ dev_put(dev);
+}
+
static void ip6gre_tunnel_uninit(struct net_device *dev)
{
struct ip6_tnl *t = netdev_priv(dev);
struct ip6gre_net *ign = net_generic(t->net, ip6gre_net_id);
+ ip6gre_tunnel_unlink_md(ign, t);
ip6gre_tunnel_unlink(ign, t);
dst_cache_reset(&t->dst_cache);
dev_put(dev);
@@ -698,6 +736,9 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb,
else
fl6->daddr = tunnel->parms.raddr;
+ if (skb_cow_head(skb, dev->needed_headroom ?: tunnel->hlen))
+ return -ENOMEM;
+
/* Push GRE header. */
protocol = (dev->type == ARPHRD_ETHER) ? htons(ETH_P_TEB) : proto;
@@ -908,7 +949,7 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb,
truncate = true;
}
- if (skb_cow_head(skb, dev->needed_headroom))
+ if (skb_cow_head(skb, dev->needed_headroom ?: t->hlen))
goto tx_err;
t->parms.o_flags &= ~TUNNEL_KEY;
@@ -979,11 +1020,14 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb,
erspan_build_header(skb, ntohl(t->parms.o_key),
t->parms.index,
truncate, false);
- else
+ else if (t->parms.erspan_ver == 2)
erspan_build_header_v2(skb, ntohl(t->parms.o_key),
t->parms.dir,
t->parms.hwid,
truncate, false);
+ else
+ goto tx_err;
+
fl6.daddr = t->parms.raddr;
}
@@ -1019,12 +1063,11 @@ tx_err:
return NETDEV_TX_OK;
}
-static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu)
+static void ip6gre_tnl_link_config_common(struct ip6_tnl *t)
{
struct net_device *dev = t->dev;
struct __ip6_tnl_parm *p = &t->parms;
struct flowi6 *fl6 = &t->fl.u.ip6;
- int t_hlen;
if (dev->type != ARPHRD_ETHER) {
memcpy(dev->dev_addr, &p->laddr, sizeof(struct in6_addr));
@@ -1051,12 +1094,13 @@ static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu)
dev->flags |= IFF_POINTOPOINT;
else
dev->flags &= ~IFF_POINTOPOINT;
+}
- t->tun_hlen = gre_calc_hlen(t->parms.o_flags);
-
- t->hlen = t->encap_hlen + t->tun_hlen;
-
- t_hlen = t->hlen + sizeof(struct ipv6hdr);
+static void ip6gre_tnl_link_config_route(struct ip6_tnl *t, int set_mtu,
+ int t_hlen)
+{
+ const struct __ip6_tnl_parm *p = &t->parms;
+ struct net_device *dev = t->dev;
if (p->flags & IP6_TNL_F_CAP_XMIT) {
int strict = (ipv6_addr_type(&p->raddr) &
@@ -1088,8 +1132,26 @@ static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu)
}
}
-static int ip6gre_tnl_change(struct ip6_tnl *t,
- const struct __ip6_tnl_parm *p, int set_mtu)
+static int ip6gre_calc_hlen(struct ip6_tnl *tunnel)
+{
+ int t_hlen;
+
+ tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags);
+ tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen;
+
+ t_hlen = tunnel->hlen + sizeof(struct ipv6hdr);
+ tunnel->dev->hard_header_len = LL_MAX_HEADER + t_hlen;
+ return t_hlen;
+}
+
+static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu)
+{
+ ip6gre_tnl_link_config_common(t);
+ ip6gre_tnl_link_config_route(t, set_mtu, ip6gre_calc_hlen(t));
+}
+
+static void ip6gre_tnl_copy_tnl_parm(struct ip6_tnl *t,
+ const struct __ip6_tnl_parm *p)
{
t->parms.laddr = p->laddr;
t->parms.raddr = p->raddr;
@@ -1105,6 +1167,12 @@ static int ip6gre_tnl_change(struct ip6_tnl *t,
t->parms.o_flags = p->o_flags;
t->parms.fwmark = p->fwmark;
dst_cache_reset(&t->dst_cache);
+}
+
+static int ip6gre_tnl_change(struct ip6_tnl *t, const struct __ip6_tnl_parm *p,
+ int set_mtu)
+{
+ ip6gre_tnl_copy_tnl_parm(t, p);
ip6gre_tnl_link_config(t, set_mtu);
return 0;
}
@@ -1381,11 +1449,7 @@ static int ip6gre_tunnel_init_common(struct net_device *dev)
return ret;
}
- tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags);
- tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen;
- t_hlen = tunnel->hlen + sizeof(struct ipv6hdr);
-
- dev->hard_header_len = LL_MAX_HEADER + t_hlen;
+ t_hlen = ip6gre_calc_hlen(tunnel);
dev->mtu = ETH_DATA_LEN - t_hlen;
if (dev->type == ARPHRD_ETHER)
dev->mtu -= ETH_HLEN;
@@ -1728,6 +1792,19 @@ static const struct net_device_ops ip6gre_tap_netdev_ops = {
.ndo_get_iflink = ip6_tnl_get_iflink,
};
+static int ip6erspan_calc_hlen(struct ip6_tnl *tunnel)
+{
+ int t_hlen;
+
+ tunnel->tun_hlen = 8;
+ tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen +
+ erspan_hdr_len(tunnel->parms.erspan_ver);
+
+ t_hlen = tunnel->hlen + sizeof(struct ipv6hdr);
+ tunnel->dev->hard_header_len = LL_MAX_HEADER + t_hlen;
+ return t_hlen;
+}
+
static int ip6erspan_tap_init(struct net_device *dev)
{
struct ip6_tnl *tunnel;
@@ -1751,12 +1828,7 @@ static int ip6erspan_tap_init(struct net_device *dev)
return ret;
}
- tunnel->tun_hlen = 8;
- tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen +
- erspan_hdr_len(tunnel->parms.erspan_ver);
- t_hlen = tunnel->hlen + sizeof(struct ipv6hdr);
-
- dev->hard_header_len = LL_MAX_HEADER + t_hlen;
+ t_hlen = ip6erspan_calc_hlen(tunnel);
dev->mtu = ETH_DATA_LEN - t_hlen;
if (dev->type == ARPHRD_ETHER)
dev->mtu -= ETH_HLEN;
@@ -1764,14 +1836,14 @@ static int ip6erspan_tap_init(struct net_device *dev)
dev->mtu -= 8;
dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
- ip6gre_tnl_link_config(tunnel, 1);
+ ip6erspan_tnl_link_config(tunnel, 1);
return 0;
}
static const struct net_device_ops ip6erspan_netdev_ops = {
.ndo_init = ip6erspan_tap_init,
- .ndo_uninit = ip6gre_tunnel_uninit,
+ .ndo_uninit = ip6erspan_tunnel_uninit,
.ndo_start_xmit = ip6erspan_tunnel_xmit,
.ndo_set_mac_address = eth_mac_addr,
.ndo_validate_addr = eth_validate_addr,
@@ -1835,13 +1907,11 @@ static bool ip6gre_netlink_encap_parms(struct nlattr *data[],
return ret;
}
-static int ip6gre_newlink(struct net *src_net, struct net_device *dev,
- struct nlattr *tb[], struct nlattr *data[],
- struct netlink_ext_ack *extack)
+static int ip6gre_newlink_common(struct net *src_net, struct net_device *dev,
+ struct nlattr *tb[], struct nlattr *data[],
+ struct netlink_ext_ack *extack)
{
struct ip6_tnl *nt;
- struct net *net = dev_net(dev);
- struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
struct ip_tunnel_encap ipencap;
int err;
@@ -1854,16 +1924,6 @@ static int ip6gre_newlink(struct net *src_net, struct net_device *dev,
return err;
}
- ip6gre_netlink_parms(data, &nt->parms);
-
- if (nt->parms.collect_md) {
- if (rtnl_dereference(ign->collect_md_tun))
- return -EEXIST;
- } else {
- if (ip6gre_tunnel_find(net, &nt->parms, dev->type))
- return -EEXIST;
- }
-
if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
eth_hw_addr_random(dev);
@@ -1874,51 +1934,94 @@ static int ip6gre_newlink(struct net *src_net, struct net_device *dev,
if (err)
goto out;
- ip6gre_tnl_link_config(nt, !tb[IFLA_MTU]);
-
if (tb[IFLA_MTU])
ip6_tnl_change_mtu(dev, nla_get_u32(tb[IFLA_MTU]));
dev_hold(dev);
- ip6gre_tunnel_link(ign, nt);
out:
return err;
}
-static int ip6gre_changelink(struct net_device *dev, struct nlattr *tb[],
- struct nlattr *data[],
- struct netlink_ext_ack *extack)
+static int ip6gre_newlink(struct net *src_net, struct net_device *dev,
+ struct nlattr *tb[], struct nlattr *data[],
+ struct netlink_ext_ack *extack)
+{
+ struct ip6_tnl *nt = netdev_priv(dev);
+ struct net *net = dev_net(dev);
+ struct ip6gre_net *ign;
+ int err;
+
+ ip6gre_netlink_parms(data, &nt->parms);
+ ign = net_generic(net, ip6gre_net_id);
+
+ if (nt->parms.collect_md) {
+ if (rtnl_dereference(ign->collect_md_tun))
+ return -EEXIST;
+ } else {
+ if (ip6gre_tunnel_find(net, &nt->parms, dev->type))
+ return -EEXIST;
+ }
+
+ err = ip6gre_newlink_common(src_net, dev, tb, data, extack);
+ if (!err) {
+ ip6gre_tnl_link_config(nt, !tb[IFLA_MTU]);
+ ip6gre_tunnel_link_md(ign, nt);
+ ip6gre_tunnel_link(net_generic(net, ip6gre_net_id), nt);
+ }
+ return err;
+}
+
+static struct ip6_tnl *
+ip6gre_changelink_common(struct net_device *dev, struct nlattr *tb[],
+ struct nlattr *data[], struct __ip6_tnl_parm *p_p,
+ struct netlink_ext_ack *extack)
{
struct ip6_tnl *t, *nt = netdev_priv(dev);
struct net *net = nt->net;
struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
- struct __ip6_tnl_parm p;
struct ip_tunnel_encap ipencap;
if (dev == ign->fb_tunnel_dev)
- return -EINVAL;
+ return ERR_PTR(-EINVAL);
if (ip6gre_netlink_encap_parms(data, &ipencap)) {
int err = ip6_tnl_encap_setup(nt, &ipencap);
if (err < 0)
- return err;
+ return ERR_PTR(err);
}
- ip6gre_netlink_parms(data, &p);
+ ip6gre_netlink_parms(data, p_p);
- t = ip6gre_tunnel_locate(net, &p, 0);
+ t = ip6gre_tunnel_locate(net, p_p, 0);
if (t) {
if (t->dev != dev)
- return -EEXIST;
+ return ERR_PTR(-EEXIST);
} else {
t = nt;
}
+ return t;
+}
+
+static int ip6gre_changelink(struct net_device *dev, struct nlattr *tb[],
+ struct nlattr *data[],
+ struct netlink_ext_ack *extack)
+{
+ struct ip6gre_net *ign = net_generic(dev_net(dev), ip6gre_net_id);
+ struct __ip6_tnl_parm p;
+ struct ip6_tnl *t;
+
+ t = ip6gre_changelink_common(dev, tb, data, &p, extack);
+ if (IS_ERR(t))
+ return PTR_ERR(t);
+
+ ip6gre_tunnel_unlink_md(ign, t);
ip6gre_tunnel_unlink(ign, t);
ip6gre_tnl_change(t, &p, !tb[IFLA_MTU]);
+ ip6gre_tunnel_link_md(ign, t);
ip6gre_tunnel_link(ign, t);
return 0;
}
@@ -2068,6 +2171,69 @@ static void ip6erspan_tap_setup(struct net_device *dev)
netif_keep_dst(dev);
}
+static int ip6erspan_newlink(struct net *src_net, struct net_device *dev,
+ struct nlattr *tb[], struct nlattr *data[],
+ struct netlink_ext_ack *extack)
+{
+ struct ip6_tnl *nt = netdev_priv(dev);
+ struct net *net = dev_net(dev);
+ struct ip6gre_net *ign;
+ int err;
+
+ ip6gre_netlink_parms(data, &nt->parms);
+ ign = net_generic(net, ip6gre_net_id);
+
+ if (nt->parms.collect_md) {
+ if (rtnl_dereference(ign->collect_md_tun_erspan))
+ return -EEXIST;
+ } else {
+ if (ip6gre_tunnel_find(net, &nt->parms, dev->type))
+ return -EEXIST;
+ }
+
+ err = ip6gre_newlink_common(src_net, dev, tb, data, extack);
+ if (!err) {
+ ip6erspan_tnl_link_config(nt, !tb[IFLA_MTU]);
+ ip6erspan_tunnel_link_md(ign, nt);
+ ip6gre_tunnel_link(net_generic(net, ip6gre_net_id), nt);
+ }
+ return err;
+}
+
+static void ip6erspan_tnl_link_config(struct ip6_tnl *t, int set_mtu)
+{
+ ip6gre_tnl_link_config_common(t);
+ ip6gre_tnl_link_config_route(t, set_mtu, ip6erspan_calc_hlen(t));
+}
+
+static int ip6erspan_tnl_change(struct ip6_tnl *t,
+ const struct __ip6_tnl_parm *p, int set_mtu)
+{
+ ip6gre_tnl_copy_tnl_parm(t, p);
+ ip6erspan_tnl_link_config(t, set_mtu);
+ return 0;
+}
+
+static int ip6erspan_changelink(struct net_device *dev, struct nlattr *tb[],
+ struct nlattr *data[],
+ struct netlink_ext_ack *extack)
+{
+ struct ip6gre_net *ign = net_generic(dev_net(dev), ip6gre_net_id);
+ struct __ip6_tnl_parm p;
+ struct ip6_tnl *t;
+
+ t = ip6gre_changelink_common(dev, tb, data, &p, extack);
+ if (IS_ERR(t))
+ return PTR_ERR(t);
+
+ ip6gre_tunnel_unlink_md(ign, t);
+ ip6gre_tunnel_unlink(ign, t);
+ ip6erspan_tnl_change(t, &p, !tb[IFLA_MTU]);
+ ip6erspan_tunnel_link_md(ign, t);
+ ip6gre_tunnel_link(ign, t);
+ return 0;
+}
+
static struct rtnl_link_ops ip6gre_link_ops __read_mostly = {
.kind = "ip6gre",
.maxtype = IFLA_GRE_MAX,
@@ -2104,8 +2270,8 @@ static struct rtnl_link_ops ip6erspan_tap_ops __read_mostly = {
.priv_size = sizeof(struct ip6_tnl),
.setup = ip6erspan_tap_setup,
.validate = ip6erspan_tap_validate,
- .newlink = ip6gre_newlink,
- .changelink = ip6gre_changelink,
+ .newlink = ip6erspan_newlink,
+ .changelink = ip6erspan_changelink,
.get_size = ip6gre_get_size,
.fill_info = ip6gre_fill_info,
.get_link_net = ip6_tnl_get_link_net,
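
The ip6_gre.c changes give ERSPAN its own collect_md slot and factor linking into paired *_link_md()/*_unlink_md() helpers, so the newlink/changelink sequences stay symmetric and a collect_md ip6gre device and ip6erspan device can coexist without clobbering each other. The receive-side dispatch keys on the GRE protocol field:

    /* collect_md lookup: ERSPAN protocols resolve to the ERSPAN
     * tunnel, everything else to the plain GRE one. */
    if (gre_proto == htons(ETH_P_ERSPAN) ||
        gre_proto == htons(ETH_P_ERSPAN2))
            t = rcu_dereference(ign->collect_md_tun_erspan);
    else
            t = rcu_dereference(ign->collect_md_tun);
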
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 2e891d2c30ef..7b6d1689087b 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1503,7 +1503,8 @@ alloc_new_skb:
if (copy > length)
copy = length;
- if (!(rt->dst.dev->features&NETIF_F_SG)) {
+ if (!(rt->dst.dev->features&NETIF_F_SG) &&
+ skb_tailroom(skb) >= copy) {
unsigned int off;
off = skb->len;
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index da66aaac51ce..00e138a44cbb 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -1692,8 +1692,13 @@ int ip6_tnl_change_mtu(struct net_device *dev, int new_mtu)
if (new_mtu < ETH_MIN_MTU)
return -EINVAL;
}
- if (new_mtu > 0xFFF8 - dev->hard_header_len)
- return -EINVAL;
+ if (tnl->parms.proto == IPPROTO_IPV6 || tnl->parms.proto == 0) {
+ if (new_mtu > IP6_MAX_MTU - dev->hard_header_len)
+ return -EINVAL;
+ } else {
+ if (new_mtu > IP_MAX_MTU - dev->hard_header_len)
+ return -EINVAL;
+ }
dev->mtu = new_mtu;
return 0;
}
@@ -1841,7 +1846,7 @@ ip6_tnl_dev_init_gen(struct net_device *dev)
if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
dev->mtu -= 8;
dev->min_mtu = ETH_MIN_MTU;
- dev->max_mtu = 0xFFF8 - dev->hard_header_len;
+ dev->max_mtu = IP6_MAX_MTU - dev->hard_header_len;
return 0;
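
The 0xFFF8 ceiling that ip6_tnl_change_mtu() used was an arbitrary leftover; the real bounds are IP6_MAX_MTU (a 0xFFFF payload plus the 40-byte IPv6 header) when the tunnel carries IPv6, and IP_MAX_MTU (0xFFFF) otherwise, each minus the device's link-layer header. A quick userspace check of the resulting ceilings, assuming the kernel's two constants:

    #include <stdio.h>

    /* Values as defined in include/net/ip6_route.h and include/net/ip.h */
    #define IP_MAX_MTU      0xFFFFU
    #define IP6_MAX_MTU     (0xFFFFU + 40)

    int main(void)
    {
            unsigned int hard_header_len = 0;       /* illustrative */

            printf("ipv6 ceiling: %u\n", IP6_MAX_MTU - hard_header_len);
            printf("ipv4 ceiling: %u\n", IP_MAX_MTU - hard_header_len);
            printf("old ceiling:  %u\n", 0xFFF8U - hard_header_len);
            return 0;
    }
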
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index c214ffec02f0..ca957dd93a29 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -669,7 +669,7 @@ static void vti6_link_config(struct ip6_tnl *t, bool keep_mtu)
else
mtu = ETH_DATA_LEN - LL_MAX_HEADER - sizeof(struct ipv6hdr);
- dev->mtu = max_t(int, mtu, IPV6_MIN_MTU);
+ dev->mtu = max_t(int, mtu, IPV4_MIN_MTU);
}
/**
@@ -881,7 +881,7 @@ static void vti6_dev_setup(struct net_device *dev)
dev->priv_destructor = vti6_dev_free;
dev->type = ARPHRD_TUNNEL6;
- dev->min_mtu = IPV6_MIN_MTU;
+ dev->min_mtu = IPV4_MIN_MTU;
dev->max_mtu = IP_MAX_MTU - sizeof(struct ipv6hdr);
dev->flags |= IFF_NOARP;
dev->addr_len = sizeof(struct in6_addr);
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 298fd8b6ed17..4a15529d33eb 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -439,19 +439,6 @@ static const struct seq_operations ip6mr_vif_seq_ops = {
.show = ip6mr_vif_seq_show,
};
-static int ip6mr_vif_open(struct inode *inode, struct file *file)
-{
- return seq_open_net(inode, file, &ip6mr_vif_seq_ops,
- sizeof(struct mr_vif_iter));
-}
-
-static const struct file_operations ip6mr_vif_fops = {
- .open = ip6mr_vif_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release_net,
-};
-
static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
{
struct net *net = seq_file_net(seq);
@@ -512,19 +499,6 @@ static const struct seq_operations ipmr_mfc_seq_ops = {
.stop = mr_mfc_seq_stop,
.show = ipmr_mfc_seq_show,
};
-
-static int ipmr_mfc_open(struct inode *inode, struct file *file)
-{
- return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
- sizeof(struct mr_mfc_iter));
-}
-
-static const struct file_operations ip6mr_mfc_fops = {
- .open = ipmr_mfc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release_net,
-};
#endif
#ifdef CONFIG_IPV6_PIMSM_V2
@@ -1316,9 +1290,11 @@ static int __net_init ip6mr_net_init(struct net *net)
#ifdef CONFIG_PROC_FS
err = -ENOMEM;
- if (!proc_create("ip6_mr_vif", 0, net->proc_net, &ip6mr_vif_fops))
+ if (!proc_create_net("ip6_mr_vif", 0, net->proc_net, &ip6mr_vif_seq_ops,
+ sizeof(struct mr_vif_iter)))
goto proc_vif_fail;
- if (!proc_create("ip6_mr_cache", 0, net->proc_net, &ip6mr_mfc_fops))
+ if (!proc_create_net("ip6_mr_cache", 0, net->proc_net, &ipmr_mfc_seq_ops,
+ sizeof(struct mr_mfc_iter)))
goto proc_cache_fail;
#endif
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 793159d77d8a..975021df7c1c 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -2749,19 +2749,6 @@ static const struct seq_operations igmp6_mc_seq_ops = {
.show = igmp6_mc_seq_show,
};
-static int igmp6_mc_seq_open(struct inode *inode, struct file *file)
-{
- return seq_open_net(inode, file, &igmp6_mc_seq_ops,
- sizeof(struct igmp6_mc_iter_state));
-}
-
-static const struct file_operations igmp6_mc_seq_fops = {
- .open = igmp6_mc_seq_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release_net,
-};
-
struct igmp6_mcf_iter_state {
struct seq_net_private p;
struct net_device *dev;
@@ -2903,28 +2890,17 @@ static const struct seq_operations igmp6_mcf_seq_ops = {
.show = igmp6_mcf_seq_show,
};
-static int igmp6_mcf_seq_open(struct inode *inode, struct file *file)
-{
- return seq_open_net(inode, file, &igmp6_mcf_seq_ops,
- sizeof(struct igmp6_mcf_iter_state));
-}
-
-static const struct file_operations igmp6_mcf_seq_fops = {
- .open = igmp6_mcf_seq_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release_net,
-};
-
static int __net_init igmp6_proc_init(struct net *net)
{
int err;
err = -ENOMEM;
- if (!proc_create("igmp6", 0444, net->proc_net, &igmp6_mc_seq_fops))
+ if (!proc_create_net("igmp6", 0444, net->proc_net, &igmp6_mc_seq_ops,
+ sizeof(struct igmp6_mc_iter_state)))
goto out;
- if (!proc_create("mcfilter6", 0444, net->proc_net,
- &igmp6_mcf_seq_fops))
+ if (!proc_create_net("mcfilter6", 0444, net->proc_net,
+ &igmp6_mcf_seq_ops,
+ sizeof(struct igmp6_mcf_iter_state)))
goto out_proc_net_igmp6;
err = 0;
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 65c9e1a58305..97f79dc943d7 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -38,6 +38,7 @@
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
MODULE_DESCRIPTION("IPv6 packet filter");
+MODULE_ALIAS("ip6t_icmp6");
void *ip6t_alloc_initial_table(const struct xt_table *info)
{
diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
index 746eeae7f581..96f56bf49a30 100644
--- a/net/ipv6/ping.c
+++ b/net/ipv6/ping.c
@@ -24,6 +24,7 @@
#include <net/protocol.h>
#include <net/udp.h>
#include <net/transp_v6.h>
+#include <linux/proc_fs.h>
#include <net/ping.h>
/* Compatibility glue so we can support IPv6 when it's compiled as a module */
@@ -215,26 +216,24 @@ static int ping_v6_seq_show(struct seq_file *seq, void *v)
return 0;
}
-static struct ping_seq_afinfo ping_v6_seq_afinfo = {
- .name = "icmp6",
- .family = AF_INET6,
- .seq_fops = &ping_seq_fops,
- .seq_ops = {
- .start = ping_v6_seq_start,
- .show = ping_v6_seq_show,
- .next = ping_seq_next,
- .stop = ping_seq_stop,
- },
+static const struct seq_operations ping_v6_seq_ops = {
+ .start = ping_v6_seq_start,
+ .show = ping_v6_seq_show,
+ .next = ping_seq_next,
+ .stop = ping_seq_stop,
};
static int __net_init ping_v6_proc_init_net(struct net *net)
{
- return ping_proc_register(net, &ping_v6_seq_afinfo);
+ if (!proc_create_net("icmp6", 0444, net->proc_net, &ping_v6_seq_ops,
+ sizeof(struct ping_iter_state)))
+ return -ENOMEM;
+ return 0;
}
static void __net_init ping_v6_proc_exit_net(struct net *net)
{
- return ping_proc_unregister(net, &ping_v6_seq_afinfo);
+ remove_proc_entry("icmp6", net->proc_net);
}
static struct pernet_operations ping_v6_net_ops = {
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index a85f7e0b14b1..2356b4af7309 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -53,18 +53,6 @@ static int sockstat6_seq_show(struct seq_file *seq, void *v)
return 0;
}
-static int sockstat6_seq_open(struct inode *inode, struct file *file)
-{
- return single_open_net(inode, file, sockstat6_seq_show);
-}
-
-static const struct file_operations sockstat6_seq_fops = {
- .open = sockstat6_seq_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release_net,
-};
-
static const struct snmp_mib snmp6_ipstats_list[] = {
/* ipv6 mib according to RFC 2465 */
SNMP_MIB_ITEM("Ip6InReceives", IPSTATS_MIB_INPKTS),
@@ -242,18 +230,6 @@ static int snmp6_seq_show(struct seq_file *seq, void *v)
return 0;
}
-static int snmp6_seq_open(struct inode *inode, struct file *file)
-{
- return single_open_net(inode, file, snmp6_seq_show);
-}
-
-static const struct file_operations snmp6_seq_fops = {
- .open = snmp6_seq_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release_net,
-};
-
static int snmp6_dev_seq_show(struct seq_file *seq, void *v)
{
struct inet6_dev *idev = (struct inet6_dev *)seq->private;
@@ -267,18 +243,6 @@ static int snmp6_dev_seq_show(struct seq_file *seq, void *v)
return 0;
}
-static int snmp6_dev_seq_open(struct inode *inode, struct file *file)
-{
- return single_open(file, snmp6_dev_seq_show, PDE_DATA(inode));
-}
-
-static const struct file_operations snmp6_dev_seq_fops = {
- .open = snmp6_dev_seq_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
int snmp6_register_dev(struct inet6_dev *idev)
{
struct proc_dir_entry *p;
@@ -291,9 +255,8 @@ int snmp6_register_dev(struct inet6_dev *idev)
if (!net->mib.proc_net_devsnmp6)
return -ENOENT;
- p = proc_create_data(idev->dev->name, 0444,
- net->mib.proc_net_devsnmp6,
- &snmp6_dev_seq_fops, idev);
+ p = proc_create_single_data(idev->dev->name, 0444,
+ net->mib.proc_net_devsnmp6, snmp6_dev_seq_show, idev);
if (!p)
return -ENOMEM;
@@ -315,11 +278,12 @@ int snmp6_unregister_dev(struct inet6_dev *idev)
static int __net_init ipv6_proc_init_net(struct net *net)
{
- if (!proc_create("sockstat6", 0444, net->proc_net,
- &sockstat6_seq_fops))
+ if (!proc_create_net_single("sockstat6", 0444, net->proc_net,
+ sockstat6_seq_show, NULL))
return -ENOMEM;
- if (!proc_create("snmp6", 0444, net->proc_net, &snmp6_seq_fops))
+ if (!proc_create_net_single("snmp6", 0444, net->proc_net,
+ snmp6_seq_show, NULL))
goto proc_snmp6_fail;
net->mib.proc_net_devsnmp6 = proc_mkdir("dev_snmp6", net->proc_net);
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 5eb9b08947ed..ce6f0d15b5dd 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -1304,21 +1304,10 @@ static const struct seq_operations raw6_seq_ops = {
.show = raw6_seq_show,
};
-static int raw6_seq_open(struct inode *inode, struct file *file)
-{
- return raw_seq_open(inode, file, &raw_v6_hashinfo, &raw6_seq_ops);
-}
-
-static const struct file_operations raw6_seq_fops = {
- .open = raw6_seq_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release_net,
-};
-
static int __net_init raw6_init_net(struct net *net)
{
- if (!proc_create("raw6", 0444, net->proc_net, &raw6_seq_fops))
+ if (!proc_create_net_data("raw6", 0444, net->proc_net, &raw6_seq_ops,
+ sizeof(struct raw_iter_state), &raw_v6_hashinfo))
return -ENOMEM;
return 0;
@@ -1345,7 +1334,7 @@ void raw6_proc_exit(void)
}
#endif /* CONFIG_PROC_FS */
-/* Same as inet6_dgram_ops, sans udp_poll. */
+/* Same as inet6_dgram_ops, sans udp_poll_mask. */
const struct proto_ops inet6_sockraw_ops = {
.family = PF_INET6,
.owner = THIS_MODULE,
@@ -1355,7 +1344,7 @@ const struct proto_ops inet6_sockraw_ops = {
.socketpair = sock_no_socketpair, /* a do nothing */
.accept = sock_no_accept, /* a do nothing */
.getname = inet6_getname,
- .poll = datagram_poll, /* ok */
+ .poll_mask = datagram_poll_mask, /* ok */
.ioctl = inet6_ioctl, /* must change */
.listen = sock_no_listen, /* ok */
.shutdown = inet_shutdown, /* ok */
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index cde7d8251377..a6598762d2c1 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1835,11 +1835,16 @@ static void ip6_multipath_l3_keys(const struct sk_buff *skb,
const struct ipv6hdr *inner_iph;
const struct icmp6hdr *icmph;
struct ipv6hdr _inner_iph;
+ struct icmp6hdr _icmph;
if (likely(outer_iph->nexthdr != IPPROTO_ICMPV6))
goto out;
- icmph = icmp6_hdr(skb);
+ icmph = skb_header_pointer(skb, skb_transport_offset(skb),
+ sizeof(_icmph), &_icmph);
+ if (!icmph)
+ goto out;
+
if (icmph->icmp6_type != ICMPV6_DEST_UNREACH &&
icmph->icmp6_type != ICMPV6_PKT_TOOBIG &&
icmph->icmp6_type != ICMPV6_TIME_EXCEED &&
@@ -4857,14 +4862,6 @@ static int ip6_route_dev_notify(struct notifier_block *this,
*/
#ifdef CONFIG_PROC_FS
-
-static const struct file_operations ipv6_route_proc_fops = {
- .open = ipv6_route_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release_net,
-};
-
static int rt6_stats_seq_show(struct seq_file *seq, void *v)
{
struct net *net = (struct net *)seq->private;
@@ -4879,18 +4876,6 @@ static int rt6_stats_seq_show(struct seq_file *seq, void *v)
return 0;
}
-
-static int rt6_stats_seq_open(struct inode *inode, struct file *file)
-{
- return single_open_net(inode, file, rt6_stats_seq_show);
-}
-
-static const struct file_operations rt6_stats_seq_fops = {
- .open = rt6_stats_seq_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release_net,
-};
#endif /* CONFIG_PROC_FS */
#ifdef CONFIG_SYSCTL
@@ -5095,8 +5080,10 @@ static void __net_exit ip6_route_net_exit(struct net *net)
static int __net_init ip6_route_net_init_late(struct net *net)
{
#ifdef CONFIG_PROC_FS
- proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
- proc_create("rt6_stats", 0444, net->proc_net, &rt6_stats_seq_fops);
+ proc_create_net("ipv6_route", 0, net->proc_net, &ipv6_route_seq_ops,
+ sizeof(struct ipv6_route_iter));
+ proc_create_net_single("rt6_stats", 0444, net->proc_net,
+ rt6_stats_seq_show, NULL);
#endif
return 0;
}
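
The multipath-hash fix replaces a bare icmp6_hdr() with skb_header_pointer(): on this path the ICMPv6 header is not guaranteed to sit in the skb's linear area, so it is copied to an on-stack buffer when needed, and a packet too short to hold one no longer feeds out-of-bounds bytes into the hash. The idiom:

    struct icmp6hdr _icmph;
    const struct icmp6hdr *icmph;

    /* Copies into _icmph if the header is not linear; returns NULL
     * when the packet is too short to contain one at all. */
    icmph = skb_header_pointer(skb, skb_transport_offset(skb),
                               sizeof(_icmph), &_icmph);
    if (!icmph)
            goto out;
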
diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c
index 5fe139484919..bf4763fd68c2 100644
--- a/net/ipv6/seg6_iptunnel.c
+++ b/net/ipv6/seg6_iptunnel.c
@@ -103,7 +103,7 @@ int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto)
hdrlen = (osrh->hdrlen + 1) << 3;
tot_len = hdrlen + sizeof(*hdr);
- err = skb_cow_head(skb, tot_len);
+ err = skb_cow_head(skb, tot_len + skb->mac_len);
if (unlikely(err))
return err;
@@ -161,7 +161,7 @@ int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh)
hdrlen = (osrh->hdrlen + 1) << 3;
- err = skb_cow_head(skb, hdrlen);
+ err = skb_cow_head(skb, hdrlen + skb->mac_len);
if (unlikely(err))
return err;
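
Both seg6 encapsulation paths under-reserved headroom: skb_cow_head() covered the headers being inserted but not the MAC header that is pushed back before the packet leaves on the output path, so a later skb_push() could run past the allocated head. Accounting for skb->mac_len closes the gap:

    /* Reserve room for the headers added here *and* for the
     * link-layer header that will be re-pushed on transmit. */
    err = skb_cow_head(skb, tot_len + skb->mac_len);
    if (unlikely(err))
            return err;
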
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 2afce37a7177..e9400ffa7875 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -1371,7 +1371,7 @@ static void ipip6_tunnel_setup(struct net_device *dev)
dev->hard_header_len = LL_MAX_HEADER + t_hlen;
dev->mtu = ETH_DATA_LEN - t_hlen;
dev->min_mtu = IPV6_MIN_MTU;
- dev->max_mtu = 0xFFF8 - t_hlen;
+ dev->max_mtu = IP6_MAX_MTU - t_hlen;
dev->flags = IFF_NOARP;
netif_keep_dst(dev);
dev->addr_len = 4;
@@ -1583,7 +1583,8 @@ static int ipip6_newlink(struct net *src_net, struct net_device *dev,
if (tb[IFLA_MTU]) {
u32 mtu = nla_get_u32(tb[IFLA_MTU]);
- if (mtu >= IPV6_MIN_MTU && mtu <= 0xFFF8 - dev->hard_header_len)
+ if (mtu >= IPV6_MIN_MTU &&
+ mtu <= IP6_MAX_MTU - dev->hard_header_len)
dev->mtu = mtu;
}
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 6d664d83cd16..d2ce66b23430 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1909,30 +1909,28 @@ out:
return 0;
}
-static const struct file_operations tcp6_afinfo_seq_fops = {
- .open = tcp_seq_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release_net
+static const struct seq_operations tcp6_seq_ops = {
+ .show = tcp6_seq_show,
+ .start = tcp_seq_start,
+ .next = tcp_seq_next,
+ .stop = tcp_seq_stop,
};
static struct tcp_seq_afinfo tcp6_seq_afinfo = {
- .name = "tcp6",
.family = AF_INET6,
- .seq_fops = &tcp6_afinfo_seq_fops,
- .seq_ops = {
- .show = tcp6_seq_show,
- },
};
int __net_init tcp6_proc_init(struct net *net)
{
- return tcp_proc_register(net, &tcp6_seq_afinfo);
+ if (!proc_create_net_data("tcp6", 0444, net->proc_net, &tcp6_seq_ops,
+ sizeof(struct tcp_iter_state), &tcp6_seq_afinfo))
+ return -ENOMEM;
+ return 0;
}
void tcp6_proc_exit(struct net *net)
{
- tcp_proc_unregister(net, &tcp6_seq_afinfo);
+ remove_proc_entry("tcp6", net->proc_net);
}
#endif
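
This is the template for the procfs conversions that recur through the rest of this series (udp6, udplite6, kcm, pfkey, pppol2tp, IPVS, conntrack, nf_log, nfnetlink_queue): the per-protocol file_operations and open() wrapper disappear, proc_create_net_data() wires up the seq_operations and the iterator state size itself, and teardown becomes a plain remove_proc_entry(). In outline, with placeholder names:

    static const struct seq_operations foo_seq_ops = {
            .start = foo_seq_start,
            .next  = foo_seq_next,
            .stop  = foo_seq_stop,
            .show  = foo_seq_show,
    };

    static int __net_init foo_proc_init(struct net *net)
    {
            /* the trailing data pointer is stashed in the proc entry
             * for the iterator to retrieve; state_size is allocated
             * as the seq_file's net-aware private data */
            if (!proc_create_net_data("foo", 0444, net->proc_net,
                                      &foo_seq_ops,
                                      sizeof(struct foo_iter_state),
                                      &foo_afinfo))
                    return -ENOMEM;
            return 0;
    }
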
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 4ec76a87aeb8..00e2112da26d 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -148,9 +148,9 @@ static int compute_score(struct sock *sk, struct net *net,
bool dev_match = (sk->sk_bound_dev_if == dif ||
sk->sk_bound_dev_if == sdif);
- if (exact_dif && !dev_match)
+ if (!dev_match)
return -1;
- if (sk->sk_bound_dev_if && dev_match)
+ if (sk->sk_bound_dev_if)
score++;
}
@@ -1480,31 +1480,30 @@ int udp6_seq_show(struct seq_file *seq, void *v)
return 0;
}
-static const struct file_operations udp6_afinfo_seq_fops = {
- .open = udp_seq_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release_net
+const struct seq_operations udp6_seq_ops = {
+ .start = udp_seq_start,
+ .next = udp_seq_next,
+ .stop = udp_seq_stop,
+ .show = udp6_seq_show,
};
+EXPORT_SYMBOL(udp6_seq_ops);
static struct udp_seq_afinfo udp6_seq_afinfo = {
- .name = "udp6",
.family = AF_INET6,
.udp_table = &udp_table,
- .seq_fops = &udp6_afinfo_seq_fops,
- .seq_ops = {
- .show = udp6_seq_show,
- },
};
int __net_init udp6_proc_init(struct net *net)
{
- return udp_proc_register(net, &udp6_seq_afinfo);
+ if (!proc_create_net_data("udp6", 0444, net->proc_net, &udp6_seq_ops,
+ sizeof(struct udp_iter_state), &udp6_seq_afinfo))
+ return -ENOMEM;
+ return 0;
}
void udp6_proc_exit(struct net *net)
{
- udp_proc_unregister(net, &udp6_seq_afinfo);
+ remove_proc_entry("udp6", net->proc_net);
}
#endif /* CONFIG_PROC_FS */
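
The compute_score() change tightens device matching: previously a socket with sk_bound_dev_if set could still be selected for a packet that arrived on a different interface whenever exact_dif was false; now any device mismatch disqualifies the socket outright, while the score bonus for an explicit binding is kept. A sketch of the resulting rule (in the real function this logic runs under the existing bound-device guard; names are illustrative):

    static int bound_dev_score(const struct sock *sk, int dif, int sdif)
    {
            if (sk->sk_bound_dev_if) {
                    bool dev_match = sk->sk_bound_dev_if == dif ||
                                     sk->sk_bound_dev_if == sdif;

                    if (!dev_match)
                            return -1;      /* bound elsewhere: skip */
                    return 1;               /* prefer bound sockets  */
            }
            return 0;
    }
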
diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c
index 14ae32bb1f3d..5000ad6878e6 100644
--- a/net/ipv6/udplite.c
+++ b/net/ipv6/udplite.c
@@ -12,6 +12,7 @@
* 2 of the License, or (at your option) any later version.
*/
#include <linux/export.h>
+#include <linux/proc_fs.h>
#include "udp_impl.h"
static int udplitev6_rcv(struct sk_buff *skb)
@@ -92,32 +93,23 @@ void udplitev6_exit(void)
}
#ifdef CONFIG_PROC_FS
-
-static const struct file_operations udplite6_afinfo_seq_fops = {
- .open = udp_seq_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release_net
-};
-
static struct udp_seq_afinfo udplite6_seq_afinfo = {
- .name = "udplite6",
.family = AF_INET6,
.udp_table = &udplite_table,
- .seq_fops = &udplite6_afinfo_seq_fops,
- .seq_ops = {
- .show = udp6_seq_show,
- },
};
static int __net_init udplite6_proc_init_net(struct net *net)
{
- return udp_proc_register(net, &udplite6_seq_afinfo);
+ if (!proc_create_net_data("udplite6", 0444, net->proc_net,
+ &udp6_seq_ops, sizeof(struct udp_iter_state),
+ &udplite6_seq_afinfo))
+ return -ENOMEM;
+ return 0;
}
static void __net_exit udplite6_proc_exit_net(struct net *net)
{
- udp_proc_unregister(net, &udplite6_seq_afinfo);
+ remove_proc_entry("udplite6", net->proc_net);
}
static struct pernet_operations udplite6_net_ops = {
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 416fe67271a9..86dba282a147 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -126,7 +126,7 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse)
struct flowi6 *fl6 = &fl->u.ip6;
int onlyproto = 0;
const struct ipv6hdr *hdr = ipv6_hdr(skb);
- u16 offset = sizeof(*hdr);
+ u32 offset = sizeof(*hdr);
struct ipv6_opt_hdr *exthdr;
const unsigned char *nh = skb_network_header(skb);
u16 nhoff = IP6CB(skb)->nhoff;
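
Widening offset from u16 to u32 matters because _decode_session6() accumulates extension-header lengths while walking the chain; a crafted packet with enough headers can push the running offset past 65535, and a u16 silently wraps back into already-parsed data. A self-contained illustration of the wrap:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            uint16_t off16 = 65530;
            uint32_t off32 = 65530;

            off16 += 16;    /* wraps to 10      */
            off32 += 16;    /* 65546, as needed */
            printf("%u %u\n", (unsigned)off16, (unsigned)off32);
            return 0;
    }
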
diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c
index f85f0d7480ac..4a46df8441c9 100644
--- a/net/ipv6/xfrm6_tunnel.c
+++ b/net/ipv6/xfrm6_tunnel.c
@@ -341,6 +341,9 @@ static void __net_exit xfrm6_tunnel_net_exit(struct net *net)
struct xfrm6_tunnel_net *xfrm6_tn = xfrm6_tunnel_pernet(net);
unsigned int i;
+ xfrm_state_flush(net, IPSEC_PROTO_ANY, false);
+ xfrm_flush_gc();
+
for (i = 0; i < XFRM6_TUNNEL_SPI_BYADDR_HSIZE; i++)
WARN_ON_ONCE(!hlist_empty(&xfrm6_tn->spi_byaddr[i]));
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index 893a022f9620..68e86257a549 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -1488,14 +1488,11 @@ static inline __poll_t iucv_accept_poll(struct sock *parent)
return 0;
}
-__poll_t iucv_sock_poll(struct file *file, struct socket *sock,
- poll_table *wait)
+static __poll_t iucv_sock_poll_mask(struct socket *sock, __poll_t events)
{
struct sock *sk = sock->sk;
__poll_t mask = 0;
- sock_poll_wait(file, sk_sleep(sk), wait);
-
if (sk->sk_state == IUCV_LISTEN)
return iucv_accept_poll(sk);
@@ -2388,7 +2385,7 @@ static const struct proto_ops iucv_sock_ops = {
.getname = iucv_sock_getname,
.sendmsg = iucv_sock_sendmsg,
.recvmsg = iucv_sock_recvmsg,
- .poll = iucv_sock_poll,
+ .poll_mask = iucv_sock_poll_mask,
.ioctl = sock_no_ioctl,
.mmap = sock_no_mmap,
.socketpair = sock_no_socketpair,
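
This is the first of several ->poll to ->poll_mask conversions in this series (kcm, pfkey, l2tp and llc follow). In this tree the common socket layer performs the sock_poll_wait() registration itself, so the per-protocol callback is reduced to computing a readiness mask from socket state; it no longer receives the file or a poll_table. Rough shape of such a callback, with hypothetical names:

    static __poll_t foo_poll_mask(struct socket *sock, __poll_t events)
    {
            struct sock *sk = sock->sk;
            __poll_t mask = 0;

            if (!skb_queue_empty(&sk->sk_receive_queue))
                    mask |= EPOLLIN | EPOLLRDNORM;
            if (sk->sk_shutdown == SHUTDOWN_MASK)
                    mask |= EPOLLHUP;
            return mask;
    }
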
diff --git a/net/kcm/kcmproc.c b/net/kcm/kcmproc.c
index 1fac92543094..370da2f80e3c 100644
--- a/net/kcm/kcmproc.c
+++ b/net/kcm/kcmproc.c
@@ -15,12 +15,6 @@
#include <net/tcp.h>
#ifdef CONFIG_PROC_FS
-struct kcm_seq_muxinfo {
- char *name;
- const struct file_operations *seq_fops;
- const struct seq_operations seq_ops;
-};
-
static struct kcm_mux *kcm_get_first(struct seq_file *seq)
{
struct net *net = seq_file_net(seq);
@@ -86,14 +80,6 @@ struct kcm_proc_mux_state {
int idx;
};
-static int kcm_seq_open(struct inode *inode, struct file *file)
-{
- struct kcm_seq_muxinfo *muxinfo = PDE_DATA(inode);
-
- return seq_open_net(inode, file, &muxinfo->seq_ops,
- sizeof(struct kcm_proc_mux_state));
-}
-
static void kcm_format_mux_header(struct seq_file *seq)
{
struct net *net = seq_file_net(seq);
@@ -246,44 +232,13 @@ static int kcm_seq_show(struct seq_file *seq, void *v)
return 0;
}
-static const struct file_operations kcm_seq_fops = {
- .open = kcm_seq_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release_net,
+static const struct seq_operations kcm_seq_ops = {
+ .show = kcm_seq_show,
+ .start = kcm_seq_start,
+ .next = kcm_seq_next,
+ .stop = kcm_seq_stop,
};
-static struct kcm_seq_muxinfo kcm_seq_muxinfo = {
- .name = "kcm",
- .seq_fops = &kcm_seq_fops,
- .seq_ops = {
- .show = kcm_seq_show,
- .start = kcm_seq_start,
- .next = kcm_seq_next,
- .stop = kcm_seq_stop,
- }
-};
-
-static int kcm_proc_register(struct net *net, struct kcm_seq_muxinfo *muxinfo)
-{
- struct proc_dir_entry *p;
- int rc = 0;
-
- p = proc_create_data(muxinfo->name, 0444, net->proc_net,
- muxinfo->seq_fops, muxinfo);
- if (!p)
- rc = -ENOMEM;
- return rc;
-}
-EXPORT_SYMBOL(kcm_proc_register);
-
-static void kcm_proc_unregister(struct net *net,
- struct kcm_seq_muxinfo *muxinfo)
-{
- remove_proc_entry(muxinfo->name, net->proc_net);
-}
-EXPORT_SYMBOL(kcm_proc_unregister);
-
static int kcm_stats_seq_show(struct seq_file *seq, void *v)
{
struct kcm_psock_stats psock_stats;
@@ -390,30 +345,14 @@ static int kcm_stats_seq_show(struct seq_file *seq, void *v)
return 0;
}
-static int kcm_stats_seq_open(struct inode *inode, struct file *file)
-{
- return single_open_net(inode, file, kcm_stats_seq_show);
-}
-
-static const struct file_operations kcm_stats_seq_fops = {
- .open = kcm_stats_seq_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release_net,
-};
-
static int kcm_proc_init_net(struct net *net)
{
- int err;
-
- if (!proc_create("kcm_stats", 0444, net->proc_net,
- &kcm_stats_seq_fops)) {
- err = -ENOMEM;
+ if (!proc_create_net_single("kcm_stats", 0444, net->proc_net,
+ kcm_stats_seq_show, NULL))
goto out_kcm_stats;
- }
- err = kcm_proc_register(net, &kcm_seq_muxinfo);
- if (err)
+ if (!proc_create_net("kcm", 0444, net->proc_net, &kcm_seq_ops,
+ sizeof(struct kcm_proc_mux_state)))
goto out_kcm;
return 0;
@@ -421,12 +360,12 @@ static int kcm_proc_init_net(struct net *net)
out_kcm:
remove_proc_entry("kcm_stats", net->proc_net);
out_kcm_stats:
- return err;
+ return -ENOMEM;
}
static void kcm_proc_exit_net(struct net *net)
{
- kcm_proc_unregister(net, &kcm_seq_muxinfo);
+ remove_proc_entry("kcm", net->proc_net);
remove_proc_entry("kcm_stats", net->proc_net);
}
diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c
index dc76bc346829..84b7d5c6fec8 100644
--- a/net/kcm/kcmsock.c
+++ b/net/kcm/kcmsock.c
@@ -1336,9 +1336,9 @@ static void init_kcm_sock(struct kcm_sock *kcm, struct kcm_mux *mux)
struct list_head *head;
int index = 0;
- /* For SOCK_SEQPACKET sock type, datagram_poll checks the sk_state, so
- * we set sk_state, otherwise epoll_wait always returns right away with
- * EPOLLHUP
+ /* For SOCK_SEQPACKET sock type, datagram_poll_mask checks the sk_state,
+ * so we set sk_state, otherwise epoll_wait always returns right away
+ * with EPOLLHUP
*/
kcm->sk.sk_state = TCP_ESTABLISHED;
@@ -1671,7 +1671,7 @@ static struct file *kcm_clone(struct socket *osock)
__module_get(newsock->ops->owner);
newsk = sk_alloc(sock_net(osock->sk), PF_KCM, GFP_KERNEL,
- &kcm_proto, true);
+ &kcm_proto, false);
if (!newsk) {
sock_release(newsock);
return ERR_PTR(-ENOMEM);
@@ -1903,7 +1903,7 @@ static const struct proto_ops kcm_dgram_ops = {
.socketpair = sock_no_socketpair,
.accept = sock_no_accept,
.getname = sock_no_getname,
- .poll = datagram_poll,
+ .poll_mask = datagram_poll_mask,
.ioctl = kcm_ioctl,
.listen = sock_no_listen,
.shutdown = sock_no_shutdown,
@@ -1924,7 +1924,7 @@ static const struct proto_ops kcm_seqpacket_ops = {
.socketpair = sock_no_socketpair,
.accept = sock_no_accept,
.getname = sock_no_getname,
- .poll = datagram_poll,
+ .poll_mask = datagram_poll_mask,
.ioctl = kcm_ioctl,
.listen = sock_no_listen,
.shutdown = sock_no_shutdown,
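
Besides the poll_mask switch, kcm_clone() now passes kern = false to sk_alloc(): the clone is created on behalf of a userspace ioctl, and marking it as a kernel socket bypassed per-user socket accounting and security labeling. For reference, the signature in this tree:

    /* struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
     *                       struct proto *prot, int kern);
     * kern distinguishes kernel-internal sockets from user-visible ones.
     */
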
diff --git a/net/key/af_key.c b/net/key/af_key.c
index 7e2e7188e7f4..8bdc1cbe490a 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -437,6 +437,24 @@ static int verify_address_len(const void *p)
return 0;
}
+static inline int sadb_key_len(const struct sadb_key *key)
+{
+ int key_bytes = DIV_ROUND_UP(key->sadb_key_bits, 8);
+
+ return DIV_ROUND_UP(sizeof(struct sadb_key) + key_bytes,
+ sizeof(uint64_t));
+}
+
+static int verify_key_len(const void *p)
+{
+ const struct sadb_key *key = p;
+
+ if (sadb_key_len(key) > key->sadb_key_len)
+ return -EINVAL;
+
+ return 0;
+}
+
static inline int pfkey_sec_ctx_len(const struct sadb_x_sec_ctx *sec_ctx)
{
return DIV_ROUND_UP(sizeof(struct sadb_x_sec_ctx) +
@@ -533,16 +551,25 @@ static int parse_exthdrs(struct sk_buff *skb, const struct sadb_msg *hdr, void *
return -EINVAL;
if (ext_hdrs[ext_type-1] != NULL)
return -EINVAL;
- if (ext_type == SADB_EXT_ADDRESS_SRC ||
- ext_type == SADB_EXT_ADDRESS_DST ||
- ext_type == SADB_EXT_ADDRESS_PROXY ||
- ext_type == SADB_X_EXT_NAT_T_OA) {
+ switch (ext_type) {
+ case SADB_EXT_ADDRESS_SRC:
+ case SADB_EXT_ADDRESS_DST:
+ case SADB_EXT_ADDRESS_PROXY:
+ case SADB_X_EXT_NAT_T_OA:
if (verify_address_len(p))
return -EINVAL;
- }
- if (ext_type == SADB_X_EXT_SEC_CTX) {
+ break;
+ case SADB_X_EXT_SEC_CTX:
if (verify_sec_ctx_len(p))
return -EINVAL;
+ break;
+ case SADB_EXT_KEY_AUTH:
+ case SADB_EXT_KEY_ENCRYPT:
+ if (verify_key_len(p))
+ return -EINVAL;
+ break;
+ default:
+ break;
}
ext_hdrs[ext_type-1] = (void *) p;
}
@@ -1104,14 +1131,12 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net,
key = ext_hdrs[SADB_EXT_KEY_AUTH - 1];
if (key != NULL &&
sa->sadb_sa_auth != SADB_X_AALG_NULL &&
- ((key->sadb_key_bits+7) / 8 == 0 ||
- (key->sadb_key_bits+7) / 8 > key->sadb_key_len * sizeof(uint64_t)))
+ key->sadb_key_bits == 0)
return ERR_PTR(-EINVAL);
key = ext_hdrs[SADB_EXT_KEY_ENCRYPT-1];
if (key != NULL &&
sa->sadb_sa_encrypt != SADB_EALG_NULL &&
- ((key->sadb_key_bits+7) / 8 == 0 ||
- (key->sadb_key_bits+7) / 8 > key->sadb_key_len * sizeof(uint64_t)))
+ key->sadb_key_bits == 0)
return ERR_PTR(-EINVAL);
x = xfrm_state_alloc(net);
@@ -3726,7 +3751,7 @@ static const struct proto_ops pfkey_ops = {
/* Now the operations that really occur. */
.release = pfkey_release,
- .poll = datagram_poll,
+ .poll_mask = datagram_poll_mask,
.sendmsg = pfkey_sendmsg,
.recvmsg = pfkey_recvmsg,
};
@@ -3787,24 +3812,12 @@ static const struct seq_operations pfkey_seq_ops = {
.show = pfkey_seq_show,
};
-static int pfkey_seq_open(struct inode *inode, struct file *file)
-{
- return seq_open_net(inode, file, &pfkey_seq_ops,
- sizeof(struct seq_net_private));
-}
-
-static const struct file_operations pfkey_proc_ops = {
- .open = pfkey_seq_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release_net,
-};
-
static int __net_init pfkey_init_proc(struct net *net)
{
struct proc_dir_entry *e;
- e = proc_create("pfkey", 0, net->proc_net, &pfkey_proc_ops);
+ e = proc_create_net("pfkey", 0, net->proc_net, &pfkey_seq_ops,
+ sizeof(struct seq_net_private));
if (e == NULL)
return -ENOMEM;
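
verify_key_len() moves key-extension validation into the generic parser: previously the length check lived only in pfkey_msg2xfrm_state(), so SADB messages carrying keys on other paths were never checked against the declared sadb_key_len (which counts 64-bit units and must cover the header plus the key bits). The remaining checks in pfkey_msg2xfrm_state() then shrink to rejecting zero-length keys. A standalone worked example of the arithmetic, using the 8-byte RFC 2367 sadb_key header:

    #include <stdio.h>

    #define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

    int main(void)
    {
            unsigned int key_bits  = 160;  /* e.g. an HMAC-SHA1 auth key */
            unsigned int key_bytes = DIV_ROUND_UP(key_bits, 8);   /* 20 */
            unsigned int hdr_bytes = 8;    /* sizeof(struct sadb_key)   */

            /* minimum sadb_key_len in 64-bit units: ceil(28 / 8) = 4 */
            printf("min sadb_key_len = %u\n",
                   DIV_ROUND_UP(hdr_bytes + key_bytes, 8));
            return 0;
    }
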
diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
index a9c05b2bc1b0..181073bf6925 100644
--- a/net/l2tp/l2tp_ip.c
+++ b/net/l2tp/l2tp_ip.c
@@ -613,7 +613,7 @@ static const struct proto_ops l2tp_ip_ops = {
.socketpair = sock_no_socketpair,
.accept = sock_no_accept,
.getname = l2tp_ip_getname,
- .poll = datagram_poll,
+ .poll_mask = datagram_poll_mask,
.ioctl = inet_ioctl,
.listen = sock_no_listen,
.shutdown = inet_shutdown,
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index 957369192ca1..336e4c00abbc 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -754,7 +754,7 @@ static const struct proto_ops l2tp_ip6_ops = {
.socketpair = sock_no_socketpair,
.accept = sock_no_accept,
.getname = l2tp_ip6_getname,
- .poll = datagram_poll,
+ .poll_mask = datagram_poll_mask,
.ioctl = inet6_ioctl,
.listen = sock_no_listen,
.shutdown = inet_shutdown,
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index 1fd9e145076a..3d8ca1231f8f 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -1742,24 +1742,6 @@ static const struct seq_operations pppol2tp_seq_ops = {
.stop = pppol2tp_seq_stop,
.show = pppol2tp_seq_show,
};
-
-/* Called when our /proc file is opened. We allocate data for use when
- * iterating our tunnel / session contexts and store it in the private
- * data of the seq_file.
- */
-static int pppol2tp_proc_open(struct inode *inode, struct file *file)
-{
- return seq_open_net(inode, file, &pppol2tp_seq_ops,
- sizeof(struct pppol2tp_seq_data));
-}
-
-static const struct file_operations pppol2tp_proc_fops = {
- .open = pppol2tp_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release_net,
-};
-
#endif /* CONFIG_PROC_FS */
/*****************************************************************************
@@ -1771,8 +1753,8 @@ static __net_init int pppol2tp_init_net(struct net *net)
struct proc_dir_entry *pde;
int err = 0;
- pde = proc_create("pppol2tp", 0444, net->proc_net,
- &pppol2tp_proc_fops);
+ pde = proc_create_net("pppol2tp", 0444, net->proc_net,
+ &pppol2tp_seq_ops, sizeof(struct pppol2tp_seq_data));
if (!pde) {
err = -ENOMEM;
goto out;
@@ -1806,7 +1788,7 @@ static const struct proto_ops pppol2tp_ops = {
.socketpair = sock_no_socketpair,
.accept = sock_no_accept,
.getname = pppol2tp_getname,
- .poll = datagram_poll,
+ .poll_mask = datagram_poll_mask,
.listen = sock_no_listen,
.shutdown = sock_no_shutdown,
.setsockopt = pppol2tp_setsockopt,
diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index cb80ebb38311..804de8490186 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -930,6 +930,9 @@ static int llc_ui_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
if (size > llc->dev->mtu)
size = llc->dev->mtu;
copied = size - hdrlen;
+ rc = -EINVAL;
+ if (copied < 0)
+ goto release;
release_sock(sk);
skb = sock_alloc_send_skb(sk, size, noblock, &rc);
lock_sock(sk);
@@ -1189,7 +1192,7 @@ static const struct proto_ops llc_ui_ops = {
.socketpair = sock_no_socketpair,
.accept = llc_ui_accept,
.getname = llc_ui_getname,
- .poll = datagram_poll,
+ .poll_mask = datagram_poll_mask,
.ioctl = llc_ui_ioctl,
.listen = llc_ui_listen,
.shutdown = llc_ui_shutdown,
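
The new llc_ui_sendmsg() check guards against devices whose MTU is smaller than the LLC header: size is clamped to llc->dev->mtu, so copied = size - hdrlen could go negative and the negative count could then escape as a length further down the send path. Failing early with -EINVAL is the only sane answer. Roughly:

    /* Before the fix, with dev->mtu < hdrlen:
     *   size   = llc->dev->mtu;     e.g.  4
     *   copied = size - hdrlen;     e.g.  4 - 31 = -27
     * and -27 could later be interpreted as a huge unsigned length.
     */
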
diff --git a/net/llc/llc_proc.c b/net/llc/llc_proc.c
index 62ea0aed94b4..f3a36c16a5e7 100644
--- a/net/llc/llc_proc.c
+++ b/net/llc/llc_proc.c
@@ -214,30 +214,6 @@ static const struct seq_operations llc_seq_core_ops = {
.show = llc_seq_core_show,
};
-static int llc_seq_socket_open(struct inode *inode, struct file *file)
-{
- return seq_open(file, &llc_seq_socket_ops);
-}
-
-static int llc_seq_core_open(struct inode *inode, struct file *file)
-{
- return seq_open(file, &llc_seq_core_ops);
-}
-
-static const struct file_operations llc_seq_socket_fops = {
- .open = llc_seq_socket_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
-static const struct file_operations llc_seq_core_fops = {
- .open = llc_seq_core_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
static struct proc_dir_entry *llc_proc_dir;
int __init llc_proc_init(void)
@@ -249,11 +225,11 @@ int __init llc_proc_init(void)
if (!llc_proc_dir)
goto out;
- p = proc_create("socket", 0444, llc_proc_dir, &llc_seq_socket_fops);
+ p = proc_create_seq("socket", 0444, llc_proc_dir, &llc_seq_socket_ops);
if (!p)
goto out_socket;
- p = proc_create("core", 0444, llc_proc_dir, &llc_seq_core_fops);
+ p = proc_create_seq("core", 0444, llc_proc_dir, &llc_seq_core_ops);
if (!p)
goto out_core;
diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c
index 595c662a61e8..ac4295296514 100644
--- a/net/mac80211/agg-tx.c
+++ b/net/mac80211/agg-tx.c
@@ -8,6 +8,7 @@
* Copyright 2007, Michael Wu <flamingice@sourmilk.net>
* Copyright 2007-2010, Intel Corporation
* Copyright(c) 2015-2017 Intel Deutschland GmbH
+ * Copyright (C) 2018 Intel Corporation
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -970,6 +971,9 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local,
sta->ampdu_mlme.addba_req_num[tid] = 0;
+ tid_tx->timeout =
+ le16_to_cpu(mgmt->u.action.u.addba_resp.timeout);
+
if (tid_tx->timeout) {
mod_timer(&tid_tx->session_timer,
TU_TO_EXP_TIME(tid_tx->timeout));
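
The ADDBA fix stores the timeout from the peer's response rather than keeping the locally requested value: the recipient is allowed to negotiate a different session timeout, and the inactivity timer must run with the negotiated number of TUs. The mac80211/tx.c hunk further down complements this by updating last_tx even while the timeout is zero, so a session whose timeout is raised later does not expire spuriously.
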
diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c
index 0f6c9ca59062..5b5b0f95ffd1 100644
--- a/net/mac80211/mesh_plink.c
+++ b/net/mac80211/mesh_plink.c
@@ -401,7 +401,7 @@ u32 mesh_plink_deactivate(struct sta_info *sta)
static void mesh_sta_info_init(struct ieee80211_sub_if_data *sdata,
struct sta_info *sta,
- struct ieee802_11_elems *elems, bool insert)
+ struct ieee802_11_elems *elems)
{
struct ieee80211_local *local = sdata->local;
struct ieee80211_supported_band *sband;
@@ -447,7 +447,7 @@ static void mesh_sta_info_init(struct ieee80211_sub_if_data *sdata,
sta->sta.bandwidth = IEEE80211_STA_RX_BW_20;
}
- if (insert)
+ if (!test_sta_flag(sta, WLAN_STA_RATE_CONTROL))
rate_control_rate_init(sta);
else
rate_control_rate_update(local, sband, sta, changed);
@@ -551,7 +551,7 @@ mesh_sta_info_get(struct ieee80211_sub_if_data *sdata,
rcu_read_lock();
sta = sta_info_get(sdata, addr);
if (sta) {
- mesh_sta_info_init(sdata, sta, elems, false);
+ mesh_sta_info_init(sdata, sta, elems);
} else {
rcu_read_unlock();
/* can't run atomic */
@@ -561,7 +561,7 @@ mesh_sta_info_get(struct ieee80211_sub_if_data *sdata,
return NULL;
}
- mesh_sta_info_init(sdata, sta, elems, true);
+ mesh_sta_info_init(sdata, sta, elems);
if (sta_info_insert_rcu(sta))
return NULL;
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 69449db7e283..233068756502 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -36,6 +36,7 @@
#define IEEE80211_AUTH_TIMEOUT (HZ / 5)
#define IEEE80211_AUTH_TIMEOUT_LONG (HZ / 2)
#define IEEE80211_AUTH_TIMEOUT_SHORT (HZ / 10)
+#define IEEE80211_AUTH_TIMEOUT_SAE (HZ * 2)
#define IEEE80211_AUTH_MAX_TRIES 3
#define IEEE80211_AUTH_WAIT_ASSOC (HZ * 5)
#define IEEE80211_ASSOC_TIMEOUT (HZ / 5)
@@ -1787,7 +1788,7 @@ static bool ieee80211_sta_wmm_params(struct ieee80211_local *local,
params[ac].acm = acm;
params[ac].uapsd = uapsd;
- if (params->cw_min == 0 ||
+ if (params[ac].cw_min == 0 ||
params[ac].cw_min > params[ac].cw_max) {
sdata_info(sdata,
"AP has invalid WMM params (CWmin/max=%d/%d for ACI %d), using defaults\n",
@@ -3814,16 +3815,19 @@ static int ieee80211_auth(struct ieee80211_sub_if_data *sdata)
tx_flags);
if (tx_flags == 0) {
- auth_data->timeout = jiffies + IEEE80211_AUTH_TIMEOUT;
- auth_data->timeout_started = true;
- run_again(sdata, auth_data->timeout);
+ if (auth_data->algorithm == WLAN_AUTH_SAE)
+ auth_data->timeout = jiffies +
+ IEEE80211_AUTH_TIMEOUT_SAE;
+ else
+ auth_data->timeout = jiffies + IEEE80211_AUTH_TIMEOUT;
} else {
auth_data->timeout =
round_jiffies_up(jiffies + IEEE80211_AUTH_TIMEOUT_LONG);
- auth_data->timeout_started = true;
- run_again(sdata, auth_data->timeout);
}
+ auth_data->timeout_started = true;
+ run_again(sdata, auth_data->timeout);
+
return 0;
}
@@ -3894,8 +3898,15 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata)
ifmgd->status_received = false;
if (ifmgd->auth_data && ieee80211_is_auth(fc)) {
if (status_acked) {
- ifmgd->auth_data->timeout =
- jiffies + IEEE80211_AUTH_TIMEOUT_SHORT;
+ if (ifmgd->auth_data->algorithm ==
+ WLAN_AUTH_SAE)
+ ifmgd->auth_data->timeout =
+ jiffies +
+ IEEE80211_AUTH_TIMEOUT_SAE;
+ else
+ ifmgd->auth_data->timeout =
+ jiffies +
+ IEEE80211_AUTH_TIMEOUT_SHORT;
run_again(sdata, ifmgd->auth_data->timeout);
} else {
ifmgd->auth_data->timeout = jiffies - 1;
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 535de3161a78..05a265cd573d 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -4,6 +4,7 @@
* Copyright 2006-2007 Jiri Benc <jbenc@suse.cz>
* Copyright 2007 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
+ * Copyright (C) 2018 Intel Corporation
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -1135,7 +1136,7 @@ static bool ieee80211_tx_prep_agg(struct ieee80211_tx_data *tx,
}
/* reset session timer */
- if (reset_agg_timer && tid_tx->timeout)
+ if (reset_agg_timer)
tid_tx->last_tx = jiffies;
return queued;
diff --git a/net/ncsi/ncsi-netlink.c b/net/ncsi/ncsi-netlink.c
index 8d7e849d4825..41cede4041d3 100644
--- a/net/ncsi/ncsi-netlink.c
+++ b/net/ncsi/ncsi-netlink.c
@@ -215,7 +215,7 @@ err:
static int ncsi_pkg_info_all_nl(struct sk_buff *skb,
struct netlink_callback *cb)
{
- struct nlattr *attrs[NCSI_ATTR_MAX];
+ struct nlattr *attrs[NCSI_ATTR_MAX + 1];
struct ncsi_package *np, *package;
struct ncsi_dev_priv *ndp;
unsigned int package_id;
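
Classic netlink off-by-one: attribute tables are indexed 0 through MAX inclusive, so the backing array needs MAX + 1 entries; declaring it with MAX let the parser write one slot past the end whenever the highest-numbered attribute appeared. As a rule of thumb:

    /* Attribute parsing buffers: valid index range is [0, MAX], so: */
    struct nlattr *attrs[NCSI_ATTR_MAX + 1];    /* not NCSI_ATTR_MAX */
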
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 0f6b8172fb9a..206fb2c4c319 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -585,7 +585,8 @@ void (*nf_nat_decode_session_hook)(struct sk_buff *, struct flowi *);
EXPORT_SYMBOL(nf_nat_decode_session_hook);
#endif
-static void __net_init __netfilter_net_init(struct nf_hook_entries **e, int max)
+static void __net_init
+__netfilter_net_init(struct nf_hook_entries __rcu **e, int max)
{
int h;
diff --git a/net/netfilter/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c
index 1c98c907bc63..c3db074fc1f7 100644
--- a/net/netfilter/ipvs/ip_vs_app.c
+++ b/net/netfilter/ipvs/ip_vs_app.c
@@ -587,25 +587,13 @@ static const struct seq_operations ip_vs_app_seq_ops = {
.stop = ip_vs_app_seq_stop,
.show = ip_vs_app_seq_show,
};
-
-static int ip_vs_app_open(struct inode *inode, struct file *file)
-{
- return seq_open_net(inode, file, &ip_vs_app_seq_ops,
- sizeof(struct seq_net_private));
-}
-
-static const struct file_operations ip_vs_app_fops = {
- .open = ip_vs_app_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release_net,
-};
#endif
int __net_init ip_vs_app_net_init(struct netns_ipvs *ipvs)
{
INIT_LIST_HEAD(&ipvs->app_list);
- proc_create("ip_vs_app", 0, ipvs->net->proc_net, &ip_vs_app_fops);
+ proc_create_net("ip_vs_app", 0, ipvs->net->proc_net, &ip_vs_app_seq_ops,
+ sizeof(struct seq_net_private));
return 0;
}
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index 370abbf6f421..61c3a389da89 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -232,7 +232,10 @@ static inline int ip_vs_conn_unhash(struct ip_vs_conn *cp)
static inline bool ip_vs_conn_unlink(struct ip_vs_conn *cp)
{
unsigned int hash;
- bool ret;
+ bool ret = false;
+
+ if (cp->flags & IP_VS_CONN_F_ONE_PACKET)
+ return refcount_dec_if_one(&cp->refcnt);
hash = ip_vs_conn_hashkey_conn(cp);
@@ -240,15 +243,13 @@ static inline bool ip_vs_conn_unlink(struct ip_vs_conn *cp)
spin_lock(&cp->lock);
if (cp->flags & IP_VS_CONN_F_HASHED) {
- ret = false;
/* Decrease refcnt and unlink conn only if we are last user */
if (refcount_dec_if_one(&cp->refcnt)) {
hlist_del_rcu(&cp->c_list);
cp->flags &= ~IP_VS_CONN_F_HASHED;
ret = true;
}
- } else
- ret = refcount_read(&cp->refcnt) ? false : true;
+ }
spin_unlock(&cp->lock);
ct_write_unlock_bh(hash);
@@ -454,12 +455,6 @@ ip_vs_conn_out_get_proto(struct netns_ipvs *ipvs, int af,
}
EXPORT_SYMBOL_GPL(ip_vs_conn_out_get_proto);
-static void __ip_vs_conn_put_notimer(struct ip_vs_conn *cp)
-{
- __ip_vs_conn_put(cp);
- ip_vs_conn_expire(&cp->timer);
-}
-
/*
* Put back the conn and restart its timer with its timeout
*/
@@ -478,7 +473,7 @@ void ip_vs_conn_put(struct ip_vs_conn *cp)
(refcount_read(&cp->refcnt) == 1) &&
!timer_pending(&cp->timer))
/* expire connection immediately */
- __ip_vs_conn_put_notimer(cp);
+ ip_vs_conn_expire(&cp->timer);
else
__ip_vs_conn_put_timer(cp);
}
@@ -1136,19 +1131,6 @@ static const struct seq_operations ip_vs_conn_seq_ops = {
.show = ip_vs_conn_seq_show,
};
-static int ip_vs_conn_open(struct inode *inode, struct file *file)
-{
- return seq_open_net(inode, file, &ip_vs_conn_seq_ops,
- sizeof(struct ip_vs_iter_state));
-}
-
-static const struct file_operations ip_vs_conn_fops = {
- .open = ip_vs_conn_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release_net,
-};
-
static const char *ip_vs_origin_name(unsigned int flags)
{
if (flags & IP_VS_CONN_F_SYNC)
@@ -1212,20 +1194,6 @@ static const struct seq_operations ip_vs_conn_sync_seq_ops = {
.stop = ip_vs_conn_seq_stop,
.show = ip_vs_conn_sync_seq_show,
};
-
-static int ip_vs_conn_sync_open(struct inode *inode, struct file *file)
-{
- return seq_open_net(inode, file, &ip_vs_conn_sync_seq_ops,
- sizeof(struct ip_vs_iter_state));
-}
-
-static const struct file_operations ip_vs_conn_sync_fops = {
- .open = ip_vs_conn_sync_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release_net,
-};
-
#endif
@@ -1385,9 +1353,11 @@ int __net_init ip_vs_conn_net_init(struct netns_ipvs *ipvs)
{
atomic_set(&ipvs->conn_count, 0);
- proc_create("ip_vs_conn", 0, ipvs->net->proc_net, &ip_vs_conn_fops);
- proc_create("ip_vs_conn_sync", 0, ipvs->net->proc_net,
- &ip_vs_conn_sync_fops);
+ proc_create_net("ip_vs_conn", 0, ipvs->net->proc_net,
+ &ip_vs_conn_seq_ops, sizeof(struct ip_vs_iter_state));
+ proc_create_net("ip_vs_conn_sync", 0, ipvs->net->proc_net,
+ &ip_vs_conn_sync_seq_ops,
+ sizeof(struct ip_vs_iter_state));
return 0;
}
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 5f6f73cf2174..0679dd101e72 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -119,6 +119,8 @@ ip_vs_in_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
struct ip_vs_cpu_stats *s;
struct ip_vs_service *svc;
+ local_bh_disable();
+
s = this_cpu_ptr(dest->stats.cpustats);
u64_stats_update_begin(&s->syncp);
s->cnt.inpkts++;
@@ -137,6 +139,8 @@ ip_vs_in_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
s->cnt.inpkts++;
s->cnt.inbytes += skb->len;
u64_stats_update_end(&s->syncp);
+
+ local_bh_enable();
}
}
@@ -151,6 +155,8 @@ ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
struct ip_vs_cpu_stats *s;
struct ip_vs_service *svc;
+ local_bh_disable();
+
s = this_cpu_ptr(dest->stats.cpustats);
u64_stats_update_begin(&s->syncp);
s->cnt.outpkts++;
@@ -169,6 +175,8 @@ ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
s->cnt.outpkts++;
s->cnt.outbytes += skb->len;
u64_stats_update_end(&s->syncp);
+
+ local_bh_enable();
}
}
@@ -179,6 +187,8 @@ ip_vs_conn_stats(struct ip_vs_conn *cp, struct ip_vs_service *svc)
struct netns_ipvs *ipvs = svc->ipvs;
struct ip_vs_cpu_stats *s;
+ local_bh_disable();
+
s = this_cpu_ptr(cp->dest->stats.cpustats);
u64_stats_update_begin(&s->syncp);
s->cnt.conns++;
@@ -193,6 +203,8 @@ ip_vs_conn_stats(struct ip_vs_conn *cp, struct ip_vs_service *svc)
u64_stats_update_begin(&s->syncp);
s->cnt.conns++;
u64_stats_update_end(&s->syncp);
+
+ local_bh_enable();
}
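
The local_bh_disable()/local_bh_enable() pairs address the fact that these per-CPU counters are updated from softirq context for most traffic but from process context for local clients; on 32-bit SMP, u64_stats_update_begin() takes a per-CPU seqcount, so a process-context updater interrupted by a softirq updater mid-write deadlocks or corrupts readers. The pattern, with a hypothetical stats type:

    struct foo_cpu_stats {
            u64                     pkts;
            u64                     bytes;
            struct u64_stats_sync   syncp;
    };

    static void foo_count_packet(struct foo_cpu_stats __percpu *stats,
                                 unsigned int len)
    {
            struct foo_cpu_stats *s;

            local_bh_disable();     /* keep softirq updaters out */
            s = this_cpu_ptr(stats);
            u64_stats_update_begin(&s->syncp);
            s->pkts++;
            s->bytes += len;
            u64_stats_update_end(&s->syncp);
            local_bh_enable();
    }
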
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index f36098887ad0..141b1509c948 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -2109,19 +2109,6 @@ static const struct seq_operations ip_vs_info_seq_ops = {
.show = ip_vs_info_seq_show,
};
-static int ip_vs_info_open(struct inode *inode, struct file *file)
-{
- return seq_open_net(inode, file, &ip_vs_info_seq_ops,
- sizeof(struct ip_vs_iter));
-}
-
-static const struct file_operations ip_vs_info_fops = {
- .open = ip_vs_info_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release_net,
-};
-
static int ip_vs_stats_show(struct seq_file *seq, void *v)
{
struct net *net = seq_file_single_net(seq);
@@ -2154,18 +2141,6 @@ static int ip_vs_stats_show(struct seq_file *seq, void *v)
return 0;
}
-static int ip_vs_stats_seq_open(struct inode *inode, struct file *file)
-{
- return single_open_net(inode, file, ip_vs_stats_show);
-}
-
-static const struct file_operations ip_vs_stats_fops = {
- .open = ip_vs_stats_seq_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release_net,
-};
-
static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v)
{
struct net *net = seq_file_single_net(seq);
@@ -2221,18 +2196,6 @@ static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v)
return 0;
}
-
-static int ip_vs_stats_percpu_seq_open(struct inode *inode, struct file *file)
-{
- return single_open_net(inode, file, ip_vs_stats_percpu_show);
-}
-
-static const struct file_operations ip_vs_stats_percpu_fops = {
- .open = ip_vs_stats_percpu_seq_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release_net,
-};
#endif
/*
@@ -2381,8 +2344,10 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
struct ipvs_sync_daemon_cfg cfg;
memset(&cfg, 0, sizeof(cfg));
- strlcpy(cfg.mcast_ifn, dm->mcast_ifn,
- sizeof(cfg.mcast_ifn));
+ ret = -EINVAL;
+ if (strscpy(cfg.mcast_ifn, dm->mcast_ifn,
+ sizeof(cfg.mcast_ifn)) <= 0)
+ goto out_dec;
cfg.syncid = dm->syncid;
ret = start_sync_thread(ipvs, &cfg, dm->state);
} else {
@@ -2420,12 +2385,19 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
}
}
+ if ((cmd == IP_VS_SO_SET_ADD || cmd == IP_VS_SO_SET_EDIT) &&
+ strnlen(usvc.sched_name, IP_VS_SCHEDNAME_MAXLEN) ==
+ IP_VS_SCHEDNAME_MAXLEN) {
+ ret = -EINVAL;
+ goto out_unlock;
+ }
+
/* Check for valid protocol: TCP or UDP or SCTP, even for fwmark!=0 */
if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP &&
usvc.protocol != IPPROTO_SCTP) {
- pr_err("set_ctl: invalid protocol: %d %pI4:%d %s\n",
+ pr_err("set_ctl: invalid protocol: %d %pI4:%d\n",
usvc.protocol, &usvc.addr.ip,
- ntohs(usvc.port), usvc.sched_name);
+ ntohs(usvc.port));
ret = -EFAULT;
goto out_unlock;
}
@@ -2847,7 +2819,7 @@ static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = {
static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = {
[IPVS_DAEMON_ATTR_STATE] = { .type = NLA_U32 },
[IPVS_DAEMON_ATTR_MCAST_IFN] = { .type = NLA_NUL_STRING,
- .len = IP_VS_IFNAME_MAXLEN },
+ .len = IP_VS_IFNAME_MAXLEN - 1 },
[IPVS_DAEMON_ATTR_SYNC_ID] = { .type = NLA_U32 },
[IPVS_DAEMON_ATTR_SYNC_MAXLEN] = { .type = NLA_U16 },
[IPVS_DAEMON_ATTR_MCAST_GROUP] = { .type = NLA_U32 },
@@ -2865,7 +2837,7 @@ static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = {
[IPVS_SVC_ATTR_PORT] = { .type = NLA_U16 },
[IPVS_SVC_ATTR_FWMARK] = { .type = NLA_U32 },
[IPVS_SVC_ATTR_SCHED_NAME] = { .type = NLA_NUL_STRING,
- .len = IP_VS_SCHEDNAME_MAXLEN },
+ .len = IP_VS_SCHEDNAME_MAXLEN - 1 },
[IPVS_SVC_ATTR_PE_NAME] = { .type = NLA_NUL_STRING,
.len = IP_VS_PENAME_MAXLEN },
[IPVS_SVC_ATTR_FLAGS] = { .type = NLA_BINARY,
@@ -4030,10 +4002,12 @@ int __net_init ip_vs_control_net_init(struct netns_ipvs *ipvs)
spin_lock_init(&ipvs->tot_stats.lock);
- proc_create("ip_vs", 0, ipvs->net->proc_net, &ip_vs_info_fops);
- proc_create("ip_vs_stats", 0, ipvs->net->proc_net, &ip_vs_stats_fops);
- proc_create("ip_vs_stats_percpu", 0, ipvs->net->proc_net,
- &ip_vs_stats_percpu_fops);
+ proc_create_net("ip_vs", 0, ipvs->net->proc_net, &ip_vs_info_seq_ops,
+ sizeof(struct ip_vs_iter));
+ proc_create_net_single("ip_vs_stats", 0, ipvs->net->proc_net,
+ ip_vs_stats_show, NULL);
+ proc_create_net_single("ip_vs_stats_percpu", 0, ipvs->net->proc_net,
+ ip_vs_stats_percpu_show, NULL);
if (ip_vs_control_net_init_sysctl(ipvs))
goto err;
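
Two validation fixes travel with the proc conversion here. For NLA_NUL_STRING policies, .len is the maximum string length excluding the terminating NUL, so using the full buffer size admitted names that left no room for the terminator (hence the "- 1" and the explicit strnlen() check on the compat path). And the sync-daemon setup replaces the silently truncating strlcpy() with strscpy(), whose return value exposes truncation:

    /* strscpy() returns bytes copied (excluding the NUL) or -E2BIG on
     * truncation, so oversized or empty interface names are rejected:
     */
    if (strscpy(cfg.mcast_ifn, dm->mcast_ifn, sizeof(cfg.mcast_ifn)) <= 0)
            return -EINVAL;
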
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index 4b2b3d53acfc..853b23206bb7 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -644,19 +644,6 @@ static const struct seq_operations exp_seq_ops = {
.stop = exp_seq_stop,
.show = exp_seq_show
};
-
-static int exp_open(struct inode *inode, struct file *file)
-{
- return seq_open_net(inode, file, &exp_seq_ops,
- sizeof(struct ct_expect_iter_state));
-}
-
-static const struct file_operations exp_file_ops = {
- .open = exp_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release_net,
-};
#endif /* CONFIG_NF_CONNTRACK_PROCFS */
static int exp_proc_init(struct net *net)
@@ -666,8 +653,8 @@ static int exp_proc_init(struct net *net)
kuid_t root_uid;
kgid_t root_gid;
- proc = proc_create("nf_conntrack_expect", 0440, net->proc_net,
- &exp_file_ops);
+ proc = proc_create_net("nf_conntrack_expect", 0440, net->proc_net,
+ &exp_seq_ops, sizeof(struct ct_expect_iter_state));
if (!proc)
return -ENOMEM;
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index e97cdc1cf98c..8e67910185a0 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -981,6 +981,17 @@ static int tcp_packet(struct nf_conn *ct,
return NF_ACCEPT; /* Don't change state */
}
break;
+ case TCP_CONNTRACK_SYN_SENT2:
+ /* tcp_conntracks table is not smart enough to handle
+ * simultaneous open.
+ */
+ ct->proto.tcp.last_flags |= IP_CT_TCP_SIMULTANEOUS_OPEN;
+ break;
+ case TCP_CONNTRACK_SYN_RECV:
+ if (dir == IP_CT_DIR_REPLY && index == TCP_ACK_SET &&
+ ct->proto.tcp.last_flags & IP_CT_TCP_SIMULTANEOUS_OPEN)
+ new_state = TCP_CONNTRACK_ESTABLISHED;
+ break;
case TCP_CONNTRACK_CLOSE:
if (index == TCP_RST_SET
&& (ct->proto.tcp.seen[!dir].flags & IP_CT_TCP_FLAG_MAXACK_SET)
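
The tcp_conntracks transition table has no way to express simultaneous open, where both peers send SYN before either answers with SYN/ACK. The fix marks the flow with IP_CT_TCP_SIMULTANEOUS_OPEN when the second SYN is seen (SYN_SENT2) and, once an ACK arrives in the reply direction while conntrack still believes the state is SYN_RECV, promotes the entry straight to ESTABLISHED, matching what the endpoints themselves have concluded.
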
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 037fec54c850..b642c0b2495c 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -375,19 +375,6 @@ static const struct seq_operations ct_seq_ops = {
.show = ct_seq_show
};
-static int ct_open(struct inode *inode, struct file *file)
-{
- return seq_open_net(inode, file, &ct_seq_ops,
- sizeof(struct ct_iter_state));
-}
-
-static const struct file_operations ct_file_ops = {
- .open = ct_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release_net,
-};
-
static void *ct_cpu_seq_start(struct seq_file *seq, loff_t *pos)
{
struct net *net = seq_file_net(seq);
@@ -467,26 +454,14 @@ static const struct seq_operations ct_cpu_seq_ops = {
.show = ct_cpu_seq_show,
};
-static int ct_cpu_seq_open(struct inode *inode, struct file *file)
-{
- return seq_open_net(inode, file, &ct_cpu_seq_ops,
- sizeof(struct seq_net_private));
-}
-
-static const struct file_operations ct_cpu_seq_fops = {
- .open = ct_cpu_seq_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release_net,
-};
-
static int nf_conntrack_standalone_init_proc(struct net *net)
{
struct proc_dir_entry *pde;
kuid_t root_uid;
kgid_t root_gid;
- pde = proc_create("nf_conntrack", 0440, net->proc_net, &ct_file_ops);
+ pde = proc_create_net("nf_conntrack", 0440, net->proc_net, &ct_seq_ops,
+ sizeof(struct ct_iter_state));
if (!pde)
goto out_nf_conntrack;
@@ -495,8 +470,8 @@ static int nf_conntrack_standalone_init_proc(struct net *net)
if (uid_valid(root_uid) && gid_valid(root_gid))
proc_set_user(pde, root_uid, root_gid);
- pde = proc_create("nf_conntrack", 0444, net->proc_net_stat,
- &ct_cpu_seq_fops);
+ pde = proc_create_net("nf_conntrack", 0444, net->proc_net_stat,
+ &ct_cpu_seq_ops, sizeof(struct seq_net_private));
if (!pde)
goto out_stat_nf_conntrack;
return 0;
diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c
index 6d0357817cda..426457047578 100644
--- a/net/netfilter/nf_log.c
+++ b/net/netfilter/nf_log.c
@@ -394,21 +394,6 @@ static const struct seq_operations nflog_seq_ops = {
.stop = seq_stop,
.show = seq_show,
};
-
-static int nflog_open(struct inode *inode, struct file *file)
-{
- return seq_open_net(inode, file, &nflog_seq_ops,
- sizeof(struct seq_net_private));
-}
-
-static const struct file_operations nflog_file_ops = {
- .open = nflog_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release_net,
-};
-
-
#endif /* PROC_FS */
#ifdef CONFIG_SYSCTL
@@ -549,8 +534,8 @@ static int __net_init nf_log_net_init(struct net *net)
int ret = -ENOMEM;
#ifdef CONFIG_PROC_FS
- if (!proc_create("nf_log", 0444,
- net->nf.proc_netfilter, &nflog_file_ops))
+ if (!proc_create_net("nf_log", 0444, net->nf.proc_netfilter,
+ &nflog_seq_ops, sizeof(struct seq_net_private)))
return ret;
#endif
ret = netfilter_log_sysctl_init(net);
diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c
index 6039b350abbe..8ff4d22f10b2 100644
--- a/net/netfilter/nf_synproxy_core.c
+++ b/net/netfilter/nf_synproxy_core.c
@@ -310,23 +310,10 @@ static const struct seq_operations synproxy_cpu_seq_ops = {
.show = synproxy_cpu_seq_show,
};
-static int synproxy_cpu_seq_open(struct inode *inode, struct file *file)
-{
- return seq_open_net(inode, file, &synproxy_cpu_seq_ops,
- sizeof(struct seq_net_private));
-}
-
-static const struct file_operations synproxy_cpu_seq_fops = {
- .open = synproxy_cpu_seq_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release_net,
-};
-
static int __net_init synproxy_proc_init(struct net *net)
{
- if (!proc_create("synproxy", 0444, net->proc_net_stat,
- &synproxy_cpu_seq_fops))
+ if (!proc_create_net("synproxy", 0444, net->proc_net_stat,
+ &synproxy_cpu_seq_ops, sizeof(struct seq_net_private)))
return -ENOMEM;
return 0;
}
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 04d4e3772584..501e48a7965b 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -214,6 +214,34 @@ static int nft_delchain(struct nft_ctx *ctx)
return err;
}
+static void nft_rule_expr_activate(const struct nft_ctx *ctx,
+ struct nft_rule *rule)
+{
+ struct nft_expr *expr;
+
+ expr = nft_expr_first(rule);
+ while (expr != nft_expr_last(rule) && expr->ops) {
+ if (expr->ops->activate)
+ expr->ops->activate(ctx, expr);
+
+ expr = nft_expr_next(expr);
+ }
+}
+
+static void nft_rule_expr_deactivate(const struct nft_ctx *ctx,
+ struct nft_rule *rule)
+{
+ struct nft_expr *expr;
+
+ expr = nft_expr_first(rule);
+ while (expr != nft_expr_last(rule) && expr->ops) {
+ if (expr->ops->deactivate)
+ expr->ops->deactivate(ctx, expr);
+
+ expr = nft_expr_next(expr);
+ }
+}
+
static int
nf_tables_delrule_deactivate(struct nft_ctx *ctx, struct nft_rule *rule)
{
@@ -259,6 +287,7 @@ static int nft_delrule(struct nft_ctx *ctx, struct nft_rule *rule)
nft_trans_destroy(trans);
return err;
}
+ nft_rule_expr_deactivate(ctx, rule);
return 0;
}
@@ -1269,8 +1298,10 @@ static void nft_chain_stats_replace(struct nft_base_chain *chain,
rcu_assign_pointer(chain->stats, newstats);
synchronize_rcu();
free_percpu(oldstats);
- } else
+ } else {
rcu_assign_pointer(chain->stats, newstats);
+ static_branch_inc(&nft_counters_enabled);
+ }
}
static void nf_tables_chain_destroy(struct nft_ctx *ctx)
@@ -2238,6 +2269,13 @@ static void nf_tables_rule_destroy(const struct nft_ctx *ctx,
kfree(rule);
}
+static void nf_tables_rule_release(const struct nft_ctx *ctx,
+ struct nft_rule *rule)
+{
+ nft_rule_expr_deactivate(ctx, rule);
+ nf_tables_rule_destroy(ctx, rule);
+}
+
#define NFT_RULE_MAXEXPRS 128
static struct nft_expr_info *info;
@@ -2402,7 +2440,7 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk,
return 0;
err2:
- nf_tables_rule_destroy(&ctx, rule);
+ nf_tables_rule_release(&ctx, rule);
err1:
for (i = 0; i < n; i++) {
if (info[i].ops != NULL)
@@ -4044,8 +4082,10 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA) ^
nft_set_ext_exists(ext2, NFT_SET_EXT_DATA) ||
nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF) ^
- nft_set_ext_exists(ext2, NFT_SET_EXT_OBJREF))
- return -EBUSY;
+ nft_set_ext_exists(ext2, NFT_SET_EXT_OBJREF)) {
+ err = -EBUSY;
+ goto err5;
+ }
if ((nft_set_ext_exists(ext, NFT_SET_EXT_DATA) &&
nft_set_ext_exists(ext2, NFT_SET_EXT_DATA) &&
memcmp(nft_set_ext_data(ext),
@@ -4130,7 +4170,7 @@ static int nf_tables_newsetelem(struct net *net, struct sock *nlsk,
* NFT_GOTO verdicts. This function must be called on active data objects
* from the second phase of the commit protocol.
*/
-static void nft_data_hold(const struct nft_data *data, enum nft_data_types type)
+void nft_data_hold(const struct nft_data *data, enum nft_data_types type)
{
if (type == NFT_DATA_VERDICT) {
switch (data->verdict.code) {
@@ -4668,7 +4708,7 @@ static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb)
if (idx > s_idx)
memset(&cb->args[1], 0,
sizeof(cb->args) - sizeof(cb->args[0]));
- if (filter && filter->table[0] &&
+ if (filter && filter->table &&
strcmp(filter->table, table->name))
goto cont;
if (filter &&
@@ -5342,7 +5382,7 @@ static int nf_tables_dump_flowtable(struct sk_buff *skb,
if (idx > s_idx)
memset(&cb->args[1], 0,
sizeof(cb->args) - sizeof(cb->args[0]));
- if (filter && filter->table[0] &&
+ if (filter && filter->table &&
strcmp(filter->table, table->name))
goto cont;
@@ -5761,7 +5801,7 @@ static void nft_chain_commit_update(struct nft_trans *trans)
}
}
-static void nf_tables_commit_release(struct nft_trans *trans)
+static void nft_commit_release(struct nft_trans *trans)
{
switch (trans->msg_type) {
case NFT_MSG_DELTABLE:
@@ -5790,6 +5830,21 @@ static void nf_tables_commit_release(struct nft_trans *trans)
kfree(trans);
}
+static void nf_tables_commit_release(struct net *net)
+{
+ struct nft_trans *trans, *next;
+
+ if (list_empty(&net->nft.commit_list))
+ return;
+
+ synchronize_rcu();
+
+ list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) {
+ list_del(&trans->list);
+ nft_commit_release(trans);
+ }
+}
+
static int nf_tables_commit(struct net *net, struct sk_buff *skb)
{
struct nft_trans *trans, *next;
@@ -5920,13 +5975,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
}
}
- synchronize_rcu();
-
- list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) {
- list_del(&trans->list);
- nf_tables_commit_release(trans);
- }
-
+ nf_tables_commit_release(net);
nf_tables_gen_notify(net, skb, NFT_MSG_NEWGEN);
return 0;
@@ -6006,10 +6055,12 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb)
case NFT_MSG_NEWRULE:
trans->ctx.chain->use--;
list_del_rcu(&nft_trans_rule(trans)->list);
+ nft_rule_expr_deactivate(&trans->ctx, nft_trans_rule(trans));
break;
case NFT_MSG_DELRULE:
trans->ctx.chain->use++;
nft_clear(trans->ctx.net, nft_trans_rule(trans));
+ nft_rule_expr_activate(&trans->ctx, nft_trans_rule(trans));
nft_trans_destroy(trans);
break;
case NFT_MSG_NEWSET:
@@ -6585,7 +6636,7 @@ int __nft_release_basechain(struct nft_ctx *ctx)
list_for_each_entry_safe(rule, nr, &ctx->chain->rules, list) {
list_del(&rule->list);
ctx->chain->use--;
- nf_tables_rule_destroy(ctx, rule);
+ nf_tables_rule_release(ctx, rule);
}
list_del(&ctx->chain->list);
ctx->table->use--;
@@ -6623,7 +6674,7 @@ static void __nft_release_tables(struct net *net)
list_for_each_entry_safe(rule, nr, &chain->rules, list) {
list_del(&rule->list);
chain->use--;
- nf_tables_rule_destroy(&ctx, rule);
+ nf_tables_rule_release(&ctx, rule);
}
}
list_for_each_entry_safe(flowtable, nf, &table->flowtables, list) {
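
The nf_tables changes here are mostly about transactional consistency for rule expressions. Deleting a rule now calls nft_rule_expr_deactivate() at deletion time, releasing references that expressions hold (such as chain jumps carried in verdict data), and an aborted batch calls nft_rule_expr_activate() to undo that; nft_data_hold() loses its static qualifier, apparently so related code elsewhere in the series can take the same references. Final teardown moves into nf_tables_commit_release(), which runs synchronize_rcu() before freeing, so packet-path walkers never see a freed expression. The remaining hunks are independent fixes: filter->table is now a pointer (test the pointer, not its first byte), error unwinding in nft_add_set_elem() goes through err5 instead of returning directly, and enabling stats on an existing chain also bumps the nft_counters_enabled static key.
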
diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c
index dfd0bf3810d2..40e744572283 100644
--- a/net/netfilter/nf_tables_core.c
+++ b/net/netfilter/nf_tables_core.c
@@ -119,14 +119,21 @@ DEFINE_STATIC_KEY_FALSE(nft_counters_enabled);
static noinline void nft_update_chain_stats(const struct nft_chain *chain,
const struct nft_pktinfo *pkt)
{
+ struct nft_base_chain *base_chain;
struct nft_stats *stats;
+ base_chain = nft_base_chain(chain);
+ if (!base_chain->stats)
+ return;
+
local_bh_disable();
- stats = this_cpu_ptr(rcu_dereference(nft_base_chain(chain)->stats));
- u64_stats_update_begin(&stats->syncp);
- stats->pkts++;
- stats->bytes += pkt->skb->len;
- u64_stats_update_end(&stats->syncp);
+ stats = this_cpu_ptr(rcu_dereference(base_chain->stats));
+ if (stats) {
+ u64_stats_update_begin(&stats->syncp);
+ stats->pkts++;
+ stats->bytes += pkt->skb->len;
+ u64_stats_update_end(&stats->syncp);
+ }
local_bh_enable();
}
diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c
index b9505bcd3827..a0e5adf0b3b6 100644
--- a/net/netfilter/nfnetlink_acct.c
+++ b/net/netfilter/nfnetlink_acct.c
@@ -115,7 +115,7 @@ static int nfnl_acct_new(struct net *net, struct sock *nfnl,
nfacct->flags = flags;
}
- strncpy(nfacct->name, nla_data(tb[NFACCT_NAME]), NFACCT_NAME_MAX);
+ nla_strlcpy(nfacct->name, tb[NFACCT_NAME], NFACCT_NAME_MAX);
if (tb[NFACCT_BYTES]) {
atomic64_set(&nfacct->bytes,
diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c
index 4a4b293fb2e5..cb5b5f207777 100644
--- a/net/netfilter/nfnetlink_cthelper.c
+++ b/net/netfilter/nfnetlink_cthelper.c
@@ -149,8 +149,8 @@ nfnl_cthelper_expect_policy(struct nf_conntrack_expect_policy *expect_policy,
!tb[NFCTH_POLICY_EXPECT_TIMEOUT])
return -EINVAL;
- strncpy(expect_policy->name,
- nla_data(tb[NFCTH_POLICY_NAME]), NF_CT_HELPER_NAME_LEN);
+ nla_strlcpy(expect_policy->name,
+ tb[NFCTH_POLICY_NAME], NF_CT_HELPER_NAME_LEN);
expect_policy->max_expected =
ntohl(nla_get_be32(tb[NFCTH_POLICY_EXPECT_MAX]));
if (expect_policy->max_expected > NF_CT_EXPECT_MAX_CNT)
@@ -234,7 +234,8 @@ nfnl_cthelper_create(const struct nlattr * const tb[],
if (ret < 0)
goto err1;
- strncpy(helper->name, nla_data(tb[NFCTH_NAME]), NF_CT_HELPER_NAME_LEN);
+ nla_strlcpy(helper->name,
+ tb[NFCTH_NAME], NF_CT_HELPER_NAME_LEN);
size = ntohl(nla_get_be32(tb[NFCTH_PRIV_DATA_LEN]));
if (size > FIELD_SIZEOF(struct nf_conn_help, data)) {
ret = -ENOMEM;
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index 7b46aa4c478d..c14822b9729f 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -1046,20 +1046,6 @@ static const struct seq_operations nful_seq_ops = {
.stop = seq_stop,
.show = seq_show,
};
-
-static int nful_open(struct inode *inode, struct file *file)
-{
- return seq_open_net(inode, file, &nful_seq_ops,
- sizeof(struct iter_state));
-}
-
-static const struct file_operations nful_file_ops = {
- .open = nful_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release_net,
-};
-
#endif /* PROC_FS */
static int __net_init nfnl_log_net_init(struct net *net)
@@ -1077,8 +1063,8 @@ static int __net_init nfnl_log_net_init(struct net *net)
spin_lock_init(&log->instances_lock);
#ifdef CONFIG_PROC_FS
- proc = proc_create("nfnetlink_log", 0440,
- net->nf.proc_netfilter, &nful_file_ops);
+ proc = proc_create_net("nfnetlink_log", 0440, net->nf.proc_netfilter,
+ &nful_seq_ops, sizeof(struct iter_state));
if (!proc)
return -ENOMEM;
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 74a04638ef03..494a9ab35cb6 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -1469,20 +1469,6 @@ static const struct seq_operations nfqnl_seq_ops = {
.stop = seq_stop,
.show = seq_show,
};
-
-static int nfqnl_open(struct inode *inode, struct file *file)
-{
- return seq_open_net(inode, file, &nfqnl_seq_ops,
- sizeof(struct iter_state));
-}
-
-static const struct file_operations nfqnl_file_ops = {
- .open = nfqnl_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release_net,
-};
-
#endif /* PROC_FS */
static int __net_init nfnl_queue_net_init(struct net *net)
@@ -1496,8 +1482,8 @@ static int __net_init nfnl_queue_net_init(struct net *net)
spin_lock_init(&q->instances_lock);
#ifdef CONFIG_PROC_FS
- if (!proc_create("nfnetlink_queue", 0440,
- net->nf.proc_netfilter, &nfqnl_file_ops))
+ if (!proc_create_net("nfnetlink_queue", 0440, net->nf.proc_netfilter,
+ &nfqnl_seq_ops, sizeof(struct iter_state)))
return -ENOMEM;
#endif
nf_register_queue_handler(net, &nfqh);
diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c
index 8e23726b9081..1d99a1efdafc 100644
--- a/net/netfilter/nft_compat.c
+++ b/net/netfilter/nft_compat.c
@@ -27,14 +27,31 @@ struct nft_xt {
struct list_head head;
struct nft_expr_ops ops;
unsigned int refcnt;
+
+ /* Unlike other expressions, ops doesn't have static storage duration.
+ * nft core assumes they do. We use kfree_rcu so that nft core can
+ * can check expr->ops->size even after nft_compat->destroy() frees
+ * the nft_xt struct that holds the ops structure.
+ */
+ struct rcu_head rcu_head;
+};
+
+/* Used for matches where *info is larger than X bytes */
+#define NFT_MATCH_LARGE_THRESH 192
+
+struct nft_xt_match_priv {
+ void *info;
};
-static void nft_xt_put(struct nft_xt *xt)
+static bool nft_xt_put(struct nft_xt *xt)
{
if (--xt->refcnt == 0) {
list_del(&xt->head);
- kfree(xt);
+ kfree_rcu(xt, rcu_head);
+ return true;
}
+
+ return false;
}
static int nft_compat_chain_validate_dependency(const char *tablename,
@@ -226,6 +243,7 @@ nft_target_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
struct xt_target *target = expr->ops->data;
struct xt_tgchk_param par;
size_t size = XT_ALIGN(nla_len(tb[NFTA_TARGET_INFO]));
+ struct nft_xt *nft_xt;
u16 proto = 0;
bool inv = false;
union nft_entry e = {};
@@ -236,25 +254,22 @@ nft_target_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
if (ctx->nla[NFTA_RULE_COMPAT]) {
ret = nft_parse_compat(ctx->nla[NFTA_RULE_COMPAT], &proto, &inv);
if (ret < 0)
- goto err;
+ return ret;
}
nft_target_set_tgchk_param(&par, ctx, target, info, &e, proto, inv);
ret = xt_check_target(&par, size, proto, inv);
if (ret < 0)
- goto err;
+ return ret;
/* The standard target cannot be used */
- if (target->target == NULL) {
- ret = -EINVAL;
- goto err;
- }
+ if (!target->target)
+ return -EINVAL;
+ nft_xt = container_of(expr->ops, struct nft_xt, ops);
+ nft_xt->refcnt++;
return 0;
-err:
- module_put(target->me);
- return ret;
}
static void
@@ -271,8 +286,8 @@ nft_target_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr)
if (par.target->destroy != NULL)
par.target->destroy(&par);
- nft_xt_put(container_of(expr->ops, struct nft_xt, ops));
- module_put(target->me);
+ if (nft_xt_put(container_of(expr->ops, struct nft_xt, ops)))
+ module_put(target->me);
}
static int nft_target_dump(struct sk_buff *skb, const struct nft_expr *expr)
@@ -316,11 +331,11 @@ static int nft_target_validate(const struct nft_ctx *ctx,
return 0;
}
-static void nft_match_eval(const struct nft_expr *expr,
- struct nft_regs *regs,
- const struct nft_pktinfo *pkt)
+static void __nft_match_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt,
+ void *info)
{
- void *info = nft_expr_priv(expr);
struct xt_match *match = expr->ops->data;
struct sk_buff *skb = pkt->skb;
bool ret;
@@ -344,6 +359,22 @@ static void nft_match_eval(const struct nft_expr *expr,
}
}
+static void nft_match_large_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ struct nft_xt_match_priv *priv = nft_expr_priv(expr);
+
+ __nft_match_eval(expr, regs, pkt, priv->info);
+}
+
+static void nft_match_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ __nft_match_eval(expr, regs, pkt, nft_expr_priv(expr));
+}
+
static const struct nla_policy nft_match_policy[NFTA_MATCH_MAX + 1] = {
[NFTA_MATCH_NAME] = { .type = NLA_NUL_STRING },
[NFTA_MATCH_REV] = { .type = NLA_U32 },
@@ -404,13 +435,14 @@ static void match_compat_from_user(struct xt_match *m, void *in, void *out)
}
static int
-nft_match_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
- const struct nlattr * const tb[])
+__nft_match_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
+ const struct nlattr * const tb[],
+ void *info)
{
- void *info = nft_expr_priv(expr);
struct xt_match *match = expr->ops->data;
struct xt_mtchk_param par;
size_t size = XT_ALIGN(nla_len(tb[NFTA_MATCH_INFO]));
+ struct nft_xt *nft_xt;
u16 proto = 0;
bool inv = false;
union nft_entry e = {};
@@ -421,26 +453,50 @@ nft_match_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
if (ctx->nla[NFTA_RULE_COMPAT]) {
ret = nft_parse_compat(ctx->nla[NFTA_RULE_COMPAT], &proto, &inv);
if (ret < 0)
- goto err;
+ return ret;
}
nft_match_set_mtchk_param(&par, ctx, match, info, &e, proto, inv);
ret = xt_check_match(&par, size, proto, inv);
if (ret < 0)
- goto err;
+ return ret;
+ nft_xt = container_of(expr->ops, struct nft_xt, ops);
+ nft_xt->refcnt++;
return 0;
-err:
- module_put(match->me);
+}
+
+static int
+nft_match_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ return __nft_match_init(ctx, expr, tb, nft_expr_priv(expr));
+}
+
+static int
+nft_match_large_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_xt_match_priv *priv = nft_expr_priv(expr);
+ struct xt_match *m = expr->ops->data;
+ int ret;
+
+ priv->info = kmalloc(XT_ALIGN(m->matchsize), GFP_KERNEL);
+ if (!priv->info)
+ return -ENOMEM;
+
+ ret = __nft_match_init(ctx, expr, tb, priv->info);
+ if (ret)
+ kfree(priv->info);
return ret;
}
static void
-nft_match_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr)
+__nft_match_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr,
+ void *info)
{
struct xt_match *match = expr->ops->data;
- void *info = nft_expr_priv(expr);
struct xt_mtdtor_param par;
par.net = ctx->net;
@@ -450,13 +506,28 @@ nft_match_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr)
if (par.match->destroy != NULL)
par.match->destroy(&par);
- nft_xt_put(container_of(expr->ops, struct nft_xt, ops));
- module_put(match->me);
+ if (nft_xt_put(container_of(expr->ops, struct nft_xt, ops)))
+ module_put(match->me);
}
-static int nft_match_dump(struct sk_buff *skb, const struct nft_expr *expr)
+static void
+nft_match_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr)
+{
+ __nft_match_destroy(ctx, expr, nft_expr_priv(expr));
+}
+
+static void
+nft_match_large_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr)
+{
+ struct nft_xt_match_priv *priv = nft_expr_priv(expr);
+
+ __nft_match_destroy(ctx, expr, priv->info);
+ kfree(priv->info);
+}
+
+static int __nft_match_dump(struct sk_buff *skb, const struct nft_expr *expr,
+ void *info)
{
- void *info = nft_expr_priv(expr);
struct xt_match *match = expr->ops->data;
if (nla_put_string(skb, NFTA_MATCH_NAME, match->name) ||
@@ -470,6 +541,18 @@ nla_put_failure:
return -1;
}
+static int nft_match_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+ return __nft_match_dump(skb, expr, nft_expr_priv(expr));
+}
+
+static int nft_match_large_dump(struct sk_buff *skb, const struct nft_expr *e)
+{
+ struct nft_xt_match_priv *priv = nft_expr_priv(e);
+
+ return __nft_match_dump(skb, e, priv->info);
+}
+
static int nft_match_validate(const struct nft_ctx *ctx,
const struct nft_expr *expr,
const struct nft_data **data)
@@ -637,6 +720,7 @@ nft_match_select_ops(const struct nft_ctx *ctx,
{
struct nft_xt *nft_match;
struct xt_match *match;
+ unsigned int matchsize;
char *mt_name;
u32 rev, family;
int err;
@@ -654,13 +738,8 @@ nft_match_select_ops(const struct nft_ctx *ctx,
list_for_each_entry(nft_match, &nft_match_list, head) {
struct xt_match *match = nft_match->ops.data;
- if (nft_match_cmp(match, mt_name, rev, family)) {
- if (!try_module_get(match->me))
- return ERR_PTR(-ENOENT);
-
- nft_match->refcnt++;
+ if (nft_match_cmp(match, mt_name, rev, family))
return &nft_match->ops;
- }
}
match = xt_request_find_match(family, mt_name, rev);
@@ -679,9 +758,8 @@ nft_match_select_ops(const struct nft_ctx *ctx,
goto err;
}
- nft_match->refcnt = 1;
+ nft_match->refcnt = 0;
nft_match->ops.type = &nft_match_type;
- nft_match->ops.size = NFT_EXPR_SIZE(XT_ALIGN(match->matchsize));
nft_match->ops.eval = nft_match_eval;
nft_match->ops.init = nft_match_init;
nft_match->ops.destroy = nft_match_destroy;
@@ -689,6 +767,18 @@ nft_match_select_ops(const struct nft_ctx *ctx,
nft_match->ops.validate = nft_match_validate;
nft_match->ops.data = match;
+ matchsize = NFT_EXPR_SIZE(XT_ALIGN(match->matchsize));
+ if (matchsize > NFT_MATCH_LARGE_THRESH) {
+ matchsize = NFT_EXPR_SIZE(sizeof(struct nft_xt_match_priv));
+
+ nft_match->ops.eval = nft_match_large_eval;
+ nft_match->ops.init = nft_match_large_init;
+ nft_match->ops.destroy = nft_match_large_destroy;
+ nft_match->ops.dump = nft_match_large_dump;
+ }
+
+ nft_match->ops.size = matchsize;
+
list_add(&nft_match->head, &nft_match_list);
return &nft_match->ops;
@@ -739,13 +829,8 @@ nft_target_select_ops(const struct nft_ctx *ctx,
list_for_each_entry(nft_target, &nft_target_list, head) {
struct xt_target *target = nft_target->ops.data;
- if (nft_target_cmp(target, tg_name, rev, family)) {
- if (!try_module_get(target->me))
- return ERR_PTR(-ENOENT);
-
- nft_target->refcnt++;
+ if (nft_target_cmp(target, tg_name, rev, family))
return &nft_target->ops;
- }
}
target = xt_request_find_target(family, tg_name, rev);
@@ -764,7 +849,7 @@ nft_target_select_ops(const struct nft_ctx *ctx,
goto err;
}
- nft_target->refcnt = 1;
+ nft_target->refcnt = 0;
nft_target->ops.type = &nft_target_type;
nft_target->ops.size = NFT_EXPR_SIZE(XT_ALIGN(target->targetsize));
nft_target->ops.init = nft_target_init;
@@ -823,6 +908,32 @@ err_match:
static void __exit nft_compat_module_exit(void)
{
+ struct nft_xt *xt, *next;
+
+ /* The lists should be empty here; they can be non-empty only if an
+ * error caused an nft_xt expr not to be initialized fully and no one
+ * else requested the same expression later.
+ *
+ * In that case, the lists contain 0-refcount entries that still
+ * hold a module reference.
+ */
+ list_for_each_entry_safe(xt, next, &nft_target_list, head) {
+ struct xt_target *target = xt->ops.data;
+
+ if (WARN_ON_ONCE(xt->refcnt))
+ continue;
+ module_put(target->me);
+ kfree(xt);
+ }
+
+ list_for_each_entry_safe(xt, next, &nft_match_list, head) {
+ struct xt_match *match = xt->ops.data;
+
+ if (WARN_ON_ONCE(xt->refcnt))
+ continue;
+ module_put(match->me);
+ kfree(xt);
+ }
nfnetlink_subsys_unregister(&nfnl_compat_subsys);
nft_unregister_expr(&nft_target_type);
nft_unregister_expr(&nft_match_type);
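
The nft_compat hunks above make two related changes: the per-expression module reference is now taken lazily (refcnt starts at 0, and nft_xt_put() drops the module reference only when the last user goes away), and match info larger than NFT_MATCH_LARGE_THRESH is moved out of the fixed expression private area into a separately allocated buffer reached through struct nft_xt_match_priv. Below is a minimal userspace sketch of that out-of-line pattern; LARGE_THRESH, struct priv and struct expr are illustrative stand-ins, not kernel symbols.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define LARGE_THRESH 192                 /* stand-in for NFT_MATCH_LARGE_THRESH */

struct priv { void *info; };             /* stand-in for struct nft_xt_match_priv */

struct expr {
	union {
		struct priv p;           /* large: pointer to out-of-line buffer */
		unsigned char data[256]; /* small: info stored inline */
	} priv;
};

static void *expr_info(struct expr *e, size_t matchsize)
{
	if (matchsize <= LARGE_THRESH)
		return e->priv.data;     /* small: inline, no extra allocation */
	return e->priv.p.info;           /* large: out of line */
}

static int expr_init(struct expr *e, size_t matchsize)
{
	if (matchsize <= LARGE_THRESH)
		return 0;
	e->priv.p.info = calloc(1, matchsize);
	return e->priv.p.info ? 0 : -1;
}

static void expr_destroy(struct expr *e, size_t matchsize)
{
	if (matchsize > LARGE_THRESH)
		free(e->priv.p.info);
}

int main(void)
{
	struct expr e;

	memset(&e, 0, sizeof(e));
	if (expr_init(&e, 512) == 0) {           /* 512 > threshold: out of line */
		memset(expr_info(&e, 512), 0xab, 512);
		expr_destroy(&e, 512);
	}
	return 0;
}

The payoff of the indirection is that ops.size stays small and bounded while the real match buffer may be arbitrarily large.
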
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index ea737fd789e8..5c0de704bad5 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -880,22 +880,26 @@ static int nft_ct_helper_obj_dump(struct sk_buff *skb,
struct nft_object *obj, bool reset)
{
const struct nft_ct_helper_obj *priv = nft_obj_data(obj);
- const struct nf_conntrack_helper *helper = priv->helper4;
+ const struct nf_conntrack_helper *helper;
u16 family;
+ if (priv->helper4 && priv->helper6) {
+ family = NFPROTO_INET;
+ helper = priv->helper4;
+ } else if (priv->helper6) {
+ family = NFPROTO_IPV6;
+ helper = priv->helper6;
+ } else {
+ family = NFPROTO_IPV4;
+ helper = priv->helper4;
+ }
+
if (nla_put_string(skb, NFTA_CT_HELPER_NAME, helper->name))
return -1;
if (nla_put_u8(skb, NFTA_CT_HELPER_L4PROTO, priv->l4proto))
return -1;
- if (priv->helper4 && priv->helper6)
- family = NFPROTO_INET;
- else if (priv->helper6)
- family = NFPROTO_IPV6;
- else
- family = NFPROTO_IPV4;
-
if (nla_put_be16(skb, NFTA_CT_HELPER_L3PROTO, htons(family)))
return -1;
diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c
index 4717d7796927..aa87ff8beae8 100644
--- a/net/netfilter/nft_immediate.c
+++ b/net/netfilter/nft_immediate.c
@@ -69,8 +69,16 @@ err1:
return err;
}
-static void nft_immediate_destroy(const struct nft_ctx *ctx,
- const struct nft_expr *expr)
+static void nft_immediate_activate(const struct nft_ctx *ctx,
+ const struct nft_expr *expr)
+{
+ const struct nft_immediate_expr *priv = nft_expr_priv(expr);
+
+ return nft_data_hold(&priv->data, nft_dreg_to_type(priv->dreg));
+}
+
+static void nft_immediate_deactivate(const struct nft_ctx *ctx,
+ const struct nft_expr *expr)
{
const struct nft_immediate_expr *priv = nft_expr_priv(expr);
@@ -108,7 +116,8 @@ static const struct nft_expr_ops nft_imm_ops = {
.size = NFT_EXPR_SIZE(sizeof(struct nft_immediate_expr)),
.eval = nft_immediate_eval,
.init = nft_immediate_init,
- .destroy = nft_immediate_destroy,
+ .activate = nft_immediate_activate,
+ .deactivate = nft_immediate_deactivate,
.dump = nft_immediate_dump,
.validate = nft_immediate_validate,
};
diff --git a/net/netfilter/nft_limit.c b/net/netfilter/nft_limit.c
index a9fc298ef4c3..72f13a1144dd 100644
--- a/net/netfilter/nft_limit.c
+++ b/net/netfilter/nft_limit.c
@@ -51,10 +51,13 @@ static inline bool nft_limit_eval(struct nft_limit *limit, u64 cost)
return !limit->invert;
}
+/* Use same default as in iptables. */
+#define NFT_LIMIT_PKT_BURST_DEFAULT 5
+
static int nft_limit_init(struct nft_limit *limit,
- const struct nlattr * const tb[])
+ const struct nlattr * const tb[], bool pkts)
{
- u64 unit;
+ u64 unit, tokens;
if (tb[NFTA_LIMIT_RATE] == NULL ||
tb[NFTA_LIMIT_UNIT] == NULL)
@@ -68,18 +71,25 @@ static int nft_limit_init(struct nft_limit *limit,
if (tb[NFTA_LIMIT_BURST])
limit->burst = ntohl(nla_get_be32(tb[NFTA_LIMIT_BURST]));
- else
- limit->burst = 0;
+
+ if (pkts && limit->burst == 0)
+ limit->burst = NFT_LIMIT_PKT_BURST_DEFAULT;
if (limit->rate + limit->burst < limit->rate)
return -EOVERFLOW;
- /* The token bucket size limits the number of tokens can be
- * accumulated. tokens_max specifies the bucket size.
- * tokens_max = unit * (rate + burst) / rate.
- */
- limit->tokens = div_u64(limit->nsecs * (limit->rate + limit->burst),
- limit->rate);
+ if (pkts) {
+ tokens = div_u64(limit->nsecs, limit->rate) * limit->burst;
+ } else {
+ /* The token bucket size limits the number of tokens that can
+ * be accumulated. tokens_max specifies the bucket size.
+ * tokens_max = unit * (rate + burst) / rate.
+ */
+ tokens = div_u64(limit->nsecs * (limit->rate + limit->burst),
+ limit->rate);
+ }
+
+ limit->tokens = tokens;
limit->tokens_max = limit->tokens;
if (tb[NFTA_LIMIT_FLAGS]) {
@@ -144,7 +154,7 @@ static int nft_limit_pkts_init(const struct nft_ctx *ctx,
struct nft_limit_pkts *priv = nft_expr_priv(expr);
int err;
- err = nft_limit_init(&priv->limit, tb);
+ err = nft_limit_init(&priv->limit, tb, true);
if (err < 0)
return err;
@@ -185,7 +195,7 @@ static int nft_limit_bytes_init(const struct nft_ctx *ctx,
{
struct nft_limit *priv = nft_expr_priv(expr);
- return nft_limit_init(priv, tb);
+ return nft_limit_init(priv, tb, false);
}
static int nft_limit_bytes_dump(struct sk_buff *skb,
@@ -246,7 +256,7 @@ static int nft_limit_obj_pkts_init(const struct nft_ctx *ctx,
struct nft_limit_pkts *priv = nft_obj_data(obj);
int err;
- err = nft_limit_init(&priv->limit, tb);
+ err = nft_limit_init(&priv->limit, tb, true);
if (err < 0)
return err;
@@ -289,7 +299,7 @@ static int nft_limit_obj_bytes_init(const struct nft_ctx *ctx,
{
struct nft_limit *priv = nft_obj_data(obj);
- return nft_limit_init(priv, tb);
+ return nft_limit_init(priv, tb, false);
}
static int nft_limit_obj_bytes_dump(struct sk_buff *skb,
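
The nft_limit hunk gives packet-mode limits the iptables default burst of 5 and derives the bucket depth with a different formula per mode. A runnable userspace sketch of the two formulas; the names and the plain / in place of div_u64() are illustrative:

#include <stdint.h>
#include <stdio.h>

#define PKT_BURST_DEFAULT 5   /* stand-in for NFT_LIMIT_PKT_BURST_DEFAULT */

/* Bucket depth (tokens_max) for a limit of `rate` per `nsecs`. */
static uint64_t bucket_depth(uint64_t nsecs, uint64_t rate,
			     uint64_t burst, int pkts)
{
	if (pkts)
		/* packet mode: burst extra packets' worth of token time */
		return (nsecs / rate) * burst;
	/* byte mode: tokens_max = unit * (rate + burst) / rate */
	return nsecs * (rate + burst) / rate;
}

int main(void)
{
	uint64_t sec = 1000000000ULL;   /* one second in nanoseconds */

	printf("10 pkt/s, default burst: %llu token-ns\n",
	       (unsigned long long)bucket_depth(sec, 10, PKT_BURST_DEFAULT, 1));
	printf("10 byte/s, burst 5:      %llu token-ns\n",
	       (unsigned long long)bucket_depth(sec, 10, 5, 0));
	return 0;
}

In packet mode the depth is the burst expressed in token time; in byte mode the classic tokens_max formula keeps the pre-existing behaviour.
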
diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c
index 8fb91940e2e7..204af9899482 100644
--- a/net/netfilter/nft_meta.c
+++ b/net/netfilter/nft_meta.c
@@ -234,7 +234,7 @@ void nft_meta_set_eval(const struct nft_expr *expr,
struct sk_buff *skb = pkt->skb;
u32 *sreg = &regs->data[meta->sreg];
u32 value = *sreg;
- u8 pkt_type;
+ u8 value8;
switch (meta->key) {
case NFT_META_MARK:
@@ -244,15 +244,17 @@ void nft_meta_set_eval(const struct nft_expr *expr,
skb->priority = value;
break;
case NFT_META_PKTTYPE:
- pkt_type = nft_reg_load8(sreg);
+ value8 = nft_reg_load8(sreg);
- if (skb->pkt_type != pkt_type &&
- skb_pkt_type_ok(pkt_type) &&
+ if (skb->pkt_type != value8 &&
+ skb_pkt_type_ok(value8) &&
skb_pkt_type_ok(skb->pkt_type))
- skb->pkt_type = pkt_type;
+ skb->pkt_type = value8;
break;
case NFT_META_NFTRACE:
- skb->nf_trace = !!value;
+ value8 = nft_reg_load8(sreg);
+
+ skb->nf_trace = !!value8;
break;
default:
WARN_ON(1);
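
The nft_meta hunk switches NFT_META_NFTRACE from reading the whole 32-bit register word to an 8-bit load, matching how userspace writes the value. A little-endian userspace model of why the width matters; reg_load8() mimics what nft_reg_load8() is assumed to do (dereference the register as a u8):

#include <stdint.h>
#include <stdio.h>

/* Model of nft_reg_load8(): read only the first byte of the register
 * word (the low byte on little-endian machines, as shown here). */
static uint8_t reg_load8(const uint32_t *sreg)
{
	return *(const uint8_t *)sreg;
}

int main(void)
{
	/* Low byte (what userspace set) is 0; upper bytes are stale. */
	uint32_t reg = 0xdeadbe00;

	printf("full-word test: %d, 8-bit load: %d\n", !!reg, !!reg_load8(&reg));
	return 0;
}
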
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 71325fef647d..55cb4d197184 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -183,6 +183,9 @@ struct xt_match *xt_find_match(u8 af, const char *name, u8 revision)
struct xt_match *m;
int err = -ENOENT;
+ if (strnlen(name, XT_EXTENSION_MAXNAMELEN) == XT_EXTENSION_MAXNAMELEN)
+ return ERR_PTR(-EINVAL);
+
mutex_lock(&xt[af].mutex);
list_for_each_entry(m, &xt[af].match, list) {
if (strcmp(m->name, name) == 0) {
@@ -229,6 +232,9 @@ struct xt_target *xt_find_target(u8 af, const char *name, u8 revision)
struct xt_target *t;
int err = -ENOENT;
+ if (strnlen(name, XT_EXTENSION_MAXNAMELEN) == XT_EXTENSION_MAXNAMELEN)
+ return ERR_PTR(-EINVAL);
+
mutex_lock(&xt[af].mutex);
list_for_each_entry(t, &xt[af].target, list) {
if (strcmp(t->name, name) == 0) {
@@ -1489,15 +1495,10 @@ void *xt_unregister_table(struct xt_table *table)
EXPORT_SYMBOL_GPL(xt_unregister_table);
#ifdef CONFIG_PROC_FS
-struct xt_names_priv {
- struct seq_net_private p;
- u_int8_t af;
-};
static void *xt_table_seq_start(struct seq_file *seq, loff_t *pos)
{
- struct xt_names_priv *priv = seq->private;
struct net *net = seq_file_net(seq);
- u_int8_t af = priv->af;
+ u_int8_t af = (unsigned long)PDE_DATA(file_inode(seq->file));
mutex_lock(&xt[af].mutex);
return seq_list_start(&net->xt.tables[af], *pos);
@@ -1505,17 +1506,15 @@ static void *xt_table_seq_start(struct seq_file *seq, loff_t *pos)
static void *xt_table_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
- struct xt_names_priv *priv = seq->private;
struct net *net = seq_file_net(seq);
- u_int8_t af = priv->af;
+ u_int8_t af = (unsigned long)PDE_DATA(file_inode(seq->file));
return seq_list_next(v, &net->xt.tables[af], pos);
}
static void xt_table_seq_stop(struct seq_file *seq, void *v)
{
- struct xt_names_priv *priv = seq->private;
- u_int8_t af = priv->af;
+ u_int8_t af = (unsigned long)PDE_DATA(file_inode(seq->file));
mutex_unlock(&xt[af].mutex);
}
@@ -1536,34 +1535,13 @@ static const struct seq_operations xt_table_seq_ops = {
.show = xt_table_seq_show,
};
-static int xt_table_open(struct inode *inode, struct file *file)
-{
- int ret;
- struct xt_names_priv *priv;
-
- ret = seq_open_net(inode, file, &xt_table_seq_ops,
- sizeof(struct xt_names_priv));
- if (!ret) {
- priv = ((struct seq_file *)file->private_data)->private;
- priv->af = (unsigned long)PDE_DATA(inode);
- }
- return ret;
-}
-
-static const struct file_operations xt_table_ops = {
- .open = xt_table_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release_net,
-};
-
/*
* Traverse state for ip{,6}_{tables,matches} for helping crossing
* the multi-AF mutexes.
*/
struct nf_mttg_trav {
struct list_head *head, *curr;
- uint8_t class, nfproto;
+ uint8_t class;
};
enum {
@@ -1580,6 +1558,7 @@ static void *xt_mttg_seq_next(struct seq_file *seq, void *v, loff_t *ppos,
[MTTG_TRAV_NFP_UNSPEC] = MTTG_TRAV_NFP_SPEC,
[MTTG_TRAV_NFP_SPEC] = MTTG_TRAV_DONE,
};
+ uint8_t nfproto = (unsigned long)PDE_DATA(file_inode(seq->file));
struct nf_mttg_trav *trav = seq->private;
switch (trav->class) {
@@ -1594,9 +1573,9 @@ static void *xt_mttg_seq_next(struct seq_file *seq, void *v, loff_t *ppos,
if (trav->curr != trav->head)
break;
mutex_unlock(&xt[NFPROTO_UNSPEC].mutex);
- mutex_lock(&xt[trav->nfproto].mutex);
+ mutex_lock(&xt[nfproto].mutex);
trav->head = trav->curr = is_target ?
- &xt[trav->nfproto].target : &xt[trav->nfproto].match;
+ &xt[nfproto].target : &xt[nfproto].match;
trav->class = next_class[trav->class];
break;
case MTTG_TRAV_NFP_SPEC:
@@ -1628,6 +1607,7 @@ static void *xt_mttg_seq_start(struct seq_file *seq, loff_t *pos,
static void xt_mttg_seq_stop(struct seq_file *seq, void *v)
{
+ uint8_t nfproto = (unsigned long)PDE_DATA(file_inode(seq->file));
struct nf_mttg_trav *trav = seq->private;
switch (trav->class) {
@@ -1635,7 +1615,7 @@ static void xt_mttg_seq_stop(struct seq_file *seq, void *v)
mutex_unlock(&xt[NFPROTO_UNSPEC].mutex);
break;
case MTTG_TRAV_NFP_SPEC:
- mutex_unlock(&xt[trav->nfproto].mutex);
+ mutex_unlock(&xt[nfproto].mutex);
break;
}
}
@@ -1674,24 +1654,6 @@ static const struct seq_operations xt_match_seq_ops = {
.show = xt_match_seq_show,
};
-static int xt_match_open(struct inode *inode, struct file *file)
-{
- struct nf_mttg_trav *trav;
- trav = __seq_open_private(file, &xt_match_seq_ops, sizeof(*trav));
- if (!trav)
- return -ENOMEM;
-
- trav->nfproto = (unsigned long)PDE_DATA(inode);
- return 0;
-}
-
-static const struct file_operations xt_match_ops = {
- .open = xt_match_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release_private,
-};
-
static void *xt_target_seq_start(struct seq_file *seq, loff_t *pos)
{
return xt_mttg_seq_start(seq, pos, true);
@@ -1726,24 +1688,6 @@ static const struct seq_operations xt_target_seq_ops = {
.show = xt_target_seq_show,
};
-static int xt_target_open(struct inode *inode, struct file *file)
-{
- struct nf_mttg_trav *trav;
- trav = __seq_open_private(file, &xt_target_seq_ops, sizeof(*trav));
- if (!trav)
- return -ENOMEM;
-
- trav->nfproto = (unsigned long)PDE_DATA(inode);
- return 0;
-}
-
-static const struct file_operations xt_target_ops = {
- .open = xt_target_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release_private,
-};
-
#define FORMAT_TABLES "_tables_names"
#define FORMAT_MATCHES "_tables_matches"
#define FORMAT_TARGETS "_tables_targets"
@@ -1807,8 +1751,9 @@ int xt_proto_init(struct net *net, u_int8_t af)
strlcpy(buf, xt_prefix[af], sizeof(buf));
strlcat(buf, FORMAT_TABLES, sizeof(buf));
- proc = proc_create_data(buf, 0440, net->proc_net, &xt_table_ops,
- (void *)(unsigned long)af);
+ proc = proc_create_net_data(buf, 0440, net->proc_net, &xt_table_seq_ops,
+ sizeof(struct seq_net_private),
+ (void *)(unsigned long)af);
if (!proc)
goto out;
if (uid_valid(root_uid) && gid_valid(root_gid))
@@ -1816,8 +1761,9 @@ int xt_proto_init(struct net *net, u_int8_t af)
strlcpy(buf, xt_prefix[af], sizeof(buf));
strlcat(buf, FORMAT_MATCHES, sizeof(buf));
- proc = proc_create_data(buf, 0440, net->proc_net, &xt_match_ops,
- (void *)(unsigned long)af);
+ proc = proc_create_seq_private(buf, 0440, net->proc_net,
+ &xt_match_seq_ops, sizeof(struct nf_mttg_trav),
+ (void *)(unsigned long)af);
if (!proc)
goto out_remove_tables;
if (uid_valid(root_uid) && gid_valid(root_gid))
@@ -1825,8 +1771,9 @@ int xt_proto_init(struct net *net, u_int8_t af)
strlcpy(buf, xt_prefix[af], sizeof(buf));
strlcat(buf, FORMAT_TARGETS, sizeof(buf));
- proc = proc_create_data(buf, 0440, net->proc_net, &xt_target_ops,
- (void *)(unsigned long)af);
+ proc = proc_create_seq_private(buf, 0440, net->proc_net,
+ &xt_target_seq_ops, sizeof(struct nf_mttg_trav),
+ (void *)(unsigned long)af);
if (!proc)
goto out_remove_matches;
if (uid_valid(root_uid) && gid_valid(root_gid))
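
Besides the proc_create_net_data()/proc_create_seq_private() conversions, the x_tables hunks add an up-front strnlen() guard to xt_find_match() and xt_find_target(), so a name with no NUL terminator inside the buffer is rejected before any list walk. The same guard as a runnable userspace sketch; EXTENSION_MAXNAMELEN stands in for XT_EXTENSION_MAXNAMELEN:

#include <errno.h>
#include <stdio.h>
#include <string.h>

#define EXTENSION_MAXNAMELEN 29   /* stand-in for XT_EXTENSION_MAXNAMELEN */

/* Reject names with no NUL terminator inside the allowed length. */
static int check_name(const char *name)
{
	if (strnlen(name, EXTENSION_MAXNAMELEN) == EXTENSION_MAXNAMELEN)
		return -EINVAL;
	return 0;
}

int main(void)
{
	printf("%d\n", check_name("conntrack"));                        /* 0 */
	printf("%d\n", check_name("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")); /* -EINVAL */
	return 0;
}
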
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index 0cd73567e7ff..9b16402f29af 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -57,9 +57,9 @@ static inline struct hashlimit_net *hashlimit_pernet(struct net *net)
}
/* need to declare this at the top */
-static const struct file_operations dl_file_ops_v2;
-static const struct file_operations dl_file_ops_v1;
-static const struct file_operations dl_file_ops;
+static const struct seq_operations dl_seq_ops_v2;
+static const struct seq_operations dl_seq_ops_v1;
+static const struct seq_operations dl_seq_ops;
/* hash table crap */
struct dsthash_dst {
@@ -272,7 +272,7 @@ static int htable_create(struct net *net, struct hashlimit_cfg3 *cfg,
{
struct hashlimit_net *hashlimit_net = hashlimit_pernet(net);
struct xt_hashlimit_htable *hinfo;
- const struct file_operations *fops;
+ const struct seq_operations *ops;
unsigned int size, i;
int ret;
@@ -321,19 +321,19 @@ static int htable_create(struct net *net, struct hashlimit_cfg3 *cfg,
switch (revision) {
case 1:
- fops = &dl_file_ops_v1;
+ ops = &dl_seq_ops_v1;
break;
case 2:
- fops = &dl_file_ops_v2;
+ ops = &dl_seq_ops_v2;
break;
default:
- fops = &dl_file_ops;
+ ops = &dl_seq_ops;
}
- hinfo->pde = proc_create_data(name, 0,
+ hinfo->pde = proc_create_seq_data(name, 0,
(family == NFPROTO_IPV4) ?
hashlimit_net->ipt_hashlimit : hashlimit_net->ip6t_hashlimit,
- fops, hinfo);
+ ops, hinfo);
if (hinfo->pde == NULL) {
kfree(hinfo->name);
vfree(hinfo);
@@ -1057,7 +1057,7 @@ static struct xt_match hashlimit_mt_reg[] __read_mostly = {
static void *dl_seq_start(struct seq_file *s, loff_t *pos)
__acquires(htable->lock)
{
- struct xt_hashlimit_htable *htable = s->private;
+ struct xt_hashlimit_htable *htable = PDE_DATA(file_inode(s->private));
unsigned int *bucket;
spin_lock_bh(&htable->lock);
@@ -1074,7 +1074,7 @@ static void *dl_seq_start(struct seq_file *s, loff_t *pos)
static void *dl_seq_next(struct seq_file *s, void *v, loff_t *pos)
{
- struct xt_hashlimit_htable *htable = s->private;
+ struct xt_hashlimit_htable *htable = PDE_DATA(file_inode(s->private));
unsigned int *bucket = v;
*pos = ++(*bucket);
@@ -1088,7 +1088,7 @@ static void *dl_seq_next(struct seq_file *s, void *v, loff_t *pos)
static void dl_seq_stop(struct seq_file *s, void *v)
__releases(htable->lock)
{
- struct xt_hashlimit_htable *htable = s->private;
+ struct xt_hashlimit_htable *htable = PDE_DATA(file_inode(s->private));
unsigned int *bucket = v;
if (!IS_ERR(bucket))
@@ -1130,7 +1130,7 @@ static void dl_seq_print(struct dsthash_ent *ent, u_int8_t family,
static int dl_seq_real_show_v2(struct dsthash_ent *ent, u_int8_t family,
struct seq_file *s)
{
- const struct xt_hashlimit_htable *ht = s->private;
+ struct xt_hashlimit_htable *ht = PDE_DATA(file_inode(s->private));
spin_lock(&ent->lock);
/* recalculate to show accurate numbers */
@@ -1145,7 +1145,7 @@ static int dl_seq_real_show_v2(struct dsthash_ent *ent, u_int8_t family,
static int dl_seq_real_show_v1(struct dsthash_ent *ent, u_int8_t family,
struct seq_file *s)
{
- const struct xt_hashlimit_htable *ht = s->private;
+ struct xt_hashlimit_htable *ht = PDE_DATA(file_inode(s->private));
spin_lock(&ent->lock);
/* recalculate to show accurate numbers */
@@ -1160,7 +1160,7 @@ static int dl_seq_real_show_v1(struct dsthash_ent *ent, u_int8_t family,
static int dl_seq_real_show(struct dsthash_ent *ent, u_int8_t family,
struct seq_file *s)
{
- const struct xt_hashlimit_htable *ht = s->private;
+ struct xt_hashlimit_htable *ht = PDE_DATA(file_inode(s->private));
spin_lock(&ent->lock);
/* recalculate to show accurate numbers */
@@ -1174,7 +1174,7 @@ static int dl_seq_real_show(struct dsthash_ent *ent, u_int8_t family,
static int dl_seq_show_v2(struct seq_file *s, void *v)
{
- struct xt_hashlimit_htable *htable = s->private;
+ struct xt_hashlimit_htable *htable = PDE_DATA(file_inode(s->private));
unsigned int *bucket = (unsigned int *)v;
struct dsthash_ent *ent;
@@ -1188,7 +1188,7 @@ static int dl_seq_show_v2(struct seq_file *s, void *v)
static int dl_seq_show_v1(struct seq_file *s, void *v)
{
- struct xt_hashlimit_htable *htable = s->private;
+ struct xt_hashlimit_htable *htable = PDE_DATA(file_inode(s->private));
unsigned int *bucket = v;
struct dsthash_ent *ent;
@@ -1202,7 +1202,7 @@ static int dl_seq_show_v1(struct seq_file *s, void *v)
static int dl_seq_show(struct seq_file *s, void *v)
{
- struct xt_hashlimit_htable *htable = s->private;
+ struct xt_hashlimit_htable *htable = PDE_DATA(file_inode(s->private));
unsigned int *bucket = v;
struct dsthash_ent *ent;
@@ -1235,62 +1235,6 @@ static const struct seq_operations dl_seq_ops = {
.show = dl_seq_show
};
-static int dl_proc_open_v2(struct inode *inode, struct file *file)
-{
- int ret = seq_open(file, &dl_seq_ops_v2);
-
- if (!ret) {
- struct seq_file *sf = file->private_data;
-
- sf->private = PDE_DATA(inode);
- }
- return ret;
-}
-
-static int dl_proc_open_v1(struct inode *inode, struct file *file)
-{
- int ret = seq_open(file, &dl_seq_ops_v1);
-
- if (!ret) {
- struct seq_file *sf = file->private_data;
- sf->private = PDE_DATA(inode);
- }
- return ret;
-}
-
-static int dl_proc_open(struct inode *inode, struct file *file)
-{
- int ret = seq_open(file, &dl_seq_ops);
-
- if (!ret) {
- struct seq_file *sf = file->private_data;
-
- sf->private = PDE_DATA(inode);
- }
- return ret;
-}
-
-static const struct file_operations dl_file_ops_v2 = {
- .open = dl_proc_open_v2,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release
-};
-
-static const struct file_operations dl_file_ops_v1 = {
- .open = dl_proc_open_v1,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release
-};
-
-static const struct file_operations dl_file_ops = {
- .open = dl_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release
-};
-
static int __net_init hashlimit_proc_net_init(struct net *net)
{
struct hashlimit_net *hashlimit_net = hashlimit_pernet(net);
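
All of these proc conversions keep the seq_file iterator contract and only drop the boilerplate open/read/release glue. For orientation, here is a plain-C model of the start/next/stop/show protocol itself; it is purely illustrative and uses no kernel API:

#include <stdio.h>

static const char *items[] = { "alpha", "beta", "gamma" };
#define NITEMS (sizeof(items) / sizeof(items[0]))

/* start(): position the iterator (the kernel takes locks here). */
static const void *seq_start(long *pos)
{
	return (size_t)*pos < NITEMS ? &items[*pos] : NULL;
}

/* next(): advance to the following record, or NULL at the end. */
static const void *seq_next(long *pos)
{
	++*pos;
	return seq_start(pos);
}

/* stop(): the kernel drops locks here. */
static void seq_stop(void) { }

/* show(): emit one record. */
static int seq_show(const void *v)
{
	printf("%s\n", *(const char * const *)v);
	return 0;
}

int main(void)
{
	long pos = 0;
	const void *v;

	for (v = seq_start(&pos); v; v = seq_next(&pos))
		seq_show(v);
	seq_stop();
	return 0;
}
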
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 55342c4d5cec..1189b84413d5 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -2606,13 +2606,13 @@ static int netlink_seq_show(struct seq_file *seq, void *v)
{
if (v == SEQ_START_TOKEN) {
seq_puts(seq,
- "sk Eth Pid Groups "
- "Rmem Wmem Dump Locks Drops Inode\n");
+ "sk Eth Pid Groups "
+ "Rmem Wmem Dump Locks Drops Inode\n");
} else {
struct sock *s = v;
struct netlink_sock *nlk = nlk_sk(s);
- seq_printf(seq, "%pK %-3d %-6u %08x %-8d %-8d %d %-8d %-8d %-8lu\n",
+ seq_printf(seq, "%pK %-3d %-10u %08x %-8d %-8d %-5d %-8d %-8d %-8lu\n",
s,
s->sk_protocol,
nlk->portid,
@@ -2635,21 +2635,6 @@ static const struct seq_operations netlink_seq_ops = {
.stop = netlink_seq_stop,
.show = netlink_seq_show,
};
-
-
-static int netlink_seq_open(struct inode *inode, struct file *file)
-{
- return seq_open_net(inode, file, &netlink_seq_ops,
- sizeof(struct nl_seq_iter));
-}
-
-static const struct file_operations netlink_seq_fops = {
- .open = netlink_seq_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release_net,
-};
-
#endif
int netlink_register_notifier(struct notifier_block *nb)
@@ -2673,7 +2658,7 @@ static const struct proto_ops netlink_ops = {
.socketpair = sock_no_socketpair,
.accept = sock_no_accept,
.getname = netlink_getname,
- .poll = datagram_poll,
+ .poll_mask = datagram_poll_mask,
.ioctl = netlink_ioctl,
.listen = sock_no_listen,
.shutdown = sock_no_shutdown,
@@ -2694,7 +2679,8 @@ static const struct net_proto_family netlink_family_ops = {
static int __net_init netlink_net_init(struct net *net)
{
#ifdef CONFIG_PROC_FS
- if (!proc_create("netlink", 0, net->proc_net, &netlink_seq_fops))
+ if (!proc_create_net("netlink", 0, net->proc_net, &netlink_seq_ops,
+ sizeof(struct nl_seq_iter)))
return -ENOMEM;
#endif
return 0;
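
The ->poll to ->poll_mask conversions in this series split waiting from readiness: the old hooks called sock_poll_wait()/poll_wait() themselves, while a poll_mask hook only reports the current event mask and leaves the sleeping to the caller. A plain-C model of a mask-only hook, with no kernel types:

#define _GNU_SOURCE
#include <poll.h>
#include <stdio.h>

struct sock_state { int readable, writable, error; };

/* A poll_mask-style hook: compute readiness only; no poll_wait() here,
 * the caller is responsible for sleeping and waking. */
static short sock_poll_mask(const struct sock_state *sk, short events)
{
	short mask = 0;

	(void)events;   /* simplification: real hooks may ignore the hint */
	if (sk->error)
		mask |= POLLERR;
	if (sk->readable)
		mask |= POLLIN | POLLRDNORM;
	if (sk->writable)
		mask |= POLLOUT | POLLWRNORM;
	return mask;
}

int main(void)
{
	struct sock_state sk = { .readable = 1 };

	printf("mask=%#x\n", sock_poll_mask(&sk, POLLIN));
	return 0;
}
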
diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c
index 4221d98a314b..b97eb766a1d5 100644
--- a/net/netrom/af_netrom.c
+++ b/net/netrom/af_netrom.c
@@ -1338,18 +1338,6 @@ static const struct seq_operations nr_info_seqops = {
.stop = nr_info_stop,
.show = nr_info_show,
};
-
-static int nr_info_open(struct inode *inode, struct file *file)
-{
- return seq_open(file, &nr_info_seqops);
-}
-
-static const struct file_operations nr_info_fops = {
- .open = nr_info_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
#endif /* CONFIG_PROC_FS */
static const struct net_proto_family nr_family_ops = {
@@ -1367,7 +1355,7 @@ static const struct proto_ops nr_proto_ops = {
.socketpair = sock_no_socketpair,
.accept = nr_accept,
.getname = nr_getname,
- .poll = datagram_poll,
+ .poll_mask = datagram_poll_mask,
.ioctl = nr_ioctl,
.listen = nr_listen,
.shutdown = sock_no_shutdown,
@@ -1450,9 +1438,9 @@ static int __init nr_proto_init(void)
nr_loopback_init();
- proc_create("nr", 0444, init_net.proc_net, &nr_info_fops);
- proc_create("nr_neigh", 0444, init_net.proc_net, &nr_neigh_fops);
- proc_create("nr_nodes", 0444, init_net.proc_net, &nr_nodes_fops);
+ proc_create_seq("nr", 0444, init_net.proc_net, &nr_info_seqops);
+ proc_create_seq("nr_neigh", 0444, init_net.proc_net, &nr_neigh_seqops);
+ proc_create_seq("nr_nodes", 0444, init_net.proc_net, &nr_node_seqops);
out:
return rc;
fail:
diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c
index b5a7dcb30991..6485f593e2f0 100644
--- a/net/netrom/nr_route.c
+++ b/net/netrom/nr_route.c
@@ -888,25 +888,13 @@ static int nr_node_show(struct seq_file *seq, void *v)
return 0;
}
-static const struct seq_operations nr_node_seqops = {
+const struct seq_operations nr_node_seqops = {
.start = nr_node_start,
.next = nr_node_next,
.stop = nr_node_stop,
.show = nr_node_show,
};
-static int nr_node_info_open(struct inode *inode, struct file *file)
-{
- return seq_open(file, &nr_node_seqops);
-}
-
-const struct file_operations nr_nodes_fops = {
- .open = nr_node_info_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
static void *nr_neigh_start(struct seq_file *seq, loff_t *pos)
{
spin_lock_bh(&nr_neigh_list_lock);
@@ -954,25 +942,12 @@ static int nr_neigh_show(struct seq_file *seq, void *v)
return 0;
}
-static const struct seq_operations nr_neigh_seqops = {
+const struct seq_operations nr_neigh_seqops = {
.start = nr_neigh_start,
.next = nr_neigh_next,
.stop = nr_neigh_stop,
.show = nr_neigh_show,
};
-
-static int nr_neigh_info_open(struct inode *inode, struct file *file)
-{
- return seq_open(file, &nr_neigh_seqops);
-}
-
-const struct file_operations nr_neigh_fops = {
- .open = nr_neigh_info_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
#endif
/*
diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c
index ea0c0c6f1874..ab5bb14b49af 100644
--- a/net/nfc/llcp_sock.c
+++ b/net/nfc/llcp_sock.c
@@ -548,16 +548,13 @@ static inline __poll_t llcp_accept_poll(struct sock *parent)
return 0;
}
-static __poll_t llcp_sock_poll(struct file *file, struct socket *sock,
- poll_table *wait)
+static __poll_t llcp_sock_poll_mask(struct socket *sock, __poll_t events)
{
struct sock *sk = sock->sk;
__poll_t mask = 0;
pr_debug("%p\n", sk);
- sock_poll_wait(file, sk_sleep(sk), wait);
-
if (sk->sk_state == LLCP_LISTEN)
return llcp_accept_poll(sk);
@@ -899,7 +896,7 @@ static const struct proto_ops llcp_sock_ops = {
.socketpair = sock_no_socketpair,
.accept = llcp_sock_accept,
.getname = llcp_sock_getname,
- .poll = llcp_sock_poll,
+ .poll_mask = llcp_sock_poll_mask,
.ioctl = sock_no_ioctl,
.listen = llcp_sock_listen,
.shutdown = sock_no_shutdown,
@@ -919,7 +916,7 @@ static const struct proto_ops llcp_rawsock_ops = {
.socketpair = sock_no_socketpair,
.accept = sock_no_accept,
.getname = llcp_sock_getname,
- .poll = llcp_sock_poll,
+ .poll_mask = llcp_sock_poll_mask,
.ioctl = sock_no_ioctl,
.listen = sock_no_listen,
.shutdown = sock_no_shutdown,
diff --git a/net/nfc/rawsock.c b/net/nfc/rawsock.c
index e2188deb08dc..60c322531c49 100644
--- a/net/nfc/rawsock.c
+++ b/net/nfc/rawsock.c
@@ -284,7 +284,7 @@ static const struct proto_ops rawsock_ops = {
.socketpair = sock_no_socketpair,
.accept = sock_no_accept,
.getname = sock_no_getname,
- .poll = datagram_poll,
+ .poll_mask = datagram_poll_mask,
.ioctl = sock_no_ioctl,
.listen = sock_no_listen,
.shutdown = sock_no_shutdown,
@@ -304,7 +304,7 @@ static const struct proto_ops rawsock_raw_ops = {
.socketpair = sock_no_socketpair,
.accept = sock_no_accept,
.getname = sock_no_getname,
- .poll = datagram_poll,
+ .poll_mask = datagram_poll_mask,
.ioctl = sock_no_ioctl,
.listen = sock_no_listen,
.shutdown = sock_no_shutdown,
diff --git a/net/nsh/nsh.c b/net/nsh/nsh.c
index d7da99a0b0b8..9696ef96b719 100644
--- a/net/nsh/nsh.c
+++ b/net/nsh/nsh.c
@@ -57,6 +57,8 @@ int nsh_pop(struct sk_buff *skb)
return -ENOMEM;
nh = (struct nshhdr *)(skb->data);
length = nsh_hdr_len(nh);
+ if (length < NSH_BASE_HDR_LEN)
+ return -EINVAL;
inner_proto = tun_p_to_eth_p(nh->np);
if (!pskb_may_pull(skb, length))
return -ENOMEM;
@@ -90,6 +92,8 @@ static struct sk_buff *nsh_gso_segment(struct sk_buff *skb,
if (unlikely(!pskb_may_pull(skb, NSH_BASE_HDR_LEN)))
goto out;
nsh_len = nsh_hdr_len(nsh_hdr(skb));
+ if (nsh_len < NSH_BASE_HDR_LEN)
+ goto out;
if (unlikely(!pskb_may_pull(skb, nsh_len)))
goto out;
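
The nsh fix validates a header length taken from the packet itself before using it to pull data. A userspace sketch of the same two-sided check; BASE_HDR_LEN stands in for NSH_BASE_HDR_LEN, and the real header encodes its length in 4-byte words rather than the raw byte count shown here:

#include <errno.h>
#include <stdint.h>
#include <stdio.h>

#define BASE_HDR_LEN 8   /* stand-in for NSH_BASE_HDR_LEN */

/* The length field comes from the packet itself, so it is untrusted. */
struct hdr { uint8_t length; };

static int parse(const struct hdr *h, size_t avail)
{
	size_t len = h->length;

	if (len < BASE_HDR_LEN)   /* the added check: below the fixed header */
		return -EINVAL;
	if (len > avail)          /* the pre-existing check: past the buffer */
		return -ENOMEM;
	return 0;                 /* safe to pull `len` bytes */
}

int main(void)
{
	struct hdr bad = { .length = 2 }, good = { .length = 8 };

	printf("bad=%d good=%d\n", parse(&bad, 64), parse(&good, 64));
	return 0;
}
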
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index 7322aa1e382e..492ab0c36f7c 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -1712,13 +1712,10 @@ static void nlattr_set(struct nlattr *attr, u8 val,
/* The nlattr stream should already have been validated */
nla_for_each_nested(nla, attr, rem) {
- if (tbl[nla_type(nla)].len == OVS_ATTR_NESTED) {
- if (tbl[nla_type(nla)].next)
- tbl = tbl[nla_type(nla)].next;
- nlattr_set(nla, val, tbl);
- } else {
+ if (tbl[nla_type(nla)].len == OVS_ATTR_NESTED)
+ nlattr_set(nla, val, tbl[nla_type(nla)].next ? : tbl);
+ else
memset(nla_data(nla), val, nla_len(nla));
- }
if (nla_type(nla) == OVS_KEY_ATTR_CT_STATE)
*(u32 *)nla_data(nla) &= CT_SUPPORTED_MASK;
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 01f3515cada0..674390b1f084 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -2903,13 +2903,15 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
if (skb == NULL)
goto out_unlock;
- skb_set_network_header(skb, reserve);
+ skb_reset_network_header(skb);
err = -EINVAL;
if (sock->type == SOCK_DGRAM) {
offset = dev_hard_header(skb, dev, ntohs(proto), addr, NULL, len);
if (unlikely(offset < 0))
goto out_free;
+ } else if (reserve) {
+ skb_reserve(skb, -reserve);
}
/* Returns -EFAULT on error */
@@ -4108,12 +4110,11 @@ static int packet_ioctl(struct socket *sock, unsigned int cmd,
return 0;
}
-static __poll_t packet_poll(struct file *file, struct socket *sock,
- poll_table *wait)
+static __poll_t packet_poll_mask(struct socket *sock, __poll_t events)
{
struct sock *sk = sock->sk;
struct packet_sock *po = pkt_sk(sk);
- __poll_t mask = datagram_poll(file, sock, wait);
+ __poll_t mask = datagram_poll_mask(sock, events);
spin_lock_bh(&sk->sk_receive_queue.lock);
if (po->rx_ring.pg_vec) {
@@ -4455,7 +4456,7 @@ static const struct proto_ops packet_ops_spkt = {
.socketpair = sock_no_socketpair,
.accept = sock_no_accept,
.getname = packet_getname_spkt,
- .poll = datagram_poll,
+ .poll_mask = datagram_poll_mask,
.ioctl = packet_ioctl,
.listen = sock_no_listen,
.shutdown = sock_no_shutdown,
@@ -4476,7 +4477,7 @@ static const struct proto_ops packet_ops = {
.socketpair = sock_no_socketpair,
.accept = sock_no_accept,
.getname = packet_getname,
- .poll = packet_poll,
+ .poll_mask = packet_poll_mask,
.ioctl = packet_ioctl,
.listen = sock_no_listen,
.shutdown = sock_no_shutdown,
@@ -4554,20 +4555,6 @@ static const struct seq_operations packet_seq_ops = {
.stop = packet_seq_stop,
.show = packet_seq_show,
};
-
-static int packet_seq_open(struct inode *inode, struct file *file)
-{
- return seq_open_net(inode, file, &packet_seq_ops,
- sizeof(struct seq_net_private));
-}
-
-static const struct file_operations packet_seq_fops = {
- .open = packet_seq_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release_net,
-};
-
#endif
static int __net_init packet_net_init(struct net *net)
@@ -4575,7 +4562,8 @@ static int __net_init packet_net_init(struct net *net)
mutex_init(&net->packet.sklist_lock);
INIT_HLIST_HEAD(&net->packet.sklist);
- if (!proc_create("packet", 0, net->proc_net, &packet_seq_fops))
+ if (!proc_create_net("packet", 0, net->proc_net, &packet_seq_ops,
+ sizeof(struct seq_net_private)))
return -ENOMEM;
return 0;
diff --git a/net/phonet/pn_dev.c b/net/phonet/pn_dev.c
index 77787512fc32..6cb4f602ab71 100644
--- a/net/phonet/pn_dev.c
+++ b/net/phonet/pn_dev.c
@@ -320,7 +320,8 @@ static int __net_init phonet_init_net(struct net *net)
{
struct phonet_net *pnn = phonet_pernet(net);
- if (!proc_create("phonet", 0, net->proc_net, &pn_sock_seq_fops))
+ if (!proc_create_net("phonet", 0, net->proc_net, &pn_sock_seq_ops,
+ sizeof(struct seq_net_private)))
return -ENOMEM;
INIT_LIST_HEAD(&pnn->pndevs.list);
@@ -351,7 +352,8 @@ int __init phonet_device_init(void)
if (err)
return err;
- proc_create("pnresource", 0, init_net.proc_net, &pn_res_seq_fops);
+ proc_create_net("pnresource", 0, init_net.proc_net, &pn_res_seq_ops,
+ sizeof(struct seq_net_private));
register_netdevice_notifier(&phonet_device_notifier);
err = phonet_netlink_register();
if (err)
diff --git a/net/phonet/socket.c b/net/phonet/socket.c
index f9b40e6a18a5..c295c4e20f01 100644
--- a/net/phonet/socket.c
+++ b/net/phonet/socket.c
@@ -340,15 +340,12 @@ static int pn_socket_getname(struct socket *sock, struct sockaddr *addr,
return sizeof(struct sockaddr_pn);
}
-static __poll_t pn_socket_poll(struct file *file, struct socket *sock,
- poll_table *wait)
+static __poll_t pn_socket_poll_mask(struct socket *sock, __poll_t events)
{
struct sock *sk = sock->sk;
struct pep_sock *pn = pep_sk(sk);
__poll_t mask = 0;
- poll_wait(file, sk_sleep(sk), wait);
-
if (sk->sk_state == TCP_CLOSE)
return EPOLLERR;
if (!skb_queue_empty(&sk->sk_receive_queue))
@@ -448,7 +445,7 @@ const struct proto_ops phonet_dgram_ops = {
.socketpair = sock_no_socketpair,
.accept = sock_no_accept,
.getname = pn_socket_getname,
- .poll = datagram_poll,
+ .poll_mask = datagram_poll_mask,
.ioctl = pn_socket_ioctl,
.listen = sock_no_listen,
.shutdown = sock_no_shutdown,
@@ -473,7 +470,7 @@ const struct proto_ops phonet_stream_ops = {
.socketpair = sock_no_socketpair,
.accept = pn_socket_accept,
.getname = pn_socket_getname,
- .poll = pn_socket_poll,
+ .poll_mask = pn_socket_poll_mask,
.ioctl = pn_socket_ioctl,
.listen = pn_socket_listen,
.shutdown = sock_no_shutdown,
@@ -620,25 +617,12 @@ static int pn_sock_seq_show(struct seq_file *seq, void *v)
return 0;
}
-static const struct seq_operations pn_sock_seq_ops = {
+const struct seq_operations pn_sock_seq_ops = {
.start = pn_sock_seq_start,
.next = pn_sock_seq_next,
.stop = pn_sock_seq_stop,
.show = pn_sock_seq_show,
};
-
-static int pn_sock_open(struct inode *inode, struct file *file)
-{
- return seq_open_net(inode, file, &pn_sock_seq_ops,
- sizeof(struct seq_net_private));
-}
-
-const struct file_operations pn_sock_seq_fops = {
- .open = pn_sock_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release_net,
-};
#endif
static struct {
@@ -802,23 +786,10 @@ static int pn_res_seq_show(struct seq_file *seq, void *v)
return 0;
}
-static const struct seq_operations pn_res_seq_ops = {
+const struct seq_operations pn_res_seq_ops = {
.start = pn_res_seq_start,
.next = pn_res_seq_next,
.stop = pn_res_seq_stop,
.show = pn_res_seq_show,
};
-
-static int pn_res_open(struct inode *inode, struct file *file)
-{
- return seq_open_net(inode, file, &pn_res_seq_ops,
- sizeof(struct seq_net_private));
-}
-
-const struct file_operations pn_res_seq_fops = {
- .open = pn_res_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release_net,
-};
#endif
diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c
index 2aa07b547b16..1b5025ea5b04 100644
--- a/net/qrtr/qrtr.c
+++ b/net/qrtr/qrtr.c
@@ -1023,7 +1023,7 @@ static const struct proto_ops qrtr_proto_ops = {
.recvmsg = qrtr_recvmsg,
.getname = qrtr_getname,
.ioctl = qrtr_ioctl,
- .poll = datagram_poll,
+ .poll_mask = datagram_poll_mask,
.shutdown = sock_no_shutdown,
.setsockopt = sock_no_setsockopt,
.getsockopt = sock_no_getsockopt,
diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c
index eea1d8611b20..13b38ad0fa4a 100644
--- a/net/rds/ib_cm.c
+++ b/net/rds/ib_cm.c
@@ -547,7 +547,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
rdsdebug("conn %p pd %p cq %p %p\n", conn, ic->i_pd,
ic->i_send_cq, ic->i_recv_cq);
- return ret;
+ goto out;
sends_out:
vfree(ic->i_sends);
@@ -572,6 +572,7 @@ send_cq_out:
ic->i_send_cq = NULL;
rds_ibdev_out:
rds_ib_remove_conn(rds_ibdev, conn);
+out:
rds_ib_dev_put(rds_ibdev);
return ret;
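
The ib_cm change reroutes the success path of rds_ib_setup_qp() through a new out label so rds_ib_dev_put() runs whether or not setup succeeded; the old direct return leaked a device reference on every successful call. The shape of the bug and the fix, modeled with a plain counter:

#include <stdio.h>

static int refcount;

static void dev_get(void) { refcount++; }
static void dev_put(void) { refcount--; }

static int setup(int fail)
{
	int ret = 0;

	dev_get();          /* reference held for the duration of setup */
	if (fail) {
		ret = -1;
		goto out;
	}
	/* before the fix, a plain "return ret;" here skipped dev_put() */
out:
	dev_put();          /* now runs on the success path too */
	return ret;
}

int main(void)
{
	setup(0);
	setup(1);
	printf("refcount after both paths: %d\n", refcount);   /* 0, no leak */
	return 0;
}
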
diff --git a/net/rds/recv.c b/net/rds/recv.c
index de50e2126e40..dc67458b52f0 100644
--- a/net/rds/recv.c
+++ b/net/rds/recv.c
@@ -558,6 +558,7 @@ static int rds_cmsg_recv(struct rds_incoming *inc, struct msghdr *msg,
struct rds_cmsg_rx_trace t;
int i, j;
+ memset(&t, 0, sizeof(t));
inc->i_rx_lat_trace[RDS_MSG_RX_CMSG] = local_clock();
t.rx_traces = rs->rs_rx_traces;
for (i = 0; i < rs->rs_rx_traces; i++) {
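
The one-line memset() added to rds_cmsg_recv() zeroes the on-stack trace struct before it is filled and copied to userspace, so compiler-inserted padding cannot leak stack contents. A userspace sketch of the pattern; the struct layout here is illustrative:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* 7 bytes of padding follow rx_traces; without the memset they would
 * hold whatever was on the stack when the struct is copied out. */
struct rx_trace {
	uint8_t  rx_traces;
	uint64_t rx_trace[3];
};

int main(void)
{
	struct rx_trace t;

	memset(&t, 0, sizeof(t));   /* the fix: zero padding before filling */
	t.rx_traces = 1;
	t.rx_trace[0] = 42;

	/* every one of sizeof(t) bytes is now a known value */
	printf("copying %zu fully initialized bytes\n", sizeof(t));
	return 0;
}
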
diff --git a/net/rfkill/rfkill-gpio.c b/net/rfkill/rfkill-gpio.c
index 41bd496531d4..00192a996be0 100644
--- a/net/rfkill/rfkill-gpio.c
+++ b/net/rfkill/rfkill-gpio.c
@@ -137,13 +137,18 @@ static int rfkill_gpio_probe(struct platform_device *pdev)
ret = rfkill_register(rfkill->rfkill_dev);
if (ret < 0)
- return ret;
+ goto err_destroy;
platform_set_drvdata(pdev, rfkill);
dev_info(&pdev->dev, "%s device registered.\n", rfkill->name);
return 0;
+
+err_destroy:
+ rfkill_destroy(rfkill->rfkill_dev);
+
+ return ret;
}
static int rfkill_gpio_remove(struct platform_device *pdev)
diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
index 9ff5e0a76593..5b73fea849df 100644
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -1453,18 +1453,6 @@ static const struct seq_operations rose_info_seqops = {
.stop = rose_info_stop,
.show = rose_info_show,
};
-
-static int rose_info_open(struct inode *inode, struct file *file)
-{
- return seq_open(file, &rose_info_seqops);
-}
-
-static const struct file_operations rose_info_fops = {
- .open = rose_info_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
#endif /* CONFIG_PROC_FS */
static const struct net_proto_family rose_family_ops = {
@@ -1482,7 +1470,7 @@ static const struct proto_ops rose_proto_ops = {
.socketpair = sock_no_socketpair,
.accept = rose_accept,
.getname = rose_getname,
- .poll = datagram_poll,
+ .poll_mask = datagram_poll_mask,
.ioctl = rose_ioctl,
.listen = rose_listen,
.shutdown = sock_no_shutdown,
@@ -1567,13 +1555,13 @@ static int __init rose_proto_init(void)
rose_add_loopback_neigh();
- proc_create("rose", 0444, init_net.proc_net, &rose_info_fops);
- proc_create("rose_neigh", 0444, init_net.proc_net,
- &rose_neigh_fops);
- proc_create("rose_nodes", 0444, init_net.proc_net,
- &rose_nodes_fops);
- proc_create("rose_routes", 0444, init_net.proc_net,
- &rose_routes_fops);
+ proc_create_seq("rose", 0444, init_net.proc_net, &rose_info_seqops);
+ proc_create_seq("rose_neigh", 0444, init_net.proc_net,
+ &rose_neigh_seqops);
+ proc_create_seq("rose_nodes", 0444, init_net.proc_net,
+ &rose_node_seqops);
+ proc_create_seq("rose_routes", 0444, init_net.proc_net,
+ &rose_route_seqops);
out:
return rc;
fail:
diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c
index 178619ddab68..77e9f85a2c92 100644
--- a/net/rose/rose_route.c
+++ b/net/rose/rose_route.c
@@ -1143,25 +1143,13 @@ static int rose_node_show(struct seq_file *seq, void *v)
return 0;
}
-static const struct seq_operations rose_node_seqops = {
+const struct seq_operations rose_node_seqops = {
.start = rose_node_start,
.next = rose_node_next,
.stop = rose_node_stop,
.show = rose_node_show,
};
-static int rose_nodes_open(struct inode *inode, struct file *file)
-{
- return seq_open(file, &rose_node_seqops);
-}
-
-const struct file_operations rose_nodes_fops = {
- .open = rose_nodes_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
static void *rose_neigh_start(struct seq_file *seq, loff_t *pos)
__acquires(rose_neigh_list_lock)
{
@@ -1226,26 +1214,13 @@ static int rose_neigh_show(struct seq_file *seq, void *v)
}
-static const struct seq_operations rose_neigh_seqops = {
+const struct seq_operations rose_neigh_seqops = {
.start = rose_neigh_start,
.next = rose_neigh_next,
.stop = rose_neigh_stop,
.show = rose_neigh_show,
};
-static int rose_neigh_open(struct inode *inode, struct file *file)
-{
- return seq_open(file, &rose_neigh_seqops);
-}
-
-const struct file_operations rose_neigh_fops = {
- .open = rose_neigh_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
-
static void *rose_route_start(struct seq_file *seq, loff_t *pos)
__acquires(rose_route_list_lock)
{
@@ -1311,25 +1286,12 @@ static int rose_route_show(struct seq_file *seq, void *v)
return 0;
}
-static const struct seq_operations rose_route_seqops = {
+struct seq_operations rose_route_seqops = {
.start = rose_route_start,
.next = rose_route_next,
.stop = rose_route_stop,
.show = rose_route_show,
};
-
-static int rose_route_open(struct inode *inode, struct file *file)
-{
- return seq_open(file, &rose_route_seqops);
-}
-
-const struct file_operations rose_routes_fops = {
- .open = rose_route_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
#endif /* CONFIG_PROC_FS */
/*
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index 9a2c8e7c000e..3b1ac93efee2 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -313,7 +313,7 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock,
memset(&cp, 0, sizeof(cp));
cp.local = rx->local;
cp.key = key;
- cp.security_level = 0;
+ cp.security_level = rx->min_sec_level;
cp.exclusive = false;
cp.upgrade = upgrade;
cp.service_id = srx->srx_service;
@@ -734,15 +734,11 @@ static int rxrpc_getsockopt(struct socket *sock, int level, int optname,
/*
* permit an RxRPC socket to be polled
*/
-static __poll_t rxrpc_poll(struct file *file, struct socket *sock,
- poll_table *wait)
+static __poll_t rxrpc_poll_mask(struct socket *sock, __poll_t events)
{
struct sock *sk = sock->sk;
struct rxrpc_sock *rx = rxrpc_sk(sk);
- __poll_t mask;
-
- sock_poll_wait(file, sk_sleep(sk), wait);
- mask = 0;
+ __poll_t mask = 0;
/* the socket is readable if there are any messages waiting on the Rx
* queue */
@@ -949,7 +945,7 @@ static const struct proto_ops rxrpc_rpc_ops = {
.socketpair = sock_no_socketpair,
.accept = sock_no_accept,
.getname = sock_no_getname,
- .poll = rxrpc_poll,
+ .poll_mask = rxrpc_poll_mask,
.ioctl = sock_no_ioctl,
.listen = rxrpc_listen,
.shutdown = rxrpc_shutdown,
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index 90d7079e0aa9..29923ec2189c 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -476,6 +476,7 @@ enum rxrpc_call_flag {
RXRPC_CALL_SEND_PING, /* A ping will need to be sent */
RXRPC_CALL_PINGING, /* Ping in process */
RXRPC_CALL_RETRANS_TIMEOUT, /* Retransmission due to timeout occurred */
+ RXRPC_CALL_BEGAN_RX_TIMER, /* We began the expect_rx_by timer */
};
/*
@@ -1050,8 +1051,8 @@ void __rxrpc_queue_peer_error(struct rxrpc_peer *);
/*
* proc.c
*/
-extern const struct file_operations rxrpc_call_seq_fops;
-extern const struct file_operations rxrpc_connection_seq_fops;
+extern const struct seq_operations rxrpc_call_seq_ops;
+extern const struct seq_operations rxrpc_connection_seq_ops;
/*
* recvmsg.c
diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c
index c717152070df..1350f1be8037 100644
--- a/net/rxrpc/conn_event.c
+++ b/net/rxrpc/conn_event.c
@@ -40,7 +40,7 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn,
} __attribute__((packed)) pkt;
struct rxrpc_ackinfo ack_info;
size_t len;
- int ioc;
+ int ret, ioc;
u32 serial, mtu, call_id, padding;
_enter("%d", conn->debug_id);
@@ -135,10 +135,13 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn,
break;
}
- kernel_sendmsg(conn->params.local->socket, &msg, iov, ioc, len);
+ ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, ioc, len);
conn->params.peer->last_tx_at = ktime_get_real();
+ if (ret < 0)
+ trace_rxrpc_tx_fail(conn->debug_id, serial, ret,
+ rxrpc_tx_fail_call_final_resend);
+
_leave("");
- return;
}
/*
@@ -236,6 +239,8 @@ static int rxrpc_abort_connection(struct rxrpc_connection *conn,
ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 2, len);
if (ret < 0) {
+ trace_rxrpc_tx_fail(conn->debug_id, serial, ret,
+ rxrpc_tx_fail_conn_abort);
_debug("sendmsg failed: %d", ret);
return -EAGAIN;
}
diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c
index 0410d2277ca2..b5fd6381313d 100644
--- a/net/rxrpc/input.c
+++ b/net/rxrpc/input.c
@@ -971,7 +971,7 @@ static void rxrpc_input_call_packet(struct rxrpc_call *call,
if (timo) {
unsigned long now = jiffies, expect_rx_by;
- expect_rx_by = jiffies + timo;
+ expect_rx_by = now + timo;
WRITE_ONCE(call->expect_rx_by, expect_rx_by);
rxrpc_reduce_call_timer(call, expect_rx_by, now,
rxrpc_timer_set_for_normal);
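
The input.c fix computes expect_rx_by from the now sampled on the previous line instead of re-reading jiffies, so the deadline and the now handed to rxrpc_reduce_call_timer() come from the same instant. The same idea in plain C with a monotonic clock:

#include <stdio.h>
#include <time.h>

static long now_ms(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return ts.tv_sec * 1000L + ts.tv_nsec / 1000000L;
}

int main(void)
{
	long now = now_ms();
	long timo = 250;
	long expect_rx_by = now + timo;   /* the fix: reuse the sample */
	/* old form: expect_rx_by = now_ms() + timo; the deadline and the
	 * "now" passed to the timer could then disagree */

	printf("now=%ld deadline=%ld\n", now, expect_rx_by);
	return 0;
}
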
diff --git a/net/rxrpc/local_event.c b/net/rxrpc/local_event.c
index 93b5d910b4a1..8325f1b86840 100644
--- a/net/rxrpc/local_event.c
+++ b/net/rxrpc/local_event.c
@@ -71,7 +71,8 @@ static void rxrpc_send_version_request(struct rxrpc_local *local,
ret = kernel_sendmsg(local->socket, &msg, iov, 2, len);
if (ret < 0)
- _debug("sendmsg failed: %d", ret);
+ trace_rxrpc_tx_fail(local->debug_id, 0, ret,
+ rxrpc_tx_fail_version_reply);
_leave("");
}
diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c
index 8b54e9531d52..b493e6b62740 100644
--- a/net/rxrpc/local_object.c
+++ b/net/rxrpc/local_object.c
@@ -134,22 +134,49 @@ static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net)
}
}
- /* we want to receive ICMP errors */
- opt = 1;
- ret = kernel_setsockopt(local->socket, SOL_IP, IP_RECVERR,
- (char *) &opt, sizeof(opt));
- if (ret < 0) {
- _debug("setsockopt failed");
- goto error;
- }
+ switch (local->srx.transport.family) {
+ case AF_INET:
+ /* we want to receive ICMP errors */
+ opt = 1;
+ ret = kernel_setsockopt(local->socket, SOL_IP, IP_RECVERR,
+ (char *) &opt, sizeof(opt));
+ if (ret < 0) {
+ _debug("setsockopt failed");
+ goto error;
+ }
- /* we want to set the don't fragment bit */
- opt = IP_PMTUDISC_DO;
- ret = kernel_setsockopt(local->socket, SOL_IP, IP_MTU_DISCOVER,
- (char *) &opt, sizeof(opt));
- if (ret < 0) {
- _debug("setsockopt failed");
- goto error;
+ /* we want to set the don't fragment bit */
+ opt = IP_PMTUDISC_DO;
+ ret = kernel_setsockopt(local->socket, SOL_IP, IP_MTU_DISCOVER,
+ (char *) &opt, sizeof(opt));
+ if (ret < 0) {
+ _debug("setsockopt failed");
+ goto error;
+ }
+ break;
+
+ case AF_INET6:
+ /* we want to receive ICMP errors */
+ opt = 1;
+ ret = kernel_setsockopt(local->socket, SOL_IPV6, IPV6_RECVERR,
+ (char *) &opt, sizeof(opt));
+ if (ret < 0) {
+ _debug("setsockopt failed");
+ goto error;
+ }
+
+ /* we want to set the don't fragment bit */
+ opt = IPV6_PMTUDISC_DO;
+ ret = kernel_setsockopt(local->socket, SOL_IPV6, IPV6_MTU_DISCOVER,
+ (char *) &opt, sizeof(opt));
+ if (ret < 0) {
+ _debug("setsockopt failed");
+ goto error;
+ }
+ break;
+
+ default:
+ BUG();
}
/* set the socket up */
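
local_object.c now applies the ICMP-error and PMTU-discovery options per address family, since AF_INET6 sockets take the SOL_IPV6 variants. These particular options are also settable from userspace, so the sketch below is runnable as-is (setsockopt() error checking omitted for brevity):

#include <netinet/in.h>
#include <stdio.h>
#include <sys/socket.h>
#include <unistd.h>

static int setup(int family)
{
	int fd = socket(family, SOCK_DGRAM, 0), opt;

	if (fd < 0)
		return -1;
	if (family == AF_INET) {
		opt = 1;            /* receive ICMP errors */
		setsockopt(fd, SOL_IP, IP_RECVERR, &opt, sizeof(opt));
		opt = IP_PMTUDISC_DO;   /* set the don't-fragment bit */
		setsockopt(fd, SOL_IP, IP_MTU_DISCOVER, &opt, sizeof(opt));
	} else {
		opt = 1;            /* receive ICMPv6 errors */
		setsockopt(fd, SOL_IPV6, IPV6_RECVERR, &opt, sizeof(opt));
		opt = IPV6_PMTUDISC_DO;
		setsockopt(fd, SOL_IPV6, IPV6_MTU_DISCOVER, &opt, sizeof(opt));
	}
	return fd;
}

int main(void)
{
	int fd4 = setup(AF_INET), fd6 = setup(AF_INET6);

	printf("fd4=%d fd6=%d\n", fd4, fd6);
	if (fd4 >= 0)
		close(fd4);
	if (fd6 >= 0)
		close(fd6);
	return 0;
}
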
diff --git a/net/rxrpc/net_ns.c b/net/rxrpc/net_ns.c
index c7a023fb22d0..5d6a773db973 100644
--- a/net/rxrpc/net_ns.c
+++ b/net/rxrpc/net_ns.c
@@ -97,8 +97,11 @@ static __net_init int rxrpc_init_net(struct net *net)
if (!rxnet->proc_net)
goto err_proc;
- proc_create("calls", 0444, rxnet->proc_net, &rxrpc_call_seq_fops);
- proc_create("conns", 0444, rxnet->proc_net, &rxrpc_connection_seq_fops);
+ proc_create_net("calls", 0444, rxnet->proc_net, &rxrpc_call_seq_ops,
+ sizeof(struct seq_net_private));
+ proc_create_net("conns", 0444, rxnet->proc_net,
+ &rxrpc_connection_seq_ops,
+ sizeof(struct seq_net_private));
return 0;
err_proc:
diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c
index 7f1fc04775b3..f03de1c59ba3 100644
--- a/net/rxrpc/output.c
+++ b/net/rxrpc/output.c
@@ -210,6 +210,9 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping,
if (ping)
call->ping_time = now;
conn->params.peer->last_tx_at = ktime_get_real();
+ if (ret < 0)
+ trace_rxrpc_tx_fail(call->debug_id, serial, ret,
+ rxrpc_tx_fail_call_ack);
if (call->state < RXRPC_CALL_COMPLETE) {
if (ret < 0) {
@@ -294,6 +297,10 @@ int rxrpc_send_abort_packet(struct rxrpc_call *call)
ret = kernel_sendmsg(conn->params.local->socket,
&msg, iov, 1, sizeof(pkt));
conn->params.peer->last_tx_at = ktime_get_real();
+ if (ret < 0)
+ trace_rxrpc_tx_fail(call->debug_id, serial, ret,
+ rxrpc_tx_fail_call_abort);
+
rxrpc_put_connection(conn);
return ret;
@@ -387,6 +394,9 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb,
conn->params.peer->last_tx_at = ktime_get_real();
up_read(&conn->params.local->defrag_sem);
+ if (ret < 0)
+ trace_rxrpc_tx_fail(call->debug_id, serial, ret,
+ rxrpc_tx_fail_call_data_nofrag);
if (ret == -EMSGSIZE)
goto send_fragmentable;
@@ -414,6 +424,17 @@ done:
rxrpc_timer_set_for_lost_ack);
}
}
+
+ if (sp->hdr.seq == 1 &&
+ !test_and_set_bit(RXRPC_CALL_BEGAN_RX_TIMER,
+ &call->flags)) {
+ unsigned long nowj = jiffies, expect_rx_by;
+
+ expect_rx_by = nowj + call->next_rx_timo;
+ WRITE_ONCE(call->expect_rx_by, expect_rx_by);
+ rxrpc_reduce_call_timer(call, expect_rx_by, nowj,
+ rxrpc_timer_set_for_normal);
+ }
}
rxrpc_set_keepalive(call);
@@ -465,6 +486,10 @@ send_fragmentable:
#endif
}
+ if (ret < 0)
+ trace_rxrpc_tx_fail(call->debug_id, serial, ret,
+ rxrpc_tx_fail_call_data_frag);
+
up_write(&conn->params.local->defrag_sem);
goto done;
}
@@ -482,6 +507,7 @@ void rxrpc_reject_packets(struct rxrpc_local *local)
struct kvec iov[2];
size_t size;
__be32 code;
+ int ret;
_enter("%d", local->debug_id);
@@ -516,7 +542,10 @@ void rxrpc_reject_packets(struct rxrpc_local *local)
whdr.flags ^= RXRPC_CLIENT_INITIATED;
whdr.flags &= RXRPC_CLIENT_INITIATED;
- kernel_sendmsg(local->socket, &msg, iov, 2, size);
+ ret = kernel_sendmsg(local->socket, &msg, iov, 2, size);
+ if (ret < 0)
+ trace_rxrpc_tx_fail(local->debug_id, 0, ret,
+ rxrpc_tx_fail_reject);
}
rxrpc_free_skb(skb, rxrpc_skb_rx_freed);
@@ -567,7 +596,8 @@ void rxrpc_send_keepalive(struct rxrpc_peer *peer)
ret = kernel_sendmsg(peer->local->socket, &msg, iov, 2, len);
if (ret < 0)
- _debug("sendmsg failed: %d", ret);
+ trace_rxrpc_tx_fail(peer->debug_id, 0, ret,
+ rxrpc_tx_fail_version_keepalive);
peer->last_tx_at = ktime_get_real();
_leave("");
diff --git a/net/rxrpc/peer_event.c b/net/rxrpc/peer_event.c
index 78c2f95d1f22..0ed8b651cec2 100644
--- a/net/rxrpc/peer_event.c
+++ b/net/rxrpc/peer_event.c
@@ -28,39 +28,39 @@ static void rxrpc_store_error(struct rxrpc_peer *, struct sock_exterr_skb *);
* Find the peer associated with an ICMP packet.
*/
static struct rxrpc_peer *rxrpc_lookup_peer_icmp_rcu(struct rxrpc_local *local,
- const struct sk_buff *skb)
+ const struct sk_buff *skb,
+ struct sockaddr_rxrpc *srx)
{
struct sock_exterr_skb *serr = SKB_EXT_ERR(skb);
- struct sockaddr_rxrpc srx;
_enter("");
- memset(&srx, 0, sizeof(srx));
- srx.transport_type = local->srx.transport_type;
- srx.transport_len = local->srx.transport_len;
- srx.transport.family = local->srx.transport.family;
+ memset(srx, 0, sizeof(*srx));
+ srx->transport_type = local->srx.transport_type;
+ srx->transport_len = local->srx.transport_len;
+ srx->transport.family = local->srx.transport.family;
/* Can we see an ICMP4 packet on an ICMP6 listening socket? and vice
* versa?
*/
- switch (srx.transport.family) {
+ switch (srx->transport.family) {
case AF_INET:
- srx.transport.sin.sin_port = serr->port;
+ srx->transport.sin.sin_port = serr->port;
switch (serr->ee.ee_origin) {
case SO_EE_ORIGIN_ICMP:
_net("Rx ICMP");
- memcpy(&srx.transport.sin.sin_addr,
+ memcpy(&srx->transport.sin.sin_addr,
skb_network_header(skb) + serr->addr_offset,
sizeof(struct in_addr));
break;
case SO_EE_ORIGIN_ICMP6:
_net("Rx ICMP6 on v4 sock");
- memcpy(&srx.transport.sin.sin_addr,
+ memcpy(&srx->transport.sin.sin_addr,
skb_network_header(skb) + serr->addr_offset + 12,
sizeof(struct in_addr));
break;
default:
- memcpy(&srx.transport.sin.sin_addr, &ip_hdr(skb)->saddr,
+ memcpy(&srx->transport.sin.sin_addr, &ip_hdr(skb)->saddr,
sizeof(struct in_addr));
break;
}
@@ -68,25 +68,25 @@ static struct rxrpc_peer *rxrpc_lookup_peer_icmp_rcu(struct rxrpc_local *local,
#ifdef CONFIG_AF_RXRPC_IPV6
case AF_INET6:
- srx.transport.sin6.sin6_port = serr->port;
+ srx->transport.sin6.sin6_port = serr->port;
switch (serr->ee.ee_origin) {
case SO_EE_ORIGIN_ICMP6:
_net("Rx ICMP6");
- memcpy(&srx.transport.sin6.sin6_addr,
+ memcpy(&srx->transport.sin6.sin6_addr,
skb_network_header(skb) + serr->addr_offset,
sizeof(struct in6_addr));
break;
case SO_EE_ORIGIN_ICMP:
_net("Rx ICMP on v6 sock");
- srx.transport.sin6.sin6_addr.s6_addr32[0] = 0;
- srx.transport.sin6.sin6_addr.s6_addr32[1] = 0;
- srx.transport.sin6.sin6_addr.s6_addr32[2] = htonl(0xffff);
- memcpy(srx.transport.sin6.sin6_addr.s6_addr + 12,
+ srx->transport.sin6.sin6_addr.s6_addr32[0] = 0;
+ srx->transport.sin6.sin6_addr.s6_addr32[1] = 0;
+ srx->transport.sin6.sin6_addr.s6_addr32[2] = htonl(0xffff);
+ memcpy(srx->transport.sin6.sin6_addr.s6_addr + 12,
skb_network_header(skb) + serr->addr_offset,
sizeof(struct in_addr));
break;
default:
- memcpy(&srx.transport.sin6.sin6_addr,
+ memcpy(&srx->transport.sin6.sin6_addr,
&ipv6_hdr(skb)->saddr,
sizeof(struct in6_addr));
break;
@@ -98,7 +98,7 @@ static struct rxrpc_peer *rxrpc_lookup_peer_icmp_rcu(struct rxrpc_local *local,
BUG();
}
- return rxrpc_lookup_peer_rcu(local, &srx);
+ return rxrpc_lookup_peer_rcu(local, srx);
}
/*
@@ -146,6 +146,7 @@ static void rxrpc_adjust_mtu(struct rxrpc_peer *peer, struct sock_exterr_skb *se
void rxrpc_error_report(struct sock *sk)
{
struct sock_exterr_skb *serr;
+ struct sockaddr_rxrpc srx;
struct rxrpc_local *local = sk->sk_user_data;
struct rxrpc_peer *peer;
struct sk_buff *skb;
@@ -166,7 +167,7 @@ void rxrpc_error_report(struct sock *sk)
}
rcu_read_lock();
- peer = rxrpc_lookup_peer_icmp_rcu(local, skb);
+ peer = rxrpc_lookup_peer_icmp_rcu(local, skb, &srx);
if (peer && !rxrpc_get_peer_maybe(peer))
peer = NULL;
if (!peer) {
@@ -176,6 +177,8 @@ void rxrpc_error_report(struct sock *sk)
return;
}
+ trace_rxrpc_rx_icmp(peer, &serr->ee, &srx);
+
if ((serr->ee.ee_origin == SO_EE_ORIGIN_ICMP &&
serr->ee.ee_type == ICMP_DEST_UNREACH &&
serr->ee.ee_code == ICMP_FRAG_NEEDED)) {
@@ -209,9 +212,6 @@ static void rxrpc_store_error(struct rxrpc_peer *peer,
ee = &serr->ee;
- _net("Rx Error o=%d t=%d c=%d e=%d",
- ee->ee_origin, ee->ee_type, ee->ee_code, ee->ee_errno);
-
err = ee->ee_errno;
switch (ee->ee_origin) {
diff --git a/net/rxrpc/proc.c b/net/rxrpc/proc.c
index 7e45db058823..d9fca8c4bcdc 100644
--- a/net/rxrpc/proc.c
+++ b/net/rxrpc/proc.c
@@ -115,26 +115,13 @@ static int rxrpc_call_seq_show(struct seq_file *seq, void *v)
return 0;
}
-static const struct seq_operations rxrpc_call_seq_ops = {
+const struct seq_operations rxrpc_call_seq_ops = {
.start = rxrpc_call_seq_start,
.next = rxrpc_call_seq_next,
.stop = rxrpc_call_seq_stop,
.show = rxrpc_call_seq_show,
};
-static int rxrpc_call_seq_open(struct inode *inode, struct file *file)
-{
- return seq_open_net(inode, file, &rxrpc_call_seq_ops,
- sizeof(struct seq_net_private));
-}
-
-const struct file_operations rxrpc_call_seq_fops = {
- .open = rxrpc_call_seq_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
/*
* generate a list of extant virtual connections in /proc/net/rxrpc_conns
*/
@@ -207,23 +194,9 @@ print:
return 0;
}
-static const struct seq_operations rxrpc_connection_seq_ops = {
+const struct seq_operations rxrpc_connection_seq_ops = {
.start = rxrpc_connection_seq_start,
.next = rxrpc_connection_seq_next,
.stop = rxrpc_connection_seq_stop,
.show = rxrpc_connection_seq_show,
};
-
-
-static int rxrpc_connection_seq_open(struct inode *inode, struct file *file)
-{
- return seq_open_net(inode, file, &rxrpc_connection_seq_ops,
- sizeof(struct seq_net_private));
-}
-
-const struct file_operations rxrpc_connection_seq_fops = {
- .open = rxrpc_connection_seq_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c
index 588fea0dd362..6c0ae27fff84 100644
--- a/net/rxrpc/rxkad.c
+++ b/net/rxrpc/rxkad.c
@@ -664,7 +664,8 @@ static int rxkad_issue_challenge(struct rxrpc_connection *conn)
ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 2, len);
if (ret < 0) {
- _debug("sendmsg failed: %d", ret);
+ trace_rxrpc_tx_fail(conn->debug_id, serial, ret,
+ rxrpc_tx_fail_conn_challenge);
return -EAGAIN;
}
@@ -719,7 +720,8 @@ static int rxkad_send_response(struct rxrpc_connection *conn,
ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 3, len);
if (ret < 0) {
- _debug("sendmsg failed: %d", ret);
+ trace_rxrpc_tx_fail(conn->debug_id, serial, ret,
+ rxrpc_tx_fail_conn_response);
return -EAGAIN;
}
diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c
index 206e802ccbdc..be01f9c5d963 100644
--- a/net/rxrpc/sendmsg.c
+++ b/net/rxrpc/sendmsg.c
@@ -223,6 +223,15 @@ static void rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call,
ret = rxrpc_send_data_packet(call, skb, false);
if (ret < 0) {
+ switch (ret) {
+ case -ENETUNREACH:
+ case -EHOSTUNREACH:
+ case -ECONNREFUSED:
+ rxrpc_set_call_completion(call,
+ RXRPC_CALL_LOCAL_ERROR,
+ 0, ret);
+ goto out;
+ }
_debug("need instant resend %d", ret);
rxrpc_instant_resend(call, ix);
} else {
@@ -241,6 +250,7 @@ static void rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call,
rxrpc_timer_set_for_send);
}
+out:
rxrpc_free_skb(skb, rxrpc_skb_tx_freed);
_leave("");
}
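
The sendmsg change above separates fatal transport errors from transient ones: -ENETUNREACH, -EHOSTUNREACH and -ECONNREFUSED mean the peer cannot be reached at all, so the call is completed locally rather than queued for an instant resend. A minimal sketch of that policy as a predicate (the helper name is hypothetical, not part of the patch):

#include <linux/errno.h>
#include <linux/types.h>

/* Sketch: classify a sendmsg error the way the switch added to
 * rxrpc_queue_packet() does. rxrpc_tx_error_is_fatal() is an
 * illustrative name only.
 */
static bool rxrpc_tx_error_is_fatal(int err)
{
	switch (err) {
	case -ENETUNREACH:
	case -EHOSTUNREACH:
	case -ECONNREFUSED:
		return true;	/* complete the call with a local error */
	default:
		return false;	/* transient: fall back to instant resend */
	}
}
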
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index ddf69fc01bdf..6138d1d71900 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -121,7 +121,8 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
return 0;
if (!flags) {
- tcf_idr_release(*a, bind);
+ if (exists)
+ tcf_idr_release(*a, bind);
return -EINVAL;
}
diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c
index bbcbdce732cc..ad050d7d4b46 100644
--- a/net/sched/act_skbmod.c
+++ b/net/sched/act_skbmod.c
@@ -131,8 +131,11 @@ static int tcf_skbmod_init(struct net *net, struct nlattr *nla,
if (exists && bind)
return 0;
- if (!lflags)
+ if (!lflags) {
+ if (exists)
+ tcf_idr_release(*a, bind);
return -EINVAL;
+ }
if (!exists) {
ret = tcf_idr_create(tn, parm->index, est, a,
diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c
index 853604685965..1fb39e1f9d07 100644
--- a/net/sched/act_vlan.c
+++ b/net/sched/act_vlan.c
@@ -161,6 +161,8 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
case htons(ETH_P_8021AD):
break;
default:
+ if (exists)
+ tcf_idr_release(*a, bind);
return -EPROTONOSUPPORT;
}
} else {
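
The three act_* hunks above all enforce one rule: a tcf_idr lookup that found an existing action took a reference, and every validation failure after that point must drop it before returning, or the action leaks. Condensed, the shared error-path pattern is the fragment below (the surrounding init function is elided; 'flags' stands for whichever parameter failed validation):

	/* Error path after a successful lookup: drop the reference
	 * taken by the lookup, but only if one was actually taken.
	 */
	if (!flags) {
		if (exists)
			tcf_idr_release(*a, bind);
		return -EINVAL;
	}
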
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index b66754f52a9f..a57e112d9b3e 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -152,8 +152,8 @@ static struct tcf_proto *tcf_proto_create(const char *kind, u32 protocol,
NL_SET_ERR_MSG(extack, "TC classifier not found");
err = -ENOENT;
}
- goto errout;
#endif
+ goto errout;
}
tp->classify = tp->ops->classify;
tp->protocol = protocol;
@@ -1588,7 +1588,7 @@ int tc_setup_cb_call(struct tcf_block *block, struct tcf_exts *exts,
return ret;
ok_count = ret;
- if (!exts)
+ if (!exts || ok_count)
return ok_count;
ret = tc_exts_setup_cb_egdev_call(exts, type, type_data, err_stop);
if (ret < 0)
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index d964e60c730e..c79f6e71512e 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -977,7 +977,7 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
return 0;
errout_idr:
- if (fnew->handle)
+ if (!fold)
idr_remove(&head->handle_idr, fnew->handle);
errout:
tcf_exts_destroy(&fnew->exts);
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 106dae7e4818..54eca685420f 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -2092,23 +2092,11 @@ static int psched_show(struct seq_file *seq, void *v)
return 0;
}
-static int psched_open(struct inode *inode, struct file *file)
-{
- return single_open(file, psched_show, NULL);
-}
-
-static const struct file_operations psched_fops = {
- .open = psched_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
static int __net_init psched_net_init(struct net *net)
{
struct proc_dir_entry *e;
- e = proc_create("psched", 0, net->proc_net, &psched_fops);
+ e = proc_create_single("psched", 0, net->proc_net, psched_show);
if (e == NULL)
return -ENOMEM;
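
The psched conversion above is the pattern repeated throughout this series: a read-only /proc file built on single_open() no longer needs its own file_operations, because proc_create_single() takes the show callback directly. A self-contained sketch with placeholder names:

#include <linux/errno.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <net/net_namespace.h>

/* "example" and example_show() are placeholders; the point is the
 * proc_create_single() call replacing the open/read/llseek/release
 * boilerplate.
 */
static int example_show(struct seq_file *seq, void *v)
{
	seq_puts(seq, "ok\n");
	return 0;
}

static int __net_init example_net_init(struct net *net)
{
	if (!proc_create_single("example", 0444, net->proc_net,
				example_show))
		return -ENOMEM;
	return 0;
}
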
diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
index a366e4c9413a..4808713c73b9 100644
--- a/net/sched/sch_fq.c
+++ b/net/sched/sch_fq.c
@@ -128,6 +128,28 @@ static bool fq_flow_is_detached(const struct fq_flow *f)
return f->next == &detached;
}
+static bool fq_flow_is_throttled(const struct fq_flow *f)
+{
+ return f->next == &throttled;
+}
+
+static void fq_flow_add_tail(struct fq_flow_head *head, struct fq_flow *flow)
+{
+ if (head->first)
+ head->last->next = flow;
+ else
+ head->first = flow;
+ head->last = flow;
+ flow->next = NULL;
+}
+
+static void fq_flow_unset_throttled(struct fq_sched_data *q, struct fq_flow *f)
+{
+ rb_erase(&f->rate_node, &q->delayed);
+ q->throttled_flows--;
+ fq_flow_add_tail(&q->old_flows, f);
+}
+
static void fq_flow_set_throttled(struct fq_sched_data *q, struct fq_flow *f)
{
struct rb_node **p = &q->delayed.rb_node, *parent = NULL;
@@ -155,15 +177,6 @@ static void fq_flow_set_throttled(struct fq_sched_data *q, struct fq_flow *f)
static struct kmem_cache *fq_flow_cachep __read_mostly;
-static void fq_flow_add_tail(struct fq_flow_head *head, struct fq_flow *flow)
-{
- if (head->first)
- head->last->next = flow;
- else
- head->first = flow;
- head->last = flow;
- flow->next = NULL;
-}
/* limit number of collected flows per round */
#define FQ_GC_MAX 8
@@ -267,6 +280,8 @@ static struct fq_flow *fq_classify(struct sk_buff *skb, struct fq_sched_data *q)
f->socket_hash != sk->sk_hash)) {
f->credit = q->initial_quantum;
f->socket_hash = sk->sk_hash;
+ if (fq_flow_is_throttled(f))
+ fq_flow_unset_throttled(q, f);
f->time_next_packet = 0ULL;
}
return f;
@@ -438,9 +453,7 @@ static void fq_check_throttled(struct fq_sched_data *q, u64 now)
q->time_next_delayed_flow = f->time_next_packet;
break;
}
- rb_erase(p, &q->delayed);
- q->throttled_flows--;
- fq_flow_add_tail(&q->old_flows, f);
+ fq_flow_unset_throttled(q, f);
}
}
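
The sch_fq fix matters because q->delayed is an rbtree keyed on f->time_next_packet: when a reused socket hits fq_classify(), its flow must be unlinked from that tree before the key is reset, or the tree ordering silently breaks. The relevant two lines, annotated:

	if (fq_flow_is_throttled(f))
		fq_flow_unset_throttled(q, f);	/* rb_erase() while the key is still valid */
	f->time_next_packet = 0ULL;		/* only now safe to reset the key */
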
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index 16644b3d2362..56c181c3feeb 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -222,10 +222,11 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt,
extack);
if (IS_ERR(child))
return PTR_ERR(child);
- }
- if (child != &noop_qdisc)
+ /* child is fifo, no need to check for noop_qdisc */
qdisc_hash_add(child, true);
+ }
+
sch_tree_lock(sch);
q->flags = ctl->flags;
q->limit = ctl->limit;
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index 03225a8df973..6f74a426f159 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -383,6 +383,9 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt,
err = PTR_ERR(child);
goto done;
}
+
+ /* child is fifo, no need to check for noop_qdisc */
+ qdisc_hash_add(child, true);
}
sch_tree_lock(sch);
@@ -391,8 +394,6 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt,
q->qdisc->qstats.backlog);
qdisc_destroy(q->qdisc);
q->qdisc = child;
- if (child != &noop_qdisc)
- qdisc_hash_add(child, true);
}
q->limit = qopt->limit;
if (tb[TCA_TBF_PBURST])
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 837806dd5799..a47179da24e6 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -1024,8 +1024,9 @@ static void sctp_assoc_bh_rcv(struct work_struct *work)
struct sctp_endpoint *ep;
struct sctp_chunk *chunk;
struct sctp_inq *inqueue;
- int state;
+ int first_time = 1; /* is this the first time through the loop */
int error = 0;
+ int state;
/* The association should be held so we should be safe. */
ep = asoc->ep;
@@ -1036,6 +1037,30 @@ static void sctp_assoc_bh_rcv(struct work_struct *work)
state = asoc->state;
subtype = SCTP_ST_CHUNK(chunk->chunk_hdr->type);
+ /* If the first chunk in the packet is AUTH, do special
+ * processing specified in Section 6.3 of SCTP-AUTH spec
+ */
+ if (first_time && subtype.chunk == SCTP_CID_AUTH) {
+ struct sctp_chunkhdr *next_hdr;
+
+ next_hdr = sctp_inq_peek(inqueue);
+ if (!next_hdr)
+ goto normal;
+
+ /* If the next chunk is COOKIE-ECHO, skip the AUTH
+ * chunk while saving a pointer to it so we can do
+ * Authentication later (during cookie-echo
+ * processing).
+ */
+ if (next_hdr->type == SCTP_CID_COOKIE_ECHO) {
+ chunk->auth_chunk = skb_clone(chunk->skb,
+ GFP_ATOMIC);
+ chunk->auth = 1;
+ continue;
+ }
+ }
+
+normal:
/* SCTP-AUTH, Section 6.3:
* The receiver has a list of chunk types which it expects
* to be received only after an AUTH-chunk. This list has
@@ -1074,6 +1099,9 @@ static void sctp_assoc_bh_rcv(struct work_struct *work)
/* If there is an error on chunk, discard this packet. */
if (error && chunk)
chunk->pdiscard = 1;
+
+ if (first_time)
+ first_time = 0;
}
sctp_association_put(asoc);
}
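
The loop above implements the SCTP-AUTH special case: when AUTH is immediately followed by COOKIE-ECHO, no association exists yet to verify against, so verification is deferred by keeping a private copy of the AUTH chunk. Stripped of context, the deferral is just this fragment:

	/* Stash a clone of the AUTH chunk; it is verified later,
	 * during cookie-echo processing, once the association has
	 * been rebuilt from the cookie.
	 */
	chunk->auth_chunk = skb_clone(chunk->skb, GFP_ATOMIC);
	chunk->auth = 1;
	continue;	/* skip normal state-machine handling of AUTH */
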
diff --git a/net/sctp/inqueue.c b/net/sctp/inqueue.c
index 23ebc5318edc..eb93ffe2408b 100644
--- a/net/sctp/inqueue.c
+++ b/net/sctp/inqueue.c
@@ -217,7 +217,7 @@ new_skb:
skb_pull(chunk->skb, sizeof(*ch));
chunk->subh.v = NULL; /* Subheader is no longer valid. */
- if (chunk->chunk_end + sizeof(*ch) < skb_tail_pointer(chunk->skb)) {
+ if (chunk->chunk_end + sizeof(*ch) <= skb_tail_pointer(chunk->skb)) {
/* This is not a singleton */
chunk->singleton = 0;
} else if (chunk->chunk_end > skb_tail_pointer(chunk->skb)) {
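
The inqueue fix is a classic off-by-one: with a 4-byte struct sctp_chunkhdr, chunk_end == 100 and a tail pointer of 104 leave room for exactly one more header, yet 100 + 4 < 104 is false, so the old test wrongly declared the chunk a singleton. A user-space restatement of the corrected predicate:

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

/* Sketch: another chunk header can still follow when exactly
 * hdr_len bytes remain before the tail.
 */
static bool more_chunks_follow(const uint8_t *chunk_end,
			       const uint8_t *tail, size_t hdr_len)
{
	return chunk_end + hdr_len <= tail;	/* was '<': off by one */
}
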
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 2e3f7b75a8ec..7339918a805d 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -895,6 +895,9 @@ static int sctp_inet6_cmp_addr(const union sctp_addr *addr1,
if (sctp_is_any(sk, addr1) || sctp_is_any(sk, addr2))
return 1;
+ if (addr1->sa.sa_family == AF_INET && addr2->sa.sa_family == AF_INET)
+ return addr1->v4.sin_addr.s_addr == addr2->v4.sin_addr.s_addr;
+
return __sctp_v6_cmp_addr(addr1, addr2);
}
@@ -1003,11 +1006,11 @@ static const struct proto_ops inet6_seqpacket_ops = {
.owner = THIS_MODULE,
.release = inet6_release,
.bind = inet6_bind,
- .connect = inet_dgram_connect,
+ .connect = sctp_inet_connect,
.socketpair = sock_no_socketpair,
.accept = inet_accept,
.getname = sctp_getname,
- .poll = sctp_poll,
+ .poll_mask = sctp_poll_mask,
.ioctl = inet6_ioctl,
.listen = sctp_inet_listen,
.shutdown = inet_shutdown,
diff --git a/net/sctp/objcnt.c b/net/sctp/objcnt.c
index fd2684ad94c8..a6179b26b80c 100644
--- a/net/sctp/objcnt.c
+++ b/net/sctp/objcnt.c
@@ -108,25 +108,13 @@ static const struct seq_operations sctp_objcnt_seq_ops = {
.show = sctp_objcnt_seq_show,
};
-static int sctp_objcnt_seq_open(struct inode *inode, struct file *file)
-{
- return seq_open(file, &sctp_objcnt_seq_ops);
-}
-
-static const struct file_operations sctp_objcnt_ops = {
- .open = sctp_objcnt_seq_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
/* Initialize the objcount in the proc filesystem. */
void sctp_dbg_objcnt_init(struct net *net)
{
struct proc_dir_entry *ent;
- ent = proc_create("sctp_dbg_objcnt", 0,
- net->sctp.proc_net_sctp, &sctp_objcnt_ops);
+ ent = proc_create_seq("sctp_dbg_objcnt", 0,
+ net->sctp.proc_net_sctp, &sctp_objcnt_seq_ops);
if (!ent)
pr_warn("sctp_dbg_objcnt: Unable to create /proc entry.\n");
}
diff --git a/net/sctp/proc.c b/net/sctp/proc.c
index 1d9ccc6dab2b..ef5c9a82d4e8 100644
--- a/net/sctp/proc.c
+++ b/net/sctp/proc.c
@@ -88,19 +88,6 @@ static int sctp_snmp_seq_show(struct seq_file *seq, void *v)
return 0;
}
-/* Initialize the seq file operations for 'snmp' object. */
-static int sctp_snmp_seq_open(struct inode *inode, struct file *file)
-{
- return single_open_net(inode, file, sctp_snmp_seq_show);
-}
-
-static const struct file_operations sctp_snmp_seq_fops = {
- .open = sctp_snmp_seq_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release_net,
-};
-
/* Dump local addresses of an association/endpoint. */
static void sctp_seq_dump_local_addrs(struct seq_file *seq, struct sctp_ep_common *epb)
{
@@ -225,21 +212,6 @@ static const struct seq_operations sctp_eps_ops = {
.show = sctp_eps_seq_show,
};
-
-/* Initialize the seq file operations for 'eps' object. */
-static int sctp_eps_seq_open(struct inode *inode, struct file *file)
-{
- return seq_open_net(inode, file, &sctp_eps_ops,
- sizeof(struct seq_net_private));
-}
-
-static const struct file_operations sctp_eps_seq_fops = {
- .open = sctp_eps_seq_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release_net,
-};
-
struct sctp_ht_iter {
struct seq_net_private p;
struct rhashtable_iter hti;
@@ -338,20 +310,6 @@ static const struct seq_operations sctp_assoc_ops = {
.show = sctp_assocs_seq_show,
};
-/* Initialize the seq file operations for 'assocs' object. */
-static int sctp_assocs_seq_open(struct inode *inode, struct file *file)
-{
- return seq_open_net(inode, file, &sctp_assoc_ops,
- sizeof(struct sctp_ht_iter));
-}
-
-static const struct file_operations sctp_assocs_seq_fops = {
- .open = sctp_assocs_seq_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release_net,
-};
-
static int sctp_remaddr_seq_show(struct seq_file *seq, void *v)
{
struct sctp_association *assoc;
@@ -431,36 +389,23 @@ static const struct seq_operations sctp_remaddr_ops = {
.show = sctp_remaddr_seq_show,
};
-static int sctp_remaddr_seq_open(struct inode *inode, struct file *file)
-{
- return seq_open_net(inode, file, &sctp_remaddr_ops,
- sizeof(struct sctp_ht_iter));
-}
-
-static const struct file_operations sctp_remaddr_seq_fops = {
- .open = sctp_remaddr_seq_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release_net,
-};
-
/* Set up the proc fs entry for the SCTP protocol. */
int __net_init sctp_proc_init(struct net *net)
{
net->sctp.proc_net_sctp = proc_net_mkdir(net, "sctp", net->proc_net);
if (!net->sctp.proc_net_sctp)
return -ENOMEM;
- if (!proc_create("snmp", 0444, net->sctp.proc_net_sctp,
- &sctp_snmp_seq_fops))
+ if (!proc_create_net_single("snmp", 0444, net->sctp.proc_net_sctp,
+ sctp_snmp_seq_show, NULL))
goto cleanup;
- if (!proc_create("eps", 0444, net->sctp.proc_net_sctp,
- &sctp_eps_seq_fops))
+ if (!proc_create_net("eps", 0444, net->sctp.proc_net_sctp,
+ &sctp_eps_ops, sizeof(struct seq_net_private)))
goto cleanup;
- if (!proc_create("assocs", 0444, net->sctp.proc_net_sctp,
- &sctp_assocs_seq_fops))
+ if (!proc_create_net("assocs", 0444, net->sctp.proc_net_sctp,
+ &sctp_assoc_ops, sizeof(struct sctp_ht_iter)))
goto cleanup;
- if (!proc_create("remaddr", 0444, net->sctp.proc_net_sctp,
- &sctp_remaddr_seq_fops))
+ if (!proc_create_net("remaddr", 0444, net->sctp.proc_net_sctp,
+ &sctp_remaddr_ops, sizeof(struct sctp_ht_iter)))
goto cleanup;
return 0;
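
proc.c above shows the companion conversion for iterator-style files: proc_create_net() hides seq_open_net()/seq_release_net() and takes the per-open state size directly. A sketch with placeholder names and a deliberately empty iterator:

#include <linux/errno.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <net/net_namespace.h>

/* Iteration is elided; only the registration shape matters here. */
static void *example_seq_start(struct seq_file *seq, loff_t *pos)
{
	return NULL;
}

static void *example_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	return NULL;
}

static void example_seq_stop(struct seq_file *seq, void *v)
{
}

static int example_seq_show(struct seq_file *seq, void *v)
{
	return 0;
}

static const struct seq_operations example_seq_ops = {
	.start = example_seq_start,
	.next  = example_seq_next,
	.stop  = example_seq_stop,
	.show  = example_seq_show,
};

static int __net_init example_net_init(struct net *net)
{
	if (!proc_create_net("example", 0444, net->proc_net,
			     &example_seq_ops,
			     sizeof(struct seq_net_private)))
		return -ENOMEM;
	return 0;
}
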
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index d685f8456762..11d93377ba5e 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -1012,11 +1012,11 @@ static const struct proto_ops inet_seqpacket_ops = {
.owner = THIS_MODULE,
.release = inet_release, /* Needs to be wrapped... */
.bind = inet_bind,
- .connect = inet_dgram_connect,
+ .connect = sctp_inet_connect,
.socketpair = sock_no_socketpair,
.accept = inet_accept,
.getname = inet_getname, /* Semantics are different. */
- .poll = sctp_poll,
+ .poll_mask = sctp_poll_mask,
.ioctl = inet_ioctl,
.listen = sctp_inet_listen,
.shutdown = inet_shutdown, /* Looks harmless. */
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 5a4fb1dc8400..e62addb60434 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -1152,7 +1152,7 @@ struct sctp_chunk *sctp_make_violation_max_retrans(
const struct sctp_association *asoc,
const struct sctp_chunk *chunk)
{
- static const char error[] = "Association exceeded its max_retans count";
+ static const char error[] = "Association exceeded its max_retrans count";
size_t payload_len = sizeof(error) + sizeof(struct sctp_errhdr);
struct sctp_chunk *retval;
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index dd0594a10961..c9ae3404b1bb 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -153,10 +153,7 @@ static enum sctp_disposition sctp_sf_violation_chunk(
struct sctp_cmd_seq *commands);
static enum sctp_ierror sctp_sf_authenticate(
- struct net *net,
- const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
- const union sctp_subtype type,
struct sctp_chunk *chunk);
static enum sctp_disposition __sctp_sf_do_9_1_abort(
@@ -626,6 +623,38 @@ enum sctp_disposition sctp_sf_do_5_1C_ack(struct net *net,
return SCTP_DISPOSITION_CONSUME;
}
+static bool sctp_auth_chunk_verify(struct net *net, struct sctp_chunk *chunk,
+ const struct sctp_association *asoc)
+{
+ struct sctp_chunk auth;
+
+ if (!chunk->auth_chunk)
+ return true;
+
+ /* SCTP-AUTH: auth_chunk pointer is only set when the cookie-echo
+ * is supposed to be authenticated and we have to do delayed
+ * authentication. We've just recreated the association using
+ * the information in the cookie and now it's much easier to
+ * do the authentication.
+ */
+
+ /* Make sure that we and the peer are AUTH capable */
+ if (!net->sctp.auth_enable || !asoc->peer.auth_capable)
+ return false;
+
+ /* set-up our fake chunk so that we can process it */
+ auth.skb = chunk->auth_chunk;
+ auth.asoc = chunk->asoc;
+ auth.sctp_hdr = chunk->sctp_hdr;
+ auth.chunk_hdr = (struct sctp_chunkhdr *)
+ skb_push(chunk->auth_chunk,
+ sizeof(struct sctp_chunkhdr));
+ skb_pull(chunk->auth_chunk, sizeof(struct sctp_chunkhdr));
+ auth.transport = chunk->transport;
+
+ return sctp_sf_authenticate(asoc, &auth) == SCTP_IERROR_NO_ERROR;
+}
+
/*
* Respond to a normal COOKIE ECHO chunk.
* We are the side that is being asked for an association.
@@ -763,37 +792,9 @@ enum sctp_disposition sctp_sf_do_5_1D_ce(struct net *net,
if (error)
goto nomem_init;
- /* SCTP-AUTH: auth_chunk pointer is only set when the cookie-echo
- * is supposed to be authenticated and we have to do delayed
- * authentication. We've just recreated the association using
- * the information in the cookie and now it's much easier to
- * do the authentication.
- */
- if (chunk->auth_chunk) {
- struct sctp_chunk auth;
- enum sctp_ierror ret;
-
- /* Make sure that we and the peer are AUTH capable */
- if (!net->sctp.auth_enable || !new_asoc->peer.auth_capable) {
- sctp_association_free(new_asoc);
- return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
- }
-
- /* set-up our fake chunk so that we can process it */
- auth.skb = chunk->auth_chunk;
- auth.asoc = chunk->asoc;
- auth.sctp_hdr = chunk->sctp_hdr;
- auth.chunk_hdr = (struct sctp_chunkhdr *)
- skb_push(chunk->auth_chunk,
- sizeof(struct sctp_chunkhdr));
- skb_pull(chunk->auth_chunk, sizeof(struct sctp_chunkhdr));
- auth.transport = chunk->transport;
-
- ret = sctp_sf_authenticate(net, ep, new_asoc, type, &auth);
- if (ret != SCTP_IERROR_NO_ERROR) {
- sctp_association_free(new_asoc);
- return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
- }
+ if (!sctp_auth_chunk_verify(net, chunk, new_asoc)) {
+ sctp_association_free(new_asoc);
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
}
repl = sctp_make_cookie_ack(new_asoc, chunk);
@@ -1794,13 +1795,18 @@ static enum sctp_disposition sctp_sf_do_dupcook_a(
GFP_ATOMIC))
goto nomem;
+ if (sctp_auth_asoc_init_active_key(new_asoc, GFP_ATOMIC))
+ goto nomem;
+
+ if (!sctp_auth_chunk_verify(net, chunk, new_asoc))
+ return SCTP_DISPOSITION_DISCARD;
+
/* Make sure no new addresses are being added during the
* restart. Though this is a pretty complicated attack
* since you'd have to get inside the cookie.
*/
- if (!sctp_sf_check_restart_addrs(new_asoc, asoc, chunk, commands)) {
+ if (!sctp_sf_check_restart_addrs(new_asoc, asoc, chunk, commands))
return SCTP_DISPOSITION_CONSUME;
- }
/* If the endpoint is in the SHUTDOWN-ACK-SENT state and recognizes
* the peer has restarted (Action A), it MUST NOT setup a new
@@ -1906,6 +1912,12 @@ static enum sctp_disposition sctp_sf_do_dupcook_b(
GFP_ATOMIC))
goto nomem;
+ if (sctp_auth_asoc_init_active_key(new_asoc, GFP_ATOMIC))
+ goto nomem;
+
+ if (!sctp_auth_chunk_verify(net, chunk, new_asoc))
+ return SCTP_DISPOSITION_DISCARD;
+
/* Update the content of current association. */
sctp_add_cmd_sf(commands, SCTP_CMD_UPDATE_ASSOC, SCTP_ASOC(new_asoc));
sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE,
@@ -2003,6 +2015,9 @@ static enum sctp_disposition sctp_sf_do_dupcook_d(
* a COOKIE ACK.
*/
+ if (!sctp_auth_chunk_verify(net, chunk, asoc))
+ return SCTP_DISPOSITION_DISCARD;
+
/* Don't accidentally move back into established state. */
if (asoc->state < SCTP_STATE_ESTABLISHED) {
sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP,
@@ -2050,7 +2065,7 @@ static enum sctp_disposition sctp_sf_do_dupcook_d(
}
}
- repl = sctp_make_cookie_ack(new_asoc, chunk);
+ repl = sctp_make_cookie_ack(asoc, chunk);
if (!repl)
goto nomem;
@@ -4165,10 +4180,7 @@ gen_shutdown:
* The return value is the disposition of the chunk.
*/
static enum sctp_ierror sctp_sf_authenticate(
- struct net *net,
- const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
- const union sctp_subtype type,
struct sctp_chunk *chunk)
{
struct sctp_shared_key *sh_key = NULL;
@@ -4269,7 +4281,7 @@ enum sctp_disposition sctp_sf_eat_auth(struct net *net,
commands);
auth_hdr = (struct sctp_authhdr *)chunk->skb->data;
- error = sctp_sf_authenticate(net, ep, asoc, type, chunk);
+ error = sctp_sf_authenticate(asoc, chunk);
switch (error) {
case SCTP_IERROR_AUTH_BAD_HMAC:
/* Generate the ERROR chunk and discard the rest
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 80835ac26d2c..bf747094d26b 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -1086,7 +1086,7 @@ out:
*/
static int __sctp_connect(struct sock *sk,
struct sockaddr *kaddrs,
- int addrs_size,
+ int addrs_size, int flags,
sctp_assoc_t *assoc_id)
{
struct net *net = sock_net(sk);
@@ -1104,7 +1104,6 @@ static int __sctp_connect(struct sock *sk,
union sctp_addr *sa_addr = NULL;
void *addr_buf;
unsigned short port;
- unsigned int f_flags = 0;
sp = sctp_sk(sk);
ep = sp->ep;
@@ -1254,13 +1253,7 @@ static int __sctp_connect(struct sock *sk,
sp->pf->to_sk_daddr(sa_addr, sk);
sk->sk_err = 0;
- /* in-kernel sockets don't generally have a file allocated to them
- * if all they do is call sock_create_kern().
- */
- if (sk->sk_socket->file)
- f_flags = sk->sk_socket->file->f_flags;
-
- timeo = sock_sndtimeo(sk, f_flags & O_NONBLOCK);
+ timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
if (assoc_id)
*assoc_id = asoc->assoc_id;
@@ -1348,7 +1341,7 @@ static int __sctp_setsockopt_connectx(struct sock *sk,
sctp_assoc_t *assoc_id)
{
struct sockaddr *kaddrs;
- int err = 0;
+ int err = 0, flags = 0;
pr_debug("%s: sk:%p addrs:%p addrs_size:%d\n",
__func__, sk, addrs, addrs_size);
@@ -1367,7 +1360,13 @@ static int __sctp_setsockopt_connectx(struct sock *sk,
if (err)
goto out_free;
- err = __sctp_connect(sk, kaddrs, addrs_size, assoc_id);
+ /* in-kernel sockets don't generally have a file allocated to them
+ * if all they do is call sock_create_kern().
+ */
+ if (sk->sk_socket->file)
+ flags = sk->sk_socket->file->f_flags;
+
+ err = __sctp_connect(sk, kaddrs, addrs_size, flags, assoc_id);
out_free:
kvfree(kaddrs);
@@ -4397,16 +4396,26 @@ out_nounlock:
* len: the size of the address.
*/
static int sctp_connect(struct sock *sk, struct sockaddr *addr,
- int addr_len)
+ int addr_len, int flags)
{
- int err = 0;
+ struct inet_sock *inet = inet_sk(sk);
struct sctp_af *af;
+ int err = 0;
lock_sock(sk);
pr_debug("%s: sk:%p, sockaddr:%p, addr_len:%d\n", __func__, sk,
addr, addr_len);
+ /* We may need to bind the socket. */
+ if (!inet->inet_num) {
+ if (sk->sk_prot->get_port(sk, 0)) {
+ release_sock(sk);
+ return -EAGAIN;
+ }
+ inet->inet_sport = htons(inet->inet_num);
+ }
+
/* Validate addr_len before calling common connect/connectx routine. */
af = sctp_get_af_specific(addr->sa_family);
if (!af || addr_len < af->sockaddr_len) {
@@ -4415,13 +4424,25 @@ static int sctp_connect(struct sock *sk, struct sockaddr *addr,
/* Pass correct addr len to common routine (so it knows there
* is only one address being passed.
*/
- err = __sctp_connect(sk, addr, af->sockaddr_len, NULL);
+ err = __sctp_connect(sk, addr, af->sockaddr_len, flags, NULL);
}
release_sock(sk);
return err;
}
+int sctp_inet_connect(struct socket *sock, struct sockaddr *uaddr,
+ int addr_len, int flags)
+{
+ if (addr_len < sizeof(uaddr->sa_family))
+ return -EINVAL;
+
+ if (uaddr->sa_family == AF_UNSPEC)
+ return -EOPNOTSUPP;
+
+ return sctp_connect(sock->sk, uaddr, addr_len, flags);
+}
+
/* FIXME: Write comments. */
static int sctp_disconnect(struct sock *sk, int flags)
{
@@ -7701,14 +7722,12 @@ out:
* here, again, by modeling the current TCP/UDP code. We don't have
* a good way to test with it yet.
*/
-__poll_t sctp_poll(struct file *file, struct socket *sock, poll_table *wait)
+__poll_t sctp_poll_mask(struct socket *sock, __poll_t events)
{
struct sock *sk = sock->sk;
struct sctp_sock *sp = sctp_sk(sk);
__poll_t mask;
- poll_wait(file, sk_sleep(sk), wait);
-
sock_rps_record_flow(sk);
/* A TCP-style listening socket becomes readable when the accept queue
@@ -8724,7 +8743,6 @@ struct proto sctp_prot = {
.name = "SCTP",
.owner = THIS_MODULE,
.close = sctp_close,
- .connect = sctp_connect,
.disconnect = sctp_disconnect,
.accept = sctp_accept,
.ioctl = sctp_ioctl,
@@ -8767,7 +8785,6 @@ struct proto sctpv6_prot = {
.name = "SCTPv6",
.owner = THIS_MODULE,
.close = sctp_close,
- .connect = sctp_connect,
.disconnect = sctp_disconnect,
.accept = sctp_accept,
.ioctl = sctp_ioctl,
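
The connect rework above has two halves: sctp_connect() now binds an ephemeral port itself when needed, and the O_NONBLOCK decision moved up into the new sctp_inet_connect() wrapper, because in-kernel sockets created via sock_create_kern() have no struct file to read f_flags from. With the flags threaded through, a kernel caller can request a non-blocking SCTP connect directly; a hedged usage sketch (the address setup is illustrative):

#include <linux/fcntl.h>
#include <linux/in.h>
#include <linux/net.h>

/* Sketch: non-blocking connect from kernel code, no file needed. */
static int example_nonblocking_connect(struct socket *sock,
				       struct sockaddr_in *sin)
{
	return kernel_connect(sock, (struct sockaddr *)sin,
			      sizeof(*sin), O_NONBLOCK);
}
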
diff --git a/net/sctp/stream.c b/net/sctp/stream.c
index f799043abec9..f1f1d1b232ba 100644
--- a/net/sctp/stream.c
+++ b/net/sctp/stream.c
@@ -240,6 +240,8 @@ void sctp_stream_update(struct sctp_stream *stream, struct sctp_stream *new)
new->out = NULL;
new->in = NULL;
+ new->outcnt = 0;
+ new->incnt = 0;
}
static int sctp_send_reconf(struct sctp_association *asoc,
diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c
index 84207ad33e8e..8cb7d9858270 100644
--- a/net/sctp/ulpevent.c
+++ b/net/sctp/ulpevent.c
@@ -715,7 +715,6 @@ struct sctp_ulpevent *sctp_ulpevent_make_rcvmsg(struct sctp_association *asoc,
return event;
fail_mark:
- sctp_chunk_put(chunk);
kfree_skb(skb);
fail:
return NULL;
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index f5d4b69dbabc..544bab42f925 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -292,6 +292,17 @@ static void smc_copy_sock_settings_to_smc(struct smc_sock *smc)
smc_copy_sock_settings(&smc->sk, smc->clcsock->sk, SK_FLAGS_CLC_TO_SMC);
}
+/* register a new rmb */
+static int smc_reg_rmb(struct smc_link *link, struct smc_buf_desc *rmb_desc)
+{
+ /* register memory region for new rmb */
+ if (smc_wr_reg_send(link, rmb_desc->mr_rx[SMC_SINGLE_LINK])) {
+ rmb_desc->regerr = 1;
+ return -EFAULT;
+ }
+ return 0;
+}
+
static int smc_clnt_conf_first_link(struct smc_sock *smc)
{
struct smc_link_group *lgr = smc->conn.lgr;
@@ -321,9 +332,7 @@ static int smc_clnt_conf_first_link(struct smc_sock *smc)
smc_wr_remember_qp_attr(link);
- rc = smc_wr_reg_send(link,
- smc->conn.rmb_desc->mr_rx[SMC_SINGLE_LINK]);
- if (rc)
+ if (smc_reg_rmb(link, smc->conn.rmb_desc))
return SMC_CLC_DECL_INTERR;
/* send CONFIRM LINK response over RoCE fabric */
@@ -473,13 +482,8 @@ static int smc_connect_rdma(struct smc_sock *smc)
goto decline_rdma_unlock;
}
} else {
- struct smc_buf_desc *buf_desc = smc->conn.rmb_desc;
-
- if (!buf_desc->reused) {
- /* register memory region for new rmb */
- rc = smc_wr_reg_send(link,
- buf_desc->mr_rx[SMC_SINGLE_LINK]);
- if (rc) {
+ if (!smc->conn.rmb_desc->reused) {
+ if (smc_reg_rmb(link, smc->conn.rmb_desc)) {
reason_code = SMC_CLC_DECL_INTERR;
goto decline_rdma_unlock;
}
@@ -719,9 +723,7 @@ static int smc_serv_conf_first_link(struct smc_sock *smc)
link = &lgr->lnk[SMC_SINGLE_LINK];
- rc = smc_wr_reg_send(link,
- smc->conn.rmb_desc->mr_rx[SMC_SINGLE_LINK]);
- if (rc)
+ if (smc_reg_rmb(link, smc->conn.rmb_desc))
return SMC_CLC_DECL_INTERR;
/* send CONFIRM LINK request to client over the RoCE fabric */
@@ -854,13 +856,8 @@ static void smc_listen_work(struct work_struct *work)
smc_rx_init(new_smc);
if (local_contact != SMC_FIRST_CONTACT) {
- struct smc_buf_desc *buf_desc = new_smc->conn.rmb_desc;
-
- if (!buf_desc->reused) {
- /* register memory region for new rmb */
- rc = smc_wr_reg_send(link,
- buf_desc->mr_rx[SMC_SINGLE_LINK]);
- if (rc) {
+ if (!new_smc->conn.rmb_desc->reused) {
+ if (smc_reg_rmb(link, new_smc->conn.rmb_desc)) {
reason_code = SMC_CLC_DECL_INTERR;
goto decline_rdma_unlock;
}
@@ -978,10 +975,6 @@ static void smc_tcp_listen_work(struct work_struct *work)
}
out:
- if (lsmc->clcsock) {
- sock_release(lsmc->clcsock);
- lsmc->clcsock = NULL;
- }
release_sock(lsk);
sock_put(&lsmc->sk); /* sock_hold in smc_listen */
}
@@ -1170,13 +1163,15 @@ static __poll_t smc_poll(struct file *file, struct socket *sock,
/* delegate to CLC child sock */
release_sock(sk);
mask = smc->clcsock->ops->poll(file, smc->clcsock, wait);
- /* if non-blocking connect finished ... */
lock_sock(sk);
- if ((sk->sk_state == SMC_INIT) && (mask & EPOLLOUT)) {
- sk->sk_err = smc->clcsock->sk->sk_err;
- if (sk->sk_err) {
- mask |= EPOLLERR;
- } else {
+ sk->sk_err = smc->clcsock->sk->sk_err;
+ if (sk->sk_err) {
+ mask |= EPOLLERR;
+ } else {
+ /* if non-blocking connect finished ... */
+ if (sk->sk_state == SMC_INIT &&
+ mask & EPOLLOUT &&
+ smc->clcsock->sk->sk_state != TCP_CLOSE) {
rc = smc_connect_rdma(smc);
if (rc < 0)
mask |= EPOLLERR;
@@ -1320,8 +1315,11 @@ static ssize_t smc_sendpage(struct socket *sock, struct page *page,
smc = smc_sk(sk);
lock_sock(sk);
- if (sk->sk_state != SMC_ACTIVE)
+ if (sk->sk_state != SMC_ACTIVE) {
+ release_sock(sk);
goto out;
+ }
+ release_sock(sk);
if (smc->use_fallback)
rc = kernel_sendpage(smc->clcsock, page, offset,
size, flags);
@@ -1329,7 +1327,6 @@ static ssize_t smc_sendpage(struct socket *sock, struct page *page,
rc = sock_no_sendpage(sock, page, offset, size, flags);
out:
- release_sock(sk);
return rc;
}
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index f44f6803f7ff..d4bd01bb44e1 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -32,6 +32,9 @@
static u32 smc_lgr_num; /* unique link group number */
+static void smc_buf_free(struct smc_buf_desc *buf_desc, struct smc_link *lnk,
+ bool is_rmb);
+
static void smc_lgr_schedule_free_work(struct smc_link_group *lgr)
{
/* client link group creation always follows the server link group
@@ -234,9 +237,22 @@ static void smc_buf_unuse(struct smc_connection *conn)
conn->sndbuf_size = 0;
}
if (conn->rmb_desc) {
- conn->rmb_desc->reused = true;
- conn->rmb_desc->used = 0;
- conn->rmbe_size = 0;
+ if (!conn->rmb_desc->regerr) {
+ conn->rmb_desc->reused = 1;
+ conn->rmb_desc->used = 0;
+ conn->rmbe_size = 0;
+ } else {
+ /* buf registration failed, reuse not possible */
+ struct smc_link_group *lgr = conn->lgr;
+ struct smc_link *lnk;
+
+ write_lock_bh(&lgr->rmbs_lock);
+ list_del(&conn->rmb_desc->list);
+ write_unlock_bh(&lgr->rmbs_lock);
+
+ lnk = &lgr->lnk[SMC_SINGLE_LINK];
+ smc_buf_free(conn->rmb_desc, lnk, true);
+ }
}
}
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index 07e2a393e6d9..5dfcb15d529f 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -123,7 +123,8 @@ struct smc_buf_desc {
*/
u32 order; /* allocation order */
u32 used; /* currently used / unused */
- bool reused; /* new created / reused */
+ u8 reused : 1; /* new created / reused */
+ u8 regerr : 1; /* err during registration */
};
struct smc_rtoken { /* address/key of remote RMB */
diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c
index 74568cdbca70..d7b88b2d1b22 100644
--- a/net/smc/smc_pnet.c
+++ b/net/smc/smc_pnet.c
@@ -245,40 +245,45 @@ out:
static int smc_pnet_fill_entry(struct net *net, struct smc_pnetentry *pnetelem,
struct nlattr *tb[])
{
- char *string, *ibname = NULL;
- int rc = 0;
+ char *string, *ibname;
+ int rc;
memset(pnetelem, 0, sizeof(*pnetelem));
INIT_LIST_HEAD(&pnetelem->list);
- if (tb[SMC_PNETID_NAME]) {
- string = (char *)nla_data(tb[SMC_PNETID_NAME]);
- if (!smc_pnetid_valid(string, pnetelem->pnet_name)) {
- rc = -EINVAL;
- goto error;
- }
- }
- if (tb[SMC_PNETID_ETHNAME]) {
- string = (char *)nla_data(tb[SMC_PNETID_ETHNAME]);
- pnetelem->ndev = dev_get_by_name(net, string);
- if (!pnetelem->ndev)
- return -ENOENT;
- }
- if (tb[SMC_PNETID_IBNAME]) {
- ibname = (char *)nla_data(tb[SMC_PNETID_IBNAME]);
- ibname = strim(ibname);
- pnetelem->smcibdev = smc_pnet_find_ib(ibname);
- if (!pnetelem->smcibdev) {
- rc = -ENOENT;
- goto error;
- }
- }
- if (tb[SMC_PNETID_IBPORT]) {
- pnetelem->ib_port = nla_get_u8(tb[SMC_PNETID_IBPORT]);
- if (pnetelem->ib_port > SMC_MAX_PORTS) {
- rc = -EINVAL;
- goto error;
- }
- }
+
+ rc = -EINVAL;
+ if (!tb[SMC_PNETID_NAME])
+ goto error;
+ string = (char *)nla_data(tb[SMC_PNETID_NAME]);
+ if (!smc_pnetid_valid(string, pnetelem->pnet_name))
+ goto error;
+
+ rc = -EINVAL;
+ if (!tb[SMC_PNETID_ETHNAME])
+ goto error;
+ rc = -ENOENT;
+ string = (char *)nla_data(tb[SMC_PNETID_ETHNAME]);
+ pnetelem->ndev = dev_get_by_name(net, string);
+ if (!pnetelem->ndev)
+ goto error;
+
+ rc = -EINVAL;
+ if (!tb[SMC_PNETID_IBNAME])
+ goto error;
+ rc = -ENOENT;
+ ibname = (char *)nla_data(tb[SMC_PNETID_IBNAME]);
+ ibname = strim(ibname);
+ pnetelem->smcibdev = smc_pnet_find_ib(ibname);
+ if (!pnetelem->smcibdev)
+ goto error;
+
+ rc = -EINVAL;
+ if (!tb[SMC_PNETID_IBPORT])
+ goto error;
+ pnetelem->ib_port = nla_get_u8(tb[SMC_PNETID_IBPORT]);
+ if (pnetelem->ib_port < 1 || pnetelem->ib_port > SMC_MAX_PORTS)
+ goto error;
+
return 0;
error:
@@ -307,6 +312,8 @@ static int smc_pnet_get(struct sk_buff *skb, struct genl_info *info)
void *hdr;
int rc;
+ if (!info->attrs[SMC_PNETID_NAME])
+ return -EINVAL;
pnetelem = smc_pnet_find_pnetid(
(char *)nla_data(info->attrs[SMC_PNETID_NAME]));
if (!pnetelem)
@@ -359,6 +366,8 @@ static int smc_pnet_add(struct sk_buff *skb, struct genl_info *info)
static int smc_pnet_del(struct sk_buff *skb, struct genl_info *info)
{
+ if (!info->attrs[SMC_PNETID_NAME])
+ return -EINVAL;
return smc_pnet_remove_by_pnetid(
(char *)nla_data(info->attrs[SMC_PNETID_NAME]));
}
diff --git a/net/socket.c b/net/socket.c
index f10f1d947c78..2d752e9eb3f9 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -117,8 +117,10 @@ static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from);
static int sock_mmap(struct file *file, struct vm_area_struct *vma);
static int sock_close(struct inode *inode, struct file *file);
-static __poll_t sock_poll(struct file *file,
- struct poll_table_struct *wait);
+static struct wait_queue_head *sock_get_poll_head(struct file *file,
+ __poll_t events);
+static __poll_t sock_poll_mask(struct file *file, __poll_t);
+static __poll_t sock_poll(struct file *file, struct poll_table_struct *wait);
static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
#ifdef CONFIG_COMPAT
static long compat_sock_ioctl(struct file *file,
@@ -141,6 +143,8 @@ static const struct file_operations socket_file_ops = {
.llseek = no_llseek,
.read_iter = sock_read_iter,
.write_iter = sock_write_iter,
+ .get_poll_head = sock_get_poll_head,
+ .poll_mask = sock_poll_mask,
.poll = sock_poll,
.unlocked_ioctl = sock_ioctl,
#ifdef CONFIG_COMPAT
@@ -1114,27 +1118,48 @@ out_release:
}
EXPORT_SYMBOL(sock_create_lite);
-/* No kernel lock held - perfect */
-static __poll_t sock_poll(struct file *file, poll_table *wait)
+static struct wait_queue_head *sock_get_poll_head(struct file *file,
+ __poll_t events)
{
- __poll_t busy_flag = 0;
- struct socket *sock;
+ struct socket *sock = file->private_data;
+
+ if (!sock->ops->poll_mask)
+ return NULL;
+ sock_poll_busy_loop(sock, events);
+ return sk_sleep(sock->sk);
+}
+
+static __poll_t sock_poll_mask(struct file *file, __poll_t events)
+{
+ struct socket *sock = file->private_data;
/*
- * We can't return errors to poll, so it's either yes or no.
+ * We need to be sure we are in sync with the socket flags modification.
+ *
+ * This memory barrier is paired in the wq_has_sleeper.
*/
- sock = file->private_data;
+ smp_mb();
+
+ /* this socket can poll_ll so tell the system call */
+ return sock->ops->poll_mask(sock, events) |
+ (sk_can_busy_loop(sock->sk) ? POLL_BUSY_LOOP : 0);
+}
- if (sk_can_busy_loop(sock->sk)) {
- /* this socket can poll_ll so tell the system call */
- busy_flag = POLL_BUSY_LOOP;
+/* No kernel lock held - perfect */
+static __poll_t sock_poll(struct file *file, poll_table *wait)
+{
+ struct socket *sock = file->private_data;
+ __poll_t events = poll_requested_events(wait), mask = 0;
- /* once, only if requested by syscall */
- if (wait && (wait->_key & POLL_BUSY_LOOP))
- sk_busy_loop(sock->sk, 1);
+ if (sock->ops->poll) {
+ sock_poll_busy_loop(sock, events);
+ mask = sock->ops->poll(file, sock, wait);
+ } else if (sock->ops->poll_mask) {
+ sock_poll_wait(file, sock_get_poll_head(file, events), wait);
+ mask = sock->ops->poll_mask(sock, events);
}
- return busy_flag | sock->ops->poll(file, sock, wait);
+ return mask | sock_poll_busy_flag(sock);
}
static int sock_mmap(struct file *file, struct vm_area_struct *vma)
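
The socket.c hunk is the core of the new poll model: ->get_poll_head() returns the waitqueue to sleep on, ->poll_mask() computes readiness, and the VFS owns the waiting, so protocols stop threading poll_table through. A minimal ->poll_mask() for a stream-style protocol might look like the sketch below (placeholder name; the event choices are illustrative, assuming a stream socket):

#include <linux/net.h>
#include <linux/poll.h>
#include <net/sock.h>

static __poll_t example_poll_mask(struct socket *sock, __poll_t events)
{
	struct sock *sk = sock->sk;
	__poll_t mask = 0;

	if (!skb_queue_empty(&sk->sk_receive_queue))
		mask |= EPOLLIN | EPOLLRDNORM;	/* data to read */
	if (sk_stream_is_writeable(sk))
		mask |= EPOLLOUT | EPOLLWRNORM;	/* room to write */
	if (sk->sk_err)
		mask |= EPOLLERR;
	return mask;
}
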
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index c81ef5e6c981..4fda18d47e2c 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -609,22 +609,6 @@ static int __rpc_rmdir(struct inode *dir, struct dentry *dentry)
return ret;
}
-int rpc_rmdir(struct dentry *dentry)
-{
- struct dentry *parent;
- struct inode *dir;
- int error;
-
- parent = dget_parent(dentry);
- dir = d_inode(parent);
- inode_lock_nested(dir, I_MUTEX_PARENT);
- error = __rpc_rmdir(dir, dentry);
- inode_unlock(dir);
- dput(parent);
- return error;
-}
-EXPORT_SYMBOL_GPL(rpc_rmdir);
-
static int __rpc_unlink(struct inode *dir, struct dentry *dentry)
{
int ret;
diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c
index 5cc68a824f45..f2f63959fddd 100644
--- a/net/sunrpc/xprtrdma/fmr_ops.c
+++ b/net/sunrpc/xprtrdma/fmr_ops.c
@@ -72,6 +72,7 @@ fmr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr)
if (IS_ERR(mr->fmr.fm_mr))
goto out_fmr_err;
+ INIT_LIST_HEAD(&mr->mr_list);
return 0;
out_fmr_err:
@@ -102,10 +103,6 @@ fmr_op_release_mr(struct rpcrdma_mr *mr)
LIST_HEAD(unmap_list);
int rc;
- /* Ensure MW is not on any rl_registered list */
- if (!list_empty(&mr->mr_list))
- list_del(&mr->mr_list);
-
kfree(mr->fmr.fm_physaddrs);
kfree(mr->mr_sg);
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index c5743a0960be..c59c5c788db0 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -110,6 +110,7 @@ frwr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr)
if (!mr->mr_sg)
goto out_list_err;
+ INIT_LIST_HEAD(&mr->mr_list);
sg_init_table(mr->mr_sg, depth);
init_completion(&frwr->fr_linv_done);
return 0;
@@ -133,10 +134,6 @@ frwr_op_release_mr(struct rpcrdma_mr *mr)
{
int rc;
- /* Ensure MR is not on any rl_registered list */
- if (!list_empty(&mr->mr_list))
- list_del(&mr->mr_list);
-
rc = ib_dereg_mr(mr->frwr.fr_mr);
if (rc)
pr_err("rpcrdma: final ib_dereg_mr for %p returned %i\n",
@@ -195,7 +192,7 @@ frwr_op_recover_mr(struct rpcrdma_mr *mr)
return;
out_release:
- pr_err("rpcrdma: FRWR reset failed %d, %p release\n", rc, mr);
+ pr_err("rpcrdma: FRWR reset failed %d, %p released\n", rc, mr);
r_xprt->rx_stats.mrs_orphaned++;
spin_lock(&r_xprt->rx_buf.rb_mrlock);
@@ -476,7 +473,7 @@ frwr_op_reminv(struct rpcrdma_rep *rep, struct list_head *mrs)
list_for_each_entry(mr, mrs, mr_list)
if (mr->mr_handle == rep->rr_inv_rkey) {
- list_del(&mr->mr_list);
+ list_del_init(&mr->mr_list);
trace_xprtrdma_remoteinv(mr);
mr->frwr.fr_state = FRWR_IS_INVALID;
rpcrdma_mr_unmap_and_put(mr);
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index fe5eaca2d197..c345d365af88 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -1254,6 +1254,11 @@ rpcrdma_mrs_destroy(struct rpcrdma_buffer *buf)
list_del(&mr->mr_all);
spin_unlock(&buf->rb_mrlock);
+
+ /* Ensure MW is not on any rl_registered list */
+ if (!list_empty(&mr->mr_list))
+ list_del(&mr->mr_list);
+
ia->ri_ops->ro_release_mr(mr);
count++;
spin_lock(&buf->rb_mrlock);
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index 3d3b423fa9c1..cb41b12a3bf8 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -380,7 +380,7 @@ rpcrdma_mr_pop(struct list_head *list)
struct rpcrdma_mr *mr;
mr = list_first_entry(list, struct rpcrdma_mr, mr_list);
- list_del(&mr->mr_list);
+ list_del_init(&mr->mr_list);
return mr;
}
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 6f98b56dd48e..f29549de9245 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -1950,6 +1950,7 @@ out:
int tipc_nl_node_get_link(struct sk_buff *skb, struct genl_info *info)
{
struct net *net = genl_info_net(info);
+ struct nlattr *attrs[TIPC_NLA_LINK_MAX + 1];
struct tipc_nl_msg msg;
char *name;
int err;
@@ -1957,9 +1958,19 @@ int tipc_nl_node_get_link(struct sk_buff *skb, struct genl_info *info)
msg.portid = info->snd_portid;
msg.seq = info->snd_seq;
- if (!info->attrs[TIPC_NLA_LINK_NAME])
+ if (!info->attrs[TIPC_NLA_LINK])
return -EINVAL;
- name = nla_data(info->attrs[TIPC_NLA_LINK_NAME]);
+
+ err = nla_parse_nested(attrs, TIPC_NLA_LINK_MAX,
+ info->attrs[TIPC_NLA_LINK],
+ tipc_nl_link_policy, info->extack);
+ if (err)
+ return err;
+
+ if (!attrs[TIPC_NLA_LINK_NAME])
+ return -EINVAL;
+
+ name = nla_data(attrs[TIPC_NLA_LINK_NAME]);
msg.skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
if (!msg.skb)
@@ -2244,7 +2255,7 @@ int tipc_nl_node_dump_monitor(struct sk_buff *skb, struct netlink_callback *cb)
rtnl_lock();
for (bearer_id = prev_bearer; bearer_id < MAX_BEARERS; bearer_id++) {
- err = __tipc_nl_add_monitor(net, &msg, prev_bearer);
+ err = __tipc_nl_add_monitor(net, &msg, bearer_id);
if (err)
break;
}
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 252a52ae0893..3bb45042e833 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -692,10 +692,9 @@ static int tipc_getname(struct socket *sock, struct sockaddr *uaddr,
}
/**
- * tipc_poll - read and possibly block on pollmask
+ * tipc_poll - read pollmask
* @file: file structure associated with the socket
* @sock: socket for which to calculate the poll bits
- * @wait: ???
*
* Returns pollmask value
*
@@ -709,15 +708,12 @@ static int tipc_getname(struct socket *sock, struct sockaddr *uaddr,
* imply that the operation will succeed, merely that it should be performed
* and will not block.
*/
-static __poll_t tipc_poll(struct file *file, struct socket *sock,
- poll_table *wait)
+static __poll_t tipc_poll_mask(struct socket *sock, __poll_t events)
{
struct sock *sk = sock->sk;
struct tipc_sock *tsk = tipc_sk(sk);
__poll_t revents = 0;
- sock_poll_wait(file, sk_sleep(sk), wait);
-
if (sk->sk_shutdown & RCV_SHUTDOWN)
revents |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
if (sk->sk_shutdown == SHUTDOWN_MASK)
@@ -1516,10 +1512,10 @@ static void tipc_sk_set_orig_addr(struct msghdr *m, struct sk_buff *skb)
srcaddr->sock.family = AF_TIPC;
srcaddr->sock.addrtype = TIPC_ADDR_ID;
+ srcaddr->sock.scope = 0;
srcaddr->sock.addr.id.ref = msg_origport(hdr);
srcaddr->sock.addr.id.node = msg_orignode(hdr);
srcaddr->sock.addr.name.domain = 0;
- srcaddr->sock.scope = 0;
m->msg_namelen = sizeof(struct sockaddr_tipc);
if (!msg_in_group(hdr))
@@ -1528,6 +1524,7 @@ static void tipc_sk_set_orig_addr(struct msghdr *m, struct sk_buff *skb)
/* Group message users may also want to know sending member's id */
srcaddr->member.family = AF_TIPC;
srcaddr->member.addrtype = TIPC_ADDR_NAME;
+ srcaddr->member.scope = 0;
srcaddr->member.addr.name.name.type = msg_nametype(hdr);
srcaddr->member.addr.name.name.instance = TIPC_SKB_CB(skb)->orig_member;
srcaddr->member.addr.name.domain = 0;
@@ -3027,7 +3024,7 @@ static const struct proto_ops msg_ops = {
.socketpair = tipc_socketpair,
.accept = sock_no_accept,
.getname = tipc_getname,
- .poll = tipc_poll,
+ .poll_mask = tipc_poll_mask,
.ioctl = tipc_ioctl,
.listen = sock_no_listen,
.shutdown = tipc_shutdown,
@@ -3048,7 +3045,7 @@ static const struct proto_ops packet_ops = {
.socketpair = tipc_socketpair,
.accept = tipc_accept,
.getname = tipc_getname,
- .poll = tipc_poll,
+ .poll_mask = tipc_poll_mask,
.ioctl = tipc_ioctl,
.listen = tipc_listen,
.shutdown = tipc_shutdown,
@@ -3069,7 +3066,7 @@ static const struct proto_ops stream_ops = {
.socketpair = tipc_socketpair,
.accept = tipc_accept,
.getname = tipc_getname,
- .poll = tipc_poll,
+ .poll_mask = tipc_poll_mask,
.ioctl = tipc_ioctl,
.listen = tipc_listen,
.shutdown = tipc_shutdown,
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index 0d379970960e..20cd93be6236 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -114,6 +114,7 @@ int tls_push_sg(struct sock *sk,
size = sg->length - offset;
offset += sg->offset;
+ ctx->in_tcp_sendpages = true;
while (1) {
if (sg_is_last(sg))
sendpage_flags = flags;
@@ -134,6 +135,7 @@ retry:
offset -= sg->offset;
ctx->partially_sent_offset = offset;
ctx->partially_sent_record = (void *)sg;
+ ctx->in_tcp_sendpages = false;
return ret;
}
@@ -148,6 +150,8 @@ retry:
}
clear_bit(TLS_PENDING_CLOSED_RECORD, &ctx->flags);
+ ctx->in_tcp_sendpages = false;
+ ctx->sk_write_space(sk);
return 0;
}
@@ -217,6 +221,10 @@ static void tls_write_space(struct sock *sk)
{
struct tls_context *ctx = tls_get_ctx(sk);
+ /* We are already sending pages, ignore notification */
+ if (ctx->in_tcp_sendpages)
+ return;
+
if (!sk->sk_write_pending && tls_is_pending_closed_record(ctx)) {
gfp_t sk_allocation = sk->sk_allocation;
int rc;
@@ -241,16 +249,13 @@ static void tls_sk_proto_close(struct sock *sk, long timeout)
struct tls_context *ctx = tls_get_ctx(sk);
long timeo = sock_sndtimeo(sk, 0);
void (*sk_proto_close)(struct sock *sk, long timeout);
+ bool free_ctx = false;
lock_sock(sk);
sk_proto_close = ctx->sk_proto_close;
- if (ctx->conf == TLS_HW_RECORD)
- goto skip_tx_cleanup;
-
- if (ctx->conf == TLS_BASE) {
- kfree(ctx);
- ctx = NULL;
+ if (ctx->conf == TLS_BASE || ctx->conf == TLS_HW_RECORD) {
+ free_ctx = true;
goto skip_tx_cleanup;
}
@@ -287,7 +292,7 @@ skip_tx_cleanup:
/* free ctx for TLS_HW_RECORD, used by tcp_set_state
* for sk->sk_prot->unhash [tls_hw_unhash]
*/
- if (ctx && ctx->conf == TLS_HW_RECORD)
+ if (free_ctx)
kfree(ctx);
}
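
The in_tcp_sendpages flag breaks a recursion: tls_push_sg() calls into do_tcp_sendpages(), which can fire the sk_write_space callback, which without the guard would re-enter the TLS transmit path mid-send. The shape of the guard as a stand-alone user-space sketch (all names illustrative):

#include <stdbool.h>

struct ctx {
	bool in_send;	/* stands in for ctx->in_tcp_sendpages */
};

static void write_space(struct ctx *c)
{
	if (c->in_send)		/* notification came from our own send loop */
		return;
	/* ... otherwise kick the transmit machinery ... */
}

static void push_records(struct ctx *c)
{
	c->in_send = true;
	/* ... the send loop here may trigger write_space() ... */
	c->in_send = false;
	write_space(c);		/* safe to act on it now */
}
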
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 71e79597f940..e1c93ce74e0f 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -680,7 +680,6 @@ static int decrypt_skb(struct sock *sk, struct sk_buff *skb,
struct scatterlist *sgin = &sgin_arr[0];
struct strp_msg *rxm = strp_msg(skb);
int ret, nsg = ARRAY_SIZE(sgin_arr);
- char aad_recv[TLS_AAD_SPACE_SIZE];
struct sk_buff *unused;
ret = skb_copy_bits(skb, rxm->offset + TLS_HEADER_SIZE,
@@ -698,13 +697,13 @@ static int decrypt_skb(struct sock *sk, struct sk_buff *skb,
}
sg_init_table(sgin, nsg);
- sg_set_buf(&sgin[0], aad_recv, sizeof(aad_recv));
+ sg_set_buf(&sgin[0], ctx->rx_aad_ciphertext, TLS_AAD_SPACE_SIZE);
nsg = skb_to_sgvec(skb, &sgin[1],
rxm->offset + tls_ctx->rx.prepend_size,
rxm->full_len - tls_ctx->rx.prepend_size);
- tls_make_aad(aad_recv,
+ tls_make_aad(ctx->rx_aad_ciphertext,
rxm->full_len - tls_ctx->rx.overhead_size,
tls_ctx->rx.rec_seq,
tls_ctx->rx.rec_seq_size,
@@ -803,12 +802,12 @@ int tls_sw_recvmsg(struct sock *sk,
if (to_copy <= len && page_count < MAX_SKB_FRAGS &&
likely(!(flags & MSG_PEEK))) {
struct scatterlist sgin[MAX_SKB_FRAGS + 1];
- char unused[21];
int pages = 0;
zc = true;
sg_init_table(sgin, MAX_SKB_FRAGS + 1);
- sg_set_buf(&sgin[0], unused, 13);
+ sg_set_buf(&sgin[0], ctx->rx_aad_plaintext,
+ TLS_AAD_SPACE_SIZE);
err = zerocopy_from_iter(sk, &msg->msg_iter,
to_copy, &pages,
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 68bb70a62afe..95b02a71fd47 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -638,9 +638,8 @@ static int unix_stream_connect(struct socket *, struct sockaddr *,
static int unix_socketpair(struct socket *, struct socket *);
static int unix_accept(struct socket *, struct socket *, int, bool);
static int unix_getname(struct socket *, struct sockaddr *, int);
-static __poll_t unix_poll(struct file *, struct socket *, poll_table *);
-static __poll_t unix_dgram_poll(struct file *, struct socket *,
- poll_table *);
+static __poll_t unix_poll_mask(struct socket *, __poll_t);
+static __poll_t unix_dgram_poll_mask(struct socket *, __poll_t);
static int unix_ioctl(struct socket *, unsigned int, unsigned long);
static int unix_shutdown(struct socket *, int);
static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t);
@@ -681,7 +680,7 @@ static const struct proto_ops unix_stream_ops = {
.socketpair = unix_socketpair,
.accept = unix_accept,
.getname = unix_getname,
- .poll = unix_poll,
+ .poll_mask = unix_poll_mask,
.ioctl = unix_ioctl,
.listen = unix_listen,
.shutdown = unix_shutdown,
@@ -704,7 +703,7 @@ static const struct proto_ops unix_dgram_ops = {
.socketpair = unix_socketpair,
.accept = sock_no_accept,
.getname = unix_getname,
- .poll = unix_dgram_poll,
+ .poll_mask = unix_dgram_poll_mask,
.ioctl = unix_ioctl,
.listen = sock_no_listen,
.shutdown = unix_shutdown,
@@ -726,7 +725,7 @@ static const struct proto_ops unix_seqpacket_ops = {
.socketpair = unix_socketpair,
.accept = unix_accept,
.getname = unix_getname,
- .poll = unix_dgram_poll,
+ .poll_mask = unix_dgram_poll_mask,
.ioctl = unix_ioctl,
.listen = unix_listen,
.shutdown = unix_shutdown,
@@ -2630,13 +2629,10 @@ static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
return err;
}
-static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wait)
+static __poll_t unix_poll_mask(struct socket *sock, __poll_t events)
{
struct sock *sk = sock->sk;
- __poll_t mask;
-
- sock_poll_wait(file, sk_sleep(sk), wait);
- mask = 0;
+ __poll_t mask = 0;
/* exceptional events? */
if (sk->sk_err)
@@ -2665,15 +2661,11 @@ static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wa
return mask;
}
-static __poll_t unix_dgram_poll(struct file *file, struct socket *sock,
- poll_table *wait)
+static __poll_t unix_dgram_poll_mask(struct socket *sock, __poll_t events)
{
struct sock *sk = sock->sk, *other;
- unsigned int writable;
- __poll_t mask;
-
- sock_poll_wait(file, sk_sleep(sk), wait);
- mask = 0;
+ int writable;
+ __poll_t mask = 0;
/* exceptional events? */
if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
@@ -2699,7 +2691,7 @@ static __poll_t unix_dgram_poll(struct file *file, struct socket *sock,
}
/* No write status requested, avoid expensive OUT tests. */
- if (!(poll_requested_events(wait) & (EPOLLWRBAND|EPOLLWRNORM|EPOLLOUT)))
+ if (!(events & (EPOLLWRBAND|EPOLLWRNORM|EPOLLOUT)))
return mask;
writable = unix_writable(sk);
@@ -2852,20 +2844,6 @@ static const struct seq_operations unix_seq_ops = {
.stop = unix_seq_stop,
.show = unix_seq_show,
};
-
-static int unix_seq_open(struct inode *inode, struct file *file)
-{
- return seq_open_net(inode, file, &unix_seq_ops,
- sizeof(struct seq_net_private));
-}
-
-static const struct file_operations unix_seq_fops = {
- .open = unix_seq_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release_net,
-};
-
#endif
static const struct net_proto_family unix_family_ops = {
@@ -2884,7 +2862,8 @@ static int __net_init unix_net_init(struct net *net)
goto out;
#ifdef CONFIG_PROC_FS
- if (!proc_create("unix", 0, net->proc_net, &unix_seq_fops)) {
+ if (!proc_create_net("unix", 0, net->proc_net, &unix_seq_ops,
+ sizeof(struct seq_net_private))) {
unix_sysctl_unregister(net);
goto out;
}
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index c1076c19b858..bb5d5fa68c35 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -850,18 +850,11 @@ static int vsock_shutdown(struct socket *sock, int mode)
return err;
}
-static __poll_t vsock_poll(struct file *file, struct socket *sock,
- poll_table *wait)
+static __poll_t vsock_poll_mask(struct socket *sock, __poll_t events)
{
- struct sock *sk;
- __poll_t mask;
- struct vsock_sock *vsk;
-
- sk = sock->sk;
- vsk = vsock_sk(sk);
-
- poll_wait(file, sk_sleep(sk), wait);
- mask = 0;
+ struct sock *sk = sock->sk;
+ struct vsock_sock *vsk = vsock_sk(sk);
+ __poll_t mask = 0;
if (sk->sk_err)
/* Signify that there has been an error on this socket. */
@@ -1091,7 +1084,7 @@ static const struct proto_ops vsock_dgram_ops = {
.socketpair = sock_no_socketpair,
.accept = sock_no_accept,
.getname = vsock_getname,
- .poll = vsock_poll,
+ .poll_mask = vsock_poll_mask,
.ioctl = sock_no_ioctl,
.listen = sock_no_listen,
.shutdown = vsock_shutdown,
@@ -1849,7 +1842,7 @@ static const struct proto_ops vsock_stream_ops = {
.socketpair = sock_no_socketpair,
.accept = vsock_accept,
.getname = vsock_getname,
- .poll = vsock_poll,
+ .poll_mask = vsock_poll_mask,
.ioctl = sock_no_ioctl,
.listen = vsock_listen,
.shutdown = vsock_shutdown,
diff --git a/net/wireless/core.c b/net/wireless/core.c
index a6f3cac8c640..c0fd8a85e7f7 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -95,6 +95,9 @@ static int cfg80211_dev_check_name(struct cfg80211_registered_device *rdev,
ASSERT_RTNL();
+ if (strlen(newname) > NL80211_WIPHY_NAME_MAXLEN)
+ return -EINVAL;
+
/* prohibit calling the thing phy%d when %d is not its number */
sscanf(newname, PHY_NAME "%d%n", &wiphy_idx, &taken);
if (taken == strlen(newname) && wiphy_idx != rdev->wiphy_idx) {
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index ff28f8feeb09..7c5135a92d76 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -9214,6 +9214,7 @@ static int nl80211_connect(struct sk_buff *skb, struct genl_info *info)
if (nla_get_flag(info->attrs[NL80211_ATTR_EXTERNAL_AUTH_SUPPORT])) {
if (!info->attrs[NL80211_ATTR_SOCKET_OWNER]) {
+ kzfree(connkeys);
GENL_SET_ERR_MSG(info,
"external auth requires connection ownership");
return -EINVAL;
@@ -15554,7 +15555,8 @@ void cfg80211_ft_event(struct net_device *netdev,
if (!ft_event->target_ap)
return;
- msg = nlmsg_new(100 + ft_event->ric_ies_len, GFP_KERNEL);
+ msg = nlmsg_new(100 + ft_event->ies_len + ft_event->ric_ies_len,
+ GFP_KERNEL);
if (!msg)
return;
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 16c7e4ef5820..5fcec5c94eb7 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -916,6 +916,9 @@ int reg_query_regdb_wmm(char *alpha2, int freq, u32 *dbptr,
const struct fwdb_header *hdr = regdb;
const struct fwdb_country *country;
+ if (!regdb)
+ return -ENODATA;
+
if (IS_ERR(regdb))
return PTR_ERR(regdb);
@@ -1026,6 +1029,7 @@ static int regdb_query_country(const struct fwdb_header *db,
if (!tmp_rd) {
kfree(regdom);
+ kfree(wmm_ptrs);
return -ENOMEM;
}
regdom = tmp_rd;
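
Both wireless fixes above are error-path ownership bugs, like the kzfree(connkeys) fix in nl80211.c before it: krealloc() leaves the original buffer allocated on failure, so the fixed path must free it along with any sibling allocation made earlier. Condensed:

	tmp_rd = krealloc(regdom, new_size, GFP_KERNEL);
	if (!tmp_rd) {
		kfree(regdom);		/* krealloc failure keeps the old buffer */
		kfree(wmm_ptrs);	/* sibling allocation must go too */
		return -ENOMEM;
	}
	regdom = tmp_rd;
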
diff --git a/net/wireless/wext-proc.c b/net/wireless/wext-proc.c
index b4c464594a5e..cadcf8613af2 100644
--- a/net/wireless/wext-proc.c
+++ b/net/wireless/wext-proc.c
@@ -126,24 +126,11 @@ static const struct seq_operations wireless_seq_ops = {
.show = wireless_dev_seq_show,
};
-static int seq_open_wireless(struct inode *inode, struct file *file)
-{
- return seq_open_net(inode, file, &wireless_seq_ops,
- sizeof(struct seq_net_private));
-}
-
-static const struct file_operations wireless_seq_fops = {
- .open = seq_open_wireless,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release_net,
-};
-
int __net_init wext_proc_init(struct net *net)
{
/* Create /proc/net/wireless entry */
- if (!proc_create("wireless", 0444, net->proc_net,
- &wireless_seq_fops))
+ if (!proc_create_net("wireless", 0444, net->proc_net,
+ &wireless_seq_ops, sizeof(struct seq_net_private)))
return -ENOMEM;
return 0;
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index d49aa79b7997..f93365ae0fdd 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -1750,7 +1750,7 @@ static const struct proto_ops x25_proto_ops = {
.socketpair = sock_no_socketpair,
.accept = x25_accept,
.getname = x25_getname,
- .poll = datagram_poll,
+ .poll_mask = datagram_poll_mask,
.ioctl = x25_ioctl,
#ifdef CONFIG_COMPAT
.compat_ioctl = compat_x25_ioctl,
diff --git a/net/x25/x25_proc.c b/net/x25/x25_proc.c
index 64b415e93f6a..da52c9dc256c 100644
--- a/net/x25/x25_proc.c
+++ b/net/x25/x25_proc.c
@@ -171,57 +171,21 @@ static const struct seq_operations x25_seq_forward_ops = {
.show = x25_seq_forward_show,
};
-static int x25_seq_socket_open(struct inode *inode, struct file *file)
-{
- return seq_open(file, &x25_seq_socket_ops);
-}
-
-static int x25_seq_route_open(struct inode *inode, struct file *file)
-{
- return seq_open(file, &x25_seq_route_ops);
-}
-
-static int x25_seq_forward_open(struct inode *inode, struct file *file)
-{
- return seq_open(file, &x25_seq_forward_ops);
-}
-
-static const struct file_operations x25_seq_socket_fops = {
- .open = x25_seq_socket_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
-static const struct file_operations x25_seq_route_fops = {
- .open = x25_seq_route_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
-static const struct file_operations x25_seq_forward_fops = {
- .open = x25_seq_forward_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
int __init x25_proc_init(void)
{
if (!proc_mkdir("x25", init_net.proc_net))
return -ENOMEM;
- if (!proc_create("x25/route", 0444, init_net.proc_net,
- &x25_seq_route_fops))
+ if (!proc_create_seq("x25/route", 0444, init_net.proc_net,
+ &x25_seq_route_ops))
goto out;
- if (!proc_create("x25/socket", 0444, init_net.proc_net,
- &x25_seq_socket_fops))
+ if (!proc_create_seq("x25/socket", 0444, init_net.proc_net,
+ &x25_seq_socket_ops))
goto out;
- if (!proc_create("x25/forward", 0444, init_net.proc_net,
- &x25_seq_forward_fops))
+ if (!proc_create_seq("x25/forward", 0444, init_net.proc_net,
+ &x25_seq_forward_ops))
goto out;
return 0;
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 40b54cc64243..5f48251c1319 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1658,7 +1658,6 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
trailer_len -= xdst_prev->u.dst.xfrm->props.trailer_len;
}
-out:
return &xdst0->u.dst;
put_states:
@@ -1667,8 +1666,8 @@ put_states:
free_dst:
if (xdst0)
dst_release_immediate(&xdst0->u.dst);
- xdst0 = ERR_PTR(err);
- goto out;
+
+ return ERR_PTR(err);
}
static int xfrm_expand_policies(const struct flowi *fl, u16 family,
diff --git a/net/xfrm/xfrm_proc.c b/net/xfrm/xfrm_proc.c
index ed06903cd84d..178318d2e120 100644
--- a/net/xfrm/xfrm_proc.c
+++ b/net/xfrm/xfrm_proc.c
@@ -65,22 +65,10 @@ static int xfrm_statistics_seq_show(struct seq_file *seq, void *v)
return 0;
}
-static int xfrm_statistics_seq_open(struct inode *inode, struct file *file)
-{
- return single_open_net(inode, file, xfrm_statistics_seq_show);
-}
-
-static const struct file_operations xfrm_statistics_seq_fops = {
- .open = xfrm_statistics_seq_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release_net,
-};
-
int __net_init xfrm_proc_init(struct net *net)
{
- if (!proc_create("xfrm_stat", 0444, net->proc_net,
- &xfrm_statistics_seq_fops))
+ if (!proc_create_net_single("xfrm_stat", 0444, net->proc_net,
+ xfrm_statistics_seq_show, NULL))
return -ENOMEM;
return 0;
}
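For single-value files the equivalent helper is proc_create_net_single(), which replaces the single_open_net()/single_release_net() pair with one show callback. A sketch under the same assumptions (foo_* names are illustrative):

    static int foo_stat_show(struct seq_file *seq, void *v)
    {
            seq_printf(seq, "stat: %lu\n", 0UL);
            return 0;
    }

    static int __net_init foo_stat_init(struct net *net)
    {
            if (!proc_create_net_single("foo_stat", 0444, net->proc_net,
                                        foo_stat_show, NULL))
                    return -ENOMEM;
            return 0;
    }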
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index f9d2f2233f09..6c177ae7a6d9 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -2175,6 +2175,12 @@ struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned int family)
return afinfo;
}
+void xfrm_flush_gc(void)
+{
+ flush_work(&xfrm_state_gc_work);
+}
+EXPORT_SYMBOL(xfrm_flush_gc);
+
/* Temporarily located here until net/xfrm/xfrm_tunnel.c is created */
void xfrm_state_delete_tunnel(struct xfrm_state *x)
{
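The new xfrm_flush_gc() gives other code a way to wait until a queued GC pass has finished; flush_work() blocks until the last queued instance of the work item completes, without queueing a new one. The exported-wrapper pattern, sketched with illustrative names:

    static void foo_gc_task(struct work_struct *work);
    static DECLARE_WORK(foo_gc_work, foo_gc_task);

    /* Teardown paths can park here until any pending GC pass
     * has run to completion.
     */
    void foo_flush_gc(void)
    {
            flush_work(&foo_gc_work);
    }
    EXPORT_SYMBOL(foo_flush_gc);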
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index 4d6a6edd4bf6..092947676143 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -255,7 +255,7 @@ $(obj)/tracex5_kern.o: $(obj)/syscall_nrs.h
$(obj)/%.o: $(src)/%.c
$(CLANG) $(NOSTDINC_FLAGS) $(LINUXINCLUDE) $(EXTRA_CFLAGS) -I$(obj) \
-I$(srctree)/tools/testing/selftests/bpf/ \
- -D__KERNEL__ -Wno-unused-value -Wno-pointer-sign \
+ -D__KERNEL__ -D__BPF_TRACING__ -Wno-unused-value -Wno-pointer-sign \
-D__TARGET_ARCH_$(ARCH) -Wno-compare-distinct-pointer-types \
-Wno-gnu-variable-sized-type-not-at-end \
-Wno-address-of-packed-member -Wno-tautological-compare \
diff --git a/samples/sockmap/Makefile b/samples/sockmap/Makefile
index 9bf2881bd11b..fa53f4d77834 100644
--- a/samples/sockmap/Makefile
+++ b/samples/sockmap/Makefile
@@ -65,11 +65,14 @@ $(src)/*.c: verify_target_bpf
# asm/sysreg.h - inline assembly used by it is incompatible with llvm.
# But, there is no easy way to fix it, so just exclude it since it is
# useless for BPF samples.
+#
+# The -target bpf option is required with SK_MSG programs; it ensures
+# that reads of the 'void *' data and data_end fields are __u64 loads.
$(obj)/%.o: $(src)/%.c
$(CLANG) $(NOSTDINC_FLAGS) $(LINUXINCLUDE) $(EXTRA_CFLAGS) -I$(obj) \
-D__KERNEL__ -D__ASM_SYSREG_H -Wno-unused-value -Wno-pointer-sign \
-Wno-compare-distinct-pointer-types \
-Wno-gnu-variable-sized-type-not-at-end \
-Wno-address-of-packed-member -Wno-tautological-compare \
- -Wno-unknown-warning-option \
- -O2 -emit-llvm -c $< -o -| $(LLC) -march=bpf -filetype=obj -o $@
+ -Wno-unknown-warning-option -O2 -target bpf \
+ -emit-llvm -c $< -o -| $(LLC) -march=bpf -filetype=obj -o $@
diff --git a/scripts/Makefile.gcc-plugins b/scripts/Makefile.gcc-plugins
index b2a95af7df18..7f5c86246138 100644
--- a/scripts/Makefile.gcc-plugins
+++ b/scripts/Makefile.gcc-plugins
@@ -14,7 +14,7 @@ ifdef CONFIG_GCC_PLUGINS
endif
ifdef CONFIG_GCC_PLUGIN_SANCOV
- ifeq ($(CFLAGS_KCOV),)
+ ifeq ($(strip $(CFLAGS_KCOV)),)
# It is needed because of the gcc-plugin.sh and gcc version checks.
gcc-plugin-$(CONFIG_GCC_PLUGIN_SANCOV) += sancov_plugin.so
diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib
index 07d07409f16f..5af34a2b0cd9 100644
--- a/scripts/Makefile.lib
+++ b/scripts/Makefile.lib
@@ -196,7 +196,7 @@ $(obj)/%.tab.c: $(src)/%.y FORCE
$(call if_changed,bison)
quiet_cmd_bison_h = YACC $@
- cmd_bison_h = bison -o/dev/null --defines=$@ -t -l $<
+ cmd_bison_h = $(YACC) -o/dev/null --defines=$@ -t -l $<
$(obj)/%.tab.h: $(src)/%.y FORCE
$(call if_changed,bison_h)
diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index e16d6713f236..2d42eb9cd1a5 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -5041,7 +5041,7 @@ sub process {
$tmp_stmt =~ s/\b(typeof|__typeof__|__builtin\w+|typecheck\s*\(\s*$Type\s*,|\#+)\s*\(*\s*$arg\s*\)*\b//g;
$tmp_stmt =~ s/\#+\s*$arg\b//g;
$tmp_stmt =~ s/\b$arg\s*\#\#//g;
- my $use_cnt = $tmp_stmt =~ s/\b$arg\b//g;
+ my $use_cnt = () = $tmp_stmt =~ /\b$arg\b/g;
if ($use_cnt > 1) {
CHK("MACRO_ARG_REUSE",
"Macro argument reuse '$arg' - possible side-effects?\n" . "$herectx");
diff --git a/scripts/documentation-file-ref-check b/scripts/documentation-file-ref-check
index bc1659900e89..2520bc14ffac 100755
--- a/scripts/documentation-file-ref-check
+++ b/scripts/documentation-file-ref-check
@@ -1,15 +1,116 @@
-#!/bin/sh
+#!/usr/bin/env perl
+# SPDX-License-Identifier: GPL-2.0
+#
# Treewide grep for references to files under Documentation, and report
# non-existing files in stderr.
-for f in $(git ls-files); do
- for ref in $(grep -ho "Documentation/[A-Za-z0-9_.,~/*+-]*" "$f"); do
- # presume trailing . and , are not part of the name
- ref=${ref%%[.,]}
-
- # use ls to handle wildcards
- if ! ls $ref >/dev/null 2>&1; then
- echo "$f: $ref" >&2
- fi
- done
-done
+use warnings;
+use strict;
+use Getopt::Long qw(:config no_auto_abbrev);
+
+my $scriptname = $0;
+$scriptname =~ s,.*/([^/]+/),$1,;
+
+# Parse arguments
+my $help = 0;
+my $fix = 0;
+
+GetOptions(
+ 'fix' => \$fix,
+ 'h|help|usage' => \$help,
+);
+
+if ($help != 0) {
+ print "$scriptname [--help] [--fix-rst]\n";
+ exit -1;
+}
+
+# Step 1: find broken references
+print "Finding broken references. This may take a while... " if ($fix);
+
+my %broken_ref;
+
+open IN, "git grep 'Documentation/'|"
+ or die "Failed to run git grep";
+while (<IN>) {
+ next if (!m/^([^:]+):(.*)/);
+
+ my $f = $1;
+ my $ln = $2;
+
+ # Makefiles contain nasty expressions to parse docs
+ next if ($f =~ m/Makefile/);
+ # Skip this script
+ next if ($f eq $scriptname);
+
+ if ($ln =~ m,\b(\S*)(Documentation/[A-Za-z0-9\_\.\,\~/\*+-]*),) {
+ my $prefix = $1;
+ my $ref = $2;
+ my $base = $2;
+
+ $ref =~ s/[\,\.]+$//;
+
+ my $fulref = "$prefix$ref";
+
+ $fulref =~ s/^(\<file|ref)://;
+ $fulref =~ s/^[\'\`]+//;
+ $fulref =~ s,^\$\(.*\)/,,;
+ $base =~ s,.*/,,;
+
+ # Remove URL false-positives
+ next if ($fulref =~ m/^http/);
+
+ # Check if exists, evaluating wildcards
+ next if (grep -e, glob("$ref $fulref"));
+
+ if ($fix) {
+ if (!($ref =~ m/(devicetree|scripts|Kconfig|Kbuild)/)) {
+ $broken_ref{$ref}++;
+ }
+ } else {
+ print STDERR "$f: $fulref\n";
+ }
+ }
+}
+
+exit 0 if (!$fix);
+
+# Step 2: Seek for file name alternatives
+print "Auto-fixing broken references. Please double-check the results\n";
+
+foreach my $ref (keys %broken_ref) {
+ my $new = $ref;
+
+ # get just the basename
+ $new =~ s,.*/,,;
+
+ # Seek for the same name on another place, as it may have been moved
+ my $f="";
+
+ $f = qx(find . -iname $new) if ($new);
+
+ # usual reason for breakage: file renamed to .rst
+ if (!$f) {
+ $new =~ s/\.txt$/.rst/;
+ $f = qx(find . -iname $new) if ($new);
+ }
+
+ my @find = split /\s+/, $f;
+
+ if (!$f) {
+ print STDERR "ERROR: Didn't find a replacement for $ref\n";
+ } elsif (scalar(@find) > 1) {
+ print STDERR "WARNING: Won't auto-replace, as found multiple files close to $ref:\n";
+ foreach my $j (@find) {
+ $j =~ s,^./,,;
+ print STDERR " $j\n";
+ }
+ } else {
+ $f = $find[0];
+ $f =~ s,^./,,;
+ print "INFO: Replacing $ref to $f\n";
+ foreach my $j (qx(git grep -l $ref)) {
+ qx(sed "s\@$ref\@$f\@g" -i $j);
+ }
+ }
+}
diff --git a/scripts/dtc/checks.c b/scripts/dtc/checks.c
index c07ba4da9e36..815eaf140ab5 100644
--- a/scripts/dtc/checks.c
+++ b/scripts/dtc/checks.c
@@ -787,10 +787,9 @@ static void check_pci_bridge(struct check *c, struct dt_info *dti, struct node *
FAIL(c, dti, node, "incorrect #size-cells for PCI bridge");
prop = get_property(node, "bus-range");
- if (!prop) {
- FAIL(c, dti, node, "missing bus-range for PCI bridge");
+ if (!prop)
return;
- }
+
if (prop->val.len != (sizeof(cell_t) * 2)) {
FAIL_PROP(c, dti, node, prop, "value must be 2 cells");
return;
diff --git a/scripts/extract_xc3028.pl b/scripts/extract_xc3028.pl
index 61d9b256c658..a1c51b7e4baf 100755
--- a/scripts/extract_xc3028.pl
+++ b/scripts/extract_xc3028.pl
@@ -1,6 +1,6 @@
#!/usr/bin/env perl
-# Copyright (c) Mauro Carvalho Chehab <mchehab@infradead.org>
+# Copyright (c) Mauro Carvalho Chehab <mchehab@kernel.org>
# Released under GPLv2
#
# In order to use, you need to:
diff --git a/scripts/faddr2line b/scripts/faddr2line
index 9e5735a4d3a5..1876a741087c 100755
--- a/scripts/faddr2line
+++ b/scripts/faddr2line
@@ -170,7 +170,10 @@ __faddr2line() {
echo "$file_lines" | while read -r line
do
echo $line
- eval $(echo $line | awk -F "[ :]" '{printf("n1=%d;n2=%d;f=%s",$NF-5, $NF+5, $(NF-1))}')
+ n=$(echo $line | sed 's/.*:\([0-9]\+\).*/\1/g')
+ n1=$[$n-5]
+ n2=$[$n+5]
+ f=$(echo $line | sed 's/.*at \(.\+\):.*/\1/g')
awk 'NR>=strtonum("'$n1'") && NR<=strtonum("'$n2'") {printf("%d\t%s\n", NR, $0)}' $f
done
diff --git a/scripts/genksyms/Makefile b/scripts/genksyms/Makefile
index ef0287e42957..03b7ce97de14 100644
--- a/scripts/genksyms/Makefile
+++ b/scripts/genksyms/Makefile
@@ -14,14 +14,14 @@ genksyms-objs := genksyms.o parse.tab.o lex.lex.o
# so that 'bison: not found' will be displayed if it is missing.
ifeq ($(findstring 1,$(KBUILD_ENABLE_EXTRA_GCC_CHECKS)),)
-quiet_cmd_bison_no_warn = $(quet_cmd_bison)
+quiet_cmd_bison_no_warn = $(quiet_cmd_bison)
cmd_bison_no_warn = $(YACC) --version >/dev/null; \
$(cmd_bison) 2>/dev/null
$(obj)/parse.tab.c: $(src)/parse.y FORCE
$(call if_changed,bison_no_warn)
-quiet_cmd_bison_h_no_warn = $(quet_cmd_bison_h)
+quiet_cmd_bison_h_no_warn = $(quiet_cmd_bison_h)
cmd_bison_h_no_warn = $(YACC) --version >/dev/null; \
$(cmd_bison_h) 2>/dev/null
diff --git a/scripts/mod/sumversion.c b/scripts/mod/sumversion.c
index 944418da9fe3..0f6dcb4011a8 100644
--- a/scripts/mod/sumversion.c
+++ b/scripts/mod/sumversion.c
@@ -330,14 +330,7 @@ static int parse_source_files(const char *objfile, struct md4_ctx *md)
goto out;
}
- /* There will be a line like so:
- deps_drivers/net/dummy.o := \
- drivers/net/dummy.c \
- $(wildcard include/config/net/fastroute.h) \
- include/linux/module.h \
-
- Sum all files in the same dir or subdirs.
- */
+ /* Sum all files in the same dir or subdirs. */
while ((line = get_next_line(&pos, file, flen)) != NULL) {
char* p = line;
diff --git a/scripts/spdxcheck.py b/scripts/spdxcheck.py
new file mode 100755
index 000000000000..7deaef297f52
--- /dev/null
+++ b/scripts/spdxcheck.py
@@ -0,0 +1,284 @@
+#!/usr/bin/env python
+# SPDX-License-Identifier: GPL-2.0
+# Copyright Thomas Gleixner <tglx@linutronix.de>
+
+from argparse import ArgumentParser
+from ply import lex, yacc
+import traceback
+import sys
+import git
+import re
+import os
+
+class ParserException(Exception):
+ def __init__(self, tok, txt):
+ self.tok = tok
+ self.txt = txt
+
+class SPDXException(Exception):
+ def __init__(self, el, txt):
+ self.el = el
+ self.txt = txt
+
+class SPDXdata(object):
+ def __init__(self):
+ self.license_files = 0
+ self.exception_files = 0
+ self.licenses = [ ]
+ self.exceptions = { }
+
+# Read the spdx data from the LICENSES directory
+def read_spdxdata(repo):
+
+ # The subdirectories of LICENSES in the kernel source
+ license_dirs = [ "preferred", "other", "exceptions" ]
+ lictree = repo.heads.master.commit.tree['LICENSES']
+
+ spdx = SPDXdata()
+
+ for d in license_dirs:
+ for el in lictree[d].traverse():
+ if not os.path.isfile(el.path):
+ continue
+
+ exception = None
+ for l in open(el.path).readlines():
+ if l.startswith('Valid-License-Identifier:'):
+ lid = l.split(':')[1].strip().upper()
+ if lid in spdx.licenses:
+ raise SPDXException(el, 'Duplicate License Identifier: %s' %lid)
+ else:
+ spdx.licenses.append(lid)
+
+ elif l.startswith('SPDX-Exception-Identifier:'):
+ exception = l.split(':')[1].strip().upper()
+ spdx.exceptions[exception] = []
+
+ elif l.startswith('SPDX-Licenses:'):
+ for lic in l.split(':')[1].upper().strip().replace(' ', '').replace('\t', '').split(','):
+ if not lic in spdx.licenses:
+ raise SPDXException(None, 'Exception %s missing license %s' %(exception, lic))
+ spdx.exceptions[exception].append(lic)
+
+ elif l.startswith("License-Text:"):
+ if exception:
+ if not len(spdx.exceptions[exception]):
+ raise SPDXException(el, 'Exception %s is missing SPDX-Licenses' %exception)
+ spdx.exception_files += 1
+ else:
+ spdx.license_files += 1
+ break
+ return spdx
+
+class id_parser(object):
+
+ reserved = [ 'AND', 'OR', 'WITH' ]
+ tokens = [ 'LPAR', 'RPAR', 'ID', 'EXC' ] + reserved
+
+ precedence = ( ('nonassoc', 'AND', 'OR'), )
+
+ t_ignore = ' \t'
+
+ def __init__(self, spdx):
+ self.spdx = spdx
+ self.lasttok = None
+ self.lastid = None
+ self.lexer = lex.lex(module = self, reflags = re.UNICODE)
+ # Initialize the parser. No debug file and no parser rules stored on disk
+ # The rules are small enough to be generated on the fly
+ self.parser = yacc.yacc(module = self, write_tables = False, debug = False)
+ self.lines_checked = 0
+ self.checked = 0
+ self.spdx_valid = 0
+ self.spdx_errors = 0
+ self.curline = 0
+ self.deepest = 0
+
+ # Validate License and Exception IDs
+ def validate(self, tok):
+ id = tok.value.upper()
+ if tok.type == 'ID':
+ if not id in self.spdx.licenses:
+ raise ParserException(tok, 'Invalid License ID')
+ self.lastid = id
+ elif tok.type == 'EXC':
+ if id not in self.spdx.exceptions:
+ raise ParserException(tok, 'Invalid Exception ID')
+ if self.lastid not in self.spdx.exceptions[id]:
+ raise ParserException(tok, 'Exception not valid for license %s' %self.lastid)
+ self.lastid = None
+ elif tok.type != 'WITH':
+ self.lastid = None
+
+ # Lexer functions
+ def t_RPAR(self, tok):
+ r'\)'
+ self.lasttok = tok.type
+ return tok
+
+ def t_LPAR(self, tok):
+ r'\('
+ self.lasttok = tok.type
+ return tok
+
+ def t_ID(self, tok):
+ r'[A-Za-z.0-9\-+]+'
+
+ if self.lasttok == 'EXC':
+ print(tok)
+ raise ParserException(tok, 'Missing parentheses')
+
+ tok.value = tok.value.strip()
+ val = tok.value.upper()
+
+ if val in self.reserved:
+ tok.type = val
+ elif self.lasttok == 'WITH':
+ tok.type = 'EXC'
+
+ self.lasttok = tok.type
+ self.validate(tok)
+ return tok
+
+ def t_error(self, tok):
+ raise ParserException(tok, 'Invalid token')
+
+ def p_expr(self, p):
+ '''expr : ID
+ | ID WITH EXC
+ | expr AND expr
+ | expr OR expr
+ | LPAR expr RPAR'''
+ pass
+
+ def p_error(self, p):
+ if not p:
+ raise ParserException(None, 'Unfinished license expression')
+ else:
+ raise ParserException(p, 'Syntax error')
+
+ def parse(self, expr):
+ self.lasttok = None
+ self.lastid = None
+ self.parser.parse(expr, lexer = self.lexer)
+
+ def parse_lines(self, fd, maxlines, fname):
+ self.checked += 1
+ self.curline = 0
+ try:
+ for line in fd:
+ self.curline += 1
+ if self.curline > maxlines:
+ break
+ self.lines_checked += 1
+ if line.find("SPDX-License-Identifier:") < 0:
+ continue
+ expr = line.split(':')[1].replace('*/', '').strip()
+ self.parse(expr)
+ self.spdx_valid += 1
+ #
+ # Should we check for more SPDX ids in the same file and
+ # complain if there are any?
+ #
+ break
+
+ except ParserException as pe:
+ if pe.tok:
+ col = line.find(expr) + pe.tok.lexpos
+ tok = pe.tok.value
+ sys.stdout.write('%s: %d:%d %s: %s\n' %(fname, self.curline, col, pe.txt, tok))
+ else:
+ sys.stdout.write('%s: %d:0 %s\n' %(fname, self.curline, pe.txt))
+ self.spdx_errors += 1
+
+def scan_git_tree(tree):
+ for el in tree.traverse():
+ # Exclude stuff which would make pointless noise
+ # FIXME: Put this somewhere more sensible
+ if el.path.startswith("LICENSES"):
+ continue
+ if el.path.find("license-rules.rst") >= 0:
+ continue
+ if el.path == 'scripts/checkpatch.pl':
+ continue
+ if not os.path.isfile(el.path):
+ continue
+ parser.parse_lines(open(el.path), args.maxlines, el.path)
+
+def scan_git_subtree(tree, path):
+ for p in path.strip('/').split('/'):
+ tree = tree[p]
+ scan_git_tree(tree)
+
+if __name__ == '__main__':
+
+ ap = ArgumentParser(description='SPDX expression checker')
+ ap.add_argument('path', nargs='*', help='Check path or file. If not given, scan the full git tree. For stdin use "-"')
+ ap.add_argument('-m', '--maxlines', type=int, default=15,
+ help='Maximum number of lines to scan in a file. Default 15')
+ ap.add_argument('-v', '--verbose', action='store_true', help='Verbose statistics output')
+ args = ap.parse_args()
+
+ # Sanity check path arguments
+ if '-' in args.path and len(args.path) > 1:
+ sys.stderr.write('stdin input "-" must be the only path argument\n')
+ sys.exit(1)
+
+ try:
+ # Use git to get the valid license expressions
+ repo = git.Repo(os.getcwd())
+ assert not repo.bare
+
+ # Initialize SPDX data
+ spdx = read_spdxdata(repo)
+
+ # Initialize the parser
+ parser = id_parser(spdx)
+
+ except SPDXException as se:
+ if se.el:
+ sys.stderr.write('%s: %s\n' %(se.el.path, se.txt))
+ else:
+ sys.stderr.write('%s\n' %se.txt)
+ sys.exit(1)
+
+ except Exception as ex:
+ sys.stderr.write('FAIL: %s\n' %ex)
+ sys.stderr.write('%s\n' %traceback.format_exc())
+ sys.exit(1)
+
+ try:
+ if len(args.path) and args.path[0] == '-':
+ parser.parse_lines(sys.stdin, args.maxlines, '-')
+ else:
+ if args.path:
+ for p in args.path:
+ if os.path.isfile(p):
+ parser.parse_lines(open(p), args.maxlines, p)
+ elif os.path.isdir(p):
+ scan_git_subtree(repo.head.reference.commit.tree, p)
+ else:
+ sys.stderr.write('path %s does not exist\n' %p)
+ sys.exit(1)
+ else:
+ # Full git tree scan
+ scan_git_tree(repo.head.commit.tree)
+
+ if args.verbose:
+ sys.stderr.write('\n')
+ sys.stderr.write('License files: %12d\n' %spdx.license_files)
+ sys.stderr.write('Exception files: %12d\n' %spdx.exception_files)
+ sys.stderr.write('License IDs %12d\n' %len(spdx.licenses))
+ sys.stderr.write('Exception IDs %12d\n' %len(spdx.exceptions))
+ sys.stderr.write('\n')
+ sys.stderr.write('Files checked: %12d\n' %parser.checked)
+ sys.stderr.write('Lines checked: %12d\n' %parser.lines_checked)
+ sys.stderr.write('Files with SPDX: %12d\n' %parser.spdx_valid)
+ sys.stderr.write('Files with errors: %12d\n' %parser.spdx_errors)
+
+ sys.exit(0)
+
+ except Exception as ex:
+ sys.stderr.write('FAIL: %s\n' %ex)
+ sys.stderr.write('%s\n' %traceback.format_exc())
+ sys.exit(1)
diff --git a/scripts/split-man.pl b/scripts/split-man.pl
index bfe16cbe42df..c3db607ee9ec 100755
--- a/scripts/split-man.pl
+++ b/scripts/split-man.pl
@@ -1,7 +1,7 @@
#!/usr/bin/perl
# SPDX-License-Identifier: GPL-2.0
#
-# Author: Mauro Carvalho Chehab <mchehab@s-opensource.com>
+# Author: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
#
# Produce manpages from kernel-doc.
# See Documentation/doc-guide/kernel-doc.rst for instructions
diff --git a/security/commoncap.c b/security/commoncap.c
index 1ce701fcb3f3..f4c33abd9959 100644
--- a/security/commoncap.c
+++ b/security/commoncap.c
@@ -919,6 +919,8 @@ int cap_bprm_set_creds(struct linux_binprm *bprm)
int cap_inode_setxattr(struct dentry *dentry, const char *name,
const void *value, size_t size, int flags)
{
+ struct user_namespace *user_ns = dentry->d_sb->s_user_ns;
+
/* Ignore non-security xattrs */
if (strncmp(name, XATTR_SECURITY_PREFIX,
sizeof(XATTR_SECURITY_PREFIX) - 1) != 0)
@@ -931,7 +933,7 @@ int cap_inode_setxattr(struct dentry *dentry, const char *name,
if (strcmp(name, XATTR_NAME_CAPS) == 0)
return 0;
- if (!capable(CAP_SYS_ADMIN))
+ if (!ns_capable(user_ns, CAP_SYS_ADMIN))
return -EPERM;
return 0;
}
@@ -949,6 +951,8 @@ int cap_inode_setxattr(struct dentry *dentry, const char *name,
*/
int cap_inode_removexattr(struct dentry *dentry, const char *name)
{
+ struct user_namespace *user_ns = dentry->d_sb->s_user_ns;
+
/* Ignore non-security xattrs */
if (strncmp(name, XATTR_SECURITY_PREFIX,
sizeof(XATTR_SECURITY_PREFIX) - 1) != 0)
@@ -964,7 +968,7 @@ int cap_inode_removexattr(struct dentry *dentry, const char *name)
return 0;
}
- if (!capable(CAP_SYS_ADMIN))
+ if (!ns_capable(user_ns, CAP_SYS_ADMIN))
return -EPERM;
return 0;
}
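Both commoncap.c hunks relax the capability check from capable(), which tests against init_user_ns, to ns_capable() against the user namespace that owns the superblock, so a container's root can manage security xattrs on filesystems it mounted. A condensed sketch of the check (function name is illustrative):

    static int foo_xattr_permission(struct dentry *dentry)
    {
            struct user_namespace *user_ns = dentry->d_sb->s_user_ns;

            /* CAP_SYS_ADMIN in the namespace owning the filesystem is
             * sufficient; capable() would insist on global root.
             */
            if (!ns_capable(user_ns, CAP_SYS_ADMIN))
                    return -EPERM;
            return 0;
    }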
diff --git a/security/integrity/evm/evm_crypto.c b/security/integrity/evm/evm_crypto.c
index a46fba322340..facf9cdd577d 100644
--- a/security/integrity/evm/evm_crypto.c
+++ b/security/integrity/evm/evm_crypto.c
@@ -200,7 +200,8 @@ static int evm_calc_hmac_or_hash(struct dentry *dentry,
int size;
bool ima_present = false;
- if (!(inode->i_opflags & IOP_XATTR))
+ if (!(inode->i_opflags & IOP_XATTR) ||
+ inode->i_sb->s_user_ns != &init_user_ns)
return -EOPNOTSUPP;
desc = init_desc(type);
diff --git a/security/keys/proc.c b/security/keys/proc.c
index fbc4af5c6c9f..5af2934965d8 100644
--- a/security/keys/proc.c
+++ b/security/keys/proc.c
@@ -18,7 +18,6 @@
#include <asm/errno.h>
#include "internal.h"
-static int proc_keys_open(struct inode *inode, struct file *file);
static void *proc_keys_start(struct seq_file *p, loff_t *_pos);
static void *proc_keys_next(struct seq_file *p, void *v, loff_t *_pos);
static void proc_keys_stop(struct seq_file *p, void *v);
@@ -31,14 +30,6 @@ static const struct seq_operations proc_keys_ops = {
.show = proc_keys_show,
};
-static const struct file_operations proc_keys_fops = {
- .open = proc_keys_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
-static int proc_key_users_open(struct inode *inode, struct file *file);
static void *proc_key_users_start(struct seq_file *p, loff_t *_pos);
static void *proc_key_users_next(struct seq_file *p, void *v, loff_t *_pos);
static void proc_key_users_stop(struct seq_file *p, void *v);
@@ -51,13 +42,6 @@ static const struct seq_operations proc_key_users_ops = {
.show = proc_key_users_show,
};
-static const struct file_operations proc_key_users_fops = {
- .open = proc_key_users_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
/*
* Declare the /proc files.
*/
@@ -65,11 +49,11 @@ static int __init key_proc_init(void)
{
struct proc_dir_entry *p;
- p = proc_create("keys", 0, NULL, &proc_keys_fops);
+ p = proc_create_seq("keys", 0, NULL, &proc_keys_ops);
if (!p)
panic("Cannot create /proc/keys\n");
- p = proc_create("key-users", 0, NULL, &proc_key_users_fops);
+ p = proc_create_seq("key-users", 0, NULL, &proc_key_users_ops);
if (!p)
panic("Cannot create /proc/key-users\n");
@@ -96,11 +80,6 @@ static struct rb_node *key_serial_next(struct seq_file *p, struct rb_node *n)
return n;
}
-static int proc_keys_open(struct inode *inode, struct file *file)
-{
- return seq_open(file, &proc_keys_ops);
-}
-
static struct key *find_ge_key(struct seq_file *p, key_serial_t id)
{
struct user_namespace *user_ns = seq_user_ns(p);
@@ -293,15 +272,6 @@ static struct rb_node *key_user_first(struct user_namespace *user_ns, struct rb_
return __key_user_next(user_ns, n);
}
-/*
- * Implement "/proc/key-users" to provides a list of the key users and their
- * quotas.
- */
-static int proc_key_users_open(struct inode *inode, struct file *file)
-{
- return seq_open(file, &proc_key_users_ops);
-}
-
static void *proc_key_users_start(struct seq_file *p, loff_t *_pos)
__acquires(key_user_lock)
{
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 4cafe6a19167..99c4675952f7 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -274,11 +274,10 @@ static int inode_doinit_with_dentry(struct inode *inode, struct dentry *opt_dent
* Try reloading inode security labels that have been marked as invalid. The
* @may_sleep parameter indicates when sleeping and thus reloading labels is
* allowed; when set to false, returns -ECHILD when the label is
- * invalid. The @opt_dentry parameter should be set to a dentry of the inode;
- * when no dentry is available, set it to NULL instead.
+ * invalid. The @dentry parameter should be set to a dentry of the inode.
*/
static int __inode_security_revalidate(struct inode *inode,
- struct dentry *opt_dentry,
+ struct dentry *dentry,
bool may_sleep)
{
struct inode_security_struct *isec = inode->i_security;
@@ -295,7 +294,7 @@ static int __inode_security_revalidate(struct inode *inode,
* @opt_dentry is NULL and no dentry for this inode can be
* found; in that case, continue using the old label.
*/
- inode_doinit_with_dentry(inode, opt_dentry);
+ inode_doinit_with_dentry(inode, dentry);
}
return 0;
}
@@ -1568,8 +1567,15 @@ static int inode_doinit_with_dentry(struct inode *inode, struct dentry *opt_dent
/* Called from d_instantiate or d_splice_alias. */
dentry = dget(opt_dentry);
} else {
- /* Called from selinux_complete_init, try to find a dentry. */
+ /*
+ * Called from selinux_complete_init, try to find a dentry.
+ * Some filesystems really want a connected one, so try
+ * that first. We could split SECURITY_FS_USE_XATTR in
+ * two, depending upon that...
+ */
dentry = d_find_alias(inode);
+ if (!dentry)
+ dentry = d_find_any_alias(inode);
}
if (!dentry) {
/*
@@ -1674,14 +1680,19 @@ static int inode_doinit_with_dentry(struct inode *inode, struct dentry *opt_dent
if ((sbsec->flags & SE_SBGENFS) && !S_ISLNK(inode->i_mode)) {
/* We must have a dentry to determine the label on
* procfs inodes */
- if (opt_dentry)
+ if (opt_dentry) {
/* Called from d_instantiate or
* d_splice_alias. */
dentry = dget(opt_dentry);
- else
+ } else {
/* Called from selinux_complete_init, try to
- * find a dentry. */
+ * find a dentry. Some filesystems really want
+ * a connected one, so try that first.
+ */
dentry = d_find_alias(inode);
+ if (!dentry)
+ dentry = d_find_any_alias(inode);
+ }
/*
* This can be hit on boot when a file is accessed
* before the policy is loaded. When we load policy we
@@ -4576,6 +4587,7 @@ static int selinux_socket_post_create(struct socket *sock, int family,
static int selinux_socket_bind(struct socket *sock, struct sockaddr *address, int addrlen)
{
struct sock *sk = sock->sk;
+ struct sk_security_struct *sksec = sk->sk_security;
u16 family;
int err;
@@ -4587,11 +4599,11 @@ static int selinux_socket_bind(struct socket *sock, struct sockaddr *address, in
family = sk->sk_family;
if (family == PF_INET || family == PF_INET6) {
char *addrp;
- struct sk_security_struct *sksec = sk->sk_security;
struct common_audit_data ad;
struct lsm_network_audit net = {0,};
struct sockaddr_in *addr4 = NULL;
struct sockaddr_in6 *addr6 = NULL;
+ u16 family_sa = address->sa_family;
unsigned short snum;
u32 sid, node_perm;
@@ -4601,11 +4613,20 @@ static int selinux_socket_bind(struct socket *sock, struct sockaddr *address, in
* need to check address->sa_family as it is possible to have
* sk->sk_family = PF_INET6 with addr->sa_family = AF_INET.
*/
- switch (address->sa_family) {
+ switch (family_sa) {
+ case AF_UNSPEC:
case AF_INET:
if (addrlen < sizeof(struct sockaddr_in))
return -EINVAL;
addr4 = (struct sockaddr_in *)address;
+ if (family_sa == AF_UNSPEC) {
+ /* see __inet_bind(), we only want to allow
+ * AF_UNSPEC if the address is INADDR_ANY
+ */
+ if (addr4->sin_addr.s_addr != htonl(INADDR_ANY))
+ goto err_af;
+ family_sa = AF_INET;
+ }
snum = ntohs(addr4->sin_port);
addrp = (char *)&addr4->sin_addr.s_addr;
break;
@@ -4617,15 +4638,14 @@ static int selinux_socket_bind(struct socket *sock, struct sockaddr *address, in
addrp = (char *)&addr6->sin6_addr.s6_addr;
break;
default:
- /* Note that SCTP services expect -EINVAL, whereas
- * others expect -EAFNOSUPPORT.
- */
- if (sksec->sclass == SECCLASS_SCTP_SOCKET)
- return -EINVAL;
- else
- return -EAFNOSUPPORT;
+ goto err_af;
}
+ ad.type = LSM_AUDIT_DATA_NET;
+ ad.u.net = &net;
+ ad.u.net->sport = htons(snum);
+ ad.u.net->family = family_sa;
+
if (snum) {
int low, high;
@@ -4637,10 +4657,6 @@ static int selinux_socket_bind(struct socket *sock, struct sockaddr *address, in
snum, &sid);
if (err)
goto out;
- ad.type = LSM_AUDIT_DATA_NET;
- ad.u.net = &net;
- ad.u.net->sport = htons(snum);
- ad.u.net->family = family;
err = avc_has_perm(&selinux_state,
sksec->sid, sid,
sksec->sclass,
@@ -4672,16 +4688,11 @@ static int selinux_socket_bind(struct socket *sock, struct sockaddr *address, in
break;
}
- err = sel_netnode_sid(addrp, family, &sid);
+ err = sel_netnode_sid(addrp, family_sa, &sid);
if (err)
goto out;
- ad.type = LSM_AUDIT_DATA_NET;
- ad.u.net = &net;
- ad.u.net->sport = htons(snum);
- ad.u.net->family = family;
-
- if (address->sa_family == AF_INET)
+ if (family_sa == AF_INET)
ad.u.net->v4info.saddr = addr4->sin_addr.s_addr;
else
ad.u.net->v6info.saddr = addr6->sin6_addr;
@@ -4694,6 +4705,11 @@ static int selinux_socket_bind(struct socket *sock, struct sockaddr *address, in
}
out:
return err;
+err_af:
+ /* Note that SCTP services expect -EINVAL, others -EAFNOSUPPORT. */
+ if (sksec->sclass == SECCLASS_SCTP_SOCKET)
+ return -EINVAL;
+ return -EAFNOSUPPORT;
}
/* This supports connect(2) and SCTP connect services such as sctp_connectx(3)
@@ -4771,7 +4787,7 @@ static int selinux_socket_connect_helper(struct socket *sock,
ad.type = LSM_AUDIT_DATA_NET;
ad.u.net = &net;
ad.u.net->dport = htons(snum);
- ad.u.net->family = sk->sk_family;
+ ad.u.net->family = address->sa_family;
err = avc_has_perm(&selinux_state,
sksec->sid, sid, sksec->sclass, perm, &ad);
if (err)
@@ -5272,6 +5288,7 @@ static int selinux_sctp_bind_connect(struct sock *sk, int optname,
while (walk_size < addrlen) {
addr = addr_buf;
switch (addr->sa_family) {
+ case AF_UNSPEC:
case AF_INET:
len = sizeof(struct sockaddr_in);
break;
@@ -5279,7 +5296,7 @@ static int selinux_sctp_bind_connect(struct sock *sk, int optname,
len = sizeof(struct sockaddr_in6);
break;
default:
- return -EAFNOSUPPORT;
+ return -EINVAL;
}
err = -EINVAL;
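The bind hook now mirrors __inet_bind(): AF_UNSPEC is tolerated, but only as an alias for the IPv4 wildcard; anything else falls through to the err_af label, which keeps the historical split of -EINVAL for SCTP sockets versus -EAFNOSUPPORT for the rest. The core of the check, reduced to a sketch using the names from the hunk:

    if (family_sa == AF_UNSPEC) {
            struct sockaddr_in *addr4 = (struct sockaddr_in *)address;

            /* AF_UNSPEC only stands in for AF_INET when the address
             * is the wildcard; see __inet_bind().
             */
            if (addr4->sin_addr.s_addr != htonl(INADDR_ANY))
                    goto err_af;
            family_sa = AF_INET;
    }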
diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c
index 8057e19dc15f..3ce225e3f142 100644
--- a/security/selinux/ss/services.c
+++ b/security/selinux/ss/services.c
@@ -1494,7 +1494,7 @@ static int security_context_to_sid_core(struct selinux_state *state,
scontext_len, &context, def_sid);
if (rc == -EINVAL && force) {
context.str = str;
- context.len = scontext_len;
+ context.len = strlen(str) + 1;
str = NULL;
} else if (rc)
goto out_unlock;
diff --git a/sound/core/control_compat.c b/sound/core/control_compat.c
index a848836a5de0..507fd5210c1c 100644
--- a/sound/core/control_compat.c
+++ b/sound/core/control_compat.c
@@ -396,8 +396,7 @@ static int snd_ctl_elem_add_compat(struct snd_ctl_file *file,
if (copy_from_user(&data->id, &data32->id, sizeof(data->id)) ||
copy_from_user(&data->type, &data32->type, 3 * sizeof(u32)))
goto error;
- if (get_user(data->owner, &data32->owner) ||
- get_user(data->type, &data32->type))
+ if (get_user(data->owner, &data32->owner))
goto error;
switch (data->type) {
case SNDRV_CTL_ELEM_TYPE_BOOLEAN:
diff --git a/sound/core/pcm_compat.c b/sound/core/pcm_compat.c
index 06d7c40af570..6491afbb5fd5 100644
--- a/sound/core/pcm_compat.c
+++ b/sound/core/pcm_compat.c
@@ -423,6 +423,8 @@ static int snd_pcm_ioctl_xfern_compat(struct snd_pcm_substream *substream,
return -ENOTTY;
if (substream->stream != dir)
return -EINVAL;
+ if (substream->runtime->status->state == SNDRV_PCM_STATE_OPEN)
+ return -EBADFD;
if ((ch = substream->runtime->channels) > 128)
return -EINVAL;
diff --git a/sound/core/seq/seq_virmidi.c b/sound/core/seq/seq_virmidi.c
index f48a4cd24ffc..289ae6bb81d9 100644
--- a/sound/core/seq/seq_virmidi.c
+++ b/sound/core/seq/seq_virmidi.c
@@ -174,12 +174,12 @@ static void snd_virmidi_output_trigger(struct snd_rawmidi_substream *substream,
}
return;
}
+ spin_lock_irqsave(&substream->runtime->lock, flags);
if (vmidi->event.type != SNDRV_SEQ_EVENT_NONE) {
if (snd_seq_kernel_client_dispatch(vmidi->client, &vmidi->event, in_atomic(), 0) < 0)
- return;
+ goto out;
vmidi->event.type = SNDRV_SEQ_EVENT_NONE;
}
- spin_lock_irqsave(&substream->runtime->lock, flags);
while (1) {
count = __snd_rawmidi_transmit_peek(substream, buf, sizeof(buf));
if (count <= 0)
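The virmidi fix widens the critical section: the pending-event check and its dispatch now happen under runtime->lock rather than before taking it, so a racing trigger cannot consume the event twice. The general shape, sketched with a hypothetical dispatch() helper:

    static DEFINE_SPINLOCK(dev_lock);
    static bool event_pending;
    static int dispatch(void);          /* illustrative */

    static void output_trigger(void)
    {
            unsigned long flags;

            spin_lock_irqsave(&dev_lock, flags);
            if (event_pending) {        /* tested under the lock ... */
                    if (dispatch() < 0)
                            goto out;   /* ... released on every exit */
                    event_pending = false;
            }
            /* ... drain the rawmidi FIFO ... */
    out:
            spin_unlock_irqrestore(&dev_lock, flags);
    }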
diff --git a/sound/core/timer.c b/sound/core/timer.c
index dc87728c5b74..0ddcae495838 100644
--- a/sound/core/timer.c
+++ b/sound/core/timer.c
@@ -592,7 +592,7 @@ static int snd_timer_stop1(struct snd_timer_instance *timeri, bool stop)
else
timeri->flags |= SNDRV_TIMER_IFLG_PAUSED;
snd_timer_notify1(timeri, stop ? SNDRV_TIMER_EVENT_STOP :
- SNDRV_TIMER_EVENT_CONTINUE);
+ SNDRV_TIMER_EVENT_PAUSE);
unlock:
spin_unlock_irqrestore(&timer->lock, flags);
return result;
@@ -614,7 +614,7 @@ static int snd_timer_stop_slave(struct snd_timer_instance *timeri, bool stop)
list_del_init(&timeri->ack_list);
list_del_init(&timeri->active_list);
snd_timer_notify1(timeri, stop ? SNDRV_TIMER_EVENT_STOP :
- SNDRV_TIMER_EVENT_CONTINUE);
+ SNDRV_TIMER_EVENT_PAUSE);
spin_unlock(&timeri->timer->lock);
}
spin_unlock_irqrestore(&slave_active_lock, flags);
diff --git a/sound/drivers/aloop.c b/sound/drivers/aloop.c
index 58e349fc893f..eab7f594ebe7 100644
--- a/sound/drivers/aloop.c
+++ b/sound/drivers/aloop.c
@@ -831,9 +831,11 @@ static int loopback_rate_shift_get(struct snd_kcontrol *kcontrol,
{
struct loopback *loopback = snd_kcontrol_chip(kcontrol);
+ mutex_lock(&loopback->cable_lock);
ucontrol->value.integer.value[0] =
loopback->setup[kcontrol->id.subdevice]
[kcontrol->id.device].rate_shift;
+ mutex_unlock(&loopback->cable_lock);
return 0;
}
@@ -865,9 +867,11 @@ static int loopback_notify_get(struct snd_kcontrol *kcontrol,
{
struct loopback *loopback = snd_kcontrol_chip(kcontrol);
+ mutex_lock(&loopback->cable_lock);
ucontrol->value.integer.value[0] =
loopback->setup[kcontrol->id.subdevice]
[kcontrol->id.device].notify;
+ mutex_unlock(&loopback->cable_lock);
return 0;
}
@@ -879,12 +883,14 @@ static int loopback_notify_put(struct snd_kcontrol *kcontrol,
int change = 0;
val = ucontrol->value.integer.value[0] ? 1 : 0;
+ mutex_lock(&loopback->cable_lock);
if (val != loopback->setup[kcontrol->id.subdevice]
[kcontrol->id.device].notify) {
loopback->setup[kcontrol->id.subdevice]
[kcontrol->id.device].notify = val;
change = 1;
}
+ mutex_unlock(&loopback->cable_lock);
return change;
}
@@ -892,15 +898,18 @@ static int loopback_active_get(struct snd_kcontrol *kcontrol,
struct snd_ctl_elem_value *ucontrol)
{
struct loopback *loopback = snd_kcontrol_chip(kcontrol);
- struct loopback_cable *cable = loopback->cables
- [kcontrol->id.subdevice][kcontrol->id.device ^ 1];
+ struct loopback_cable *cable;
+
unsigned int val = 0;
+ mutex_lock(&loopback->cable_lock);
+ cable = loopback->cables[kcontrol->id.subdevice][kcontrol->id.device ^ 1];
if (cable != NULL) {
unsigned int running = cable->running ^ cable->pause;
val = (running & (1 << SNDRV_PCM_STREAM_PLAYBACK)) ? 1 : 0;
}
+ mutex_unlock(&loopback->cable_lock);
ucontrol->value.integer.value[0] = val;
return 0;
}
@@ -943,9 +952,11 @@ static int loopback_rate_get(struct snd_kcontrol *kcontrol,
{
struct loopback *loopback = snd_kcontrol_chip(kcontrol);
+ mutex_lock(&loopback->cable_lock);
ucontrol->value.integer.value[0] =
loopback->setup[kcontrol->id.subdevice]
[kcontrol->id.device].rate;
+ mutex_unlock(&loopback->cable_lock);
return 0;
}
@@ -965,9 +976,11 @@ static int loopback_channels_get(struct snd_kcontrol *kcontrol,
{
struct loopback *loopback = snd_kcontrol_chip(kcontrol);
+ mutex_lock(&loopback->cable_lock);
ucontrol->value.integer.value[0] =
loopback->setup[kcontrol->id.subdevice]
[kcontrol->id.device].channels;
+ mutex_unlock(&loopback->cable_lock);
return 0;
}
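All of the aloop hunks apply one pattern: the kcontrol get/put callbacks now take cable_lock around reads and writes of the shared setup[] and cables[] state, so a concurrent cable teardown cannot be observed half-done. The guarded-getter shape, with a hypothetical struct foo standing in for the driver state:

    struct foo {
            struct mutex cable_lock;
            unsigned int rate;
    };

    static int foo_rate_get(struct snd_kcontrol *kcontrol,
                            struct snd_ctl_elem_value *ucontrol)
    {
            struct foo *chip = snd_kcontrol_chip(kcontrol);

            mutex_lock(&chip->cable_lock);
            /* snapshot taken while the cable cannot change underneath */
            ucontrol->value.integer.value[0] = chip->rate;
            mutex_unlock(&chip->cable_lock);
            return 0;
    }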
diff --git a/sound/firewire/amdtp-stream.c b/sound/firewire/amdtp-stream.c
index 4a1dc145327b..cb9acfe60f6a 100644
--- a/sound/firewire/amdtp-stream.c
+++ b/sound/firewire/amdtp-stream.c
@@ -773,8 +773,6 @@ static void amdtp_stream_first_callback(struct fw_iso_context *context,
u32 cycle;
unsigned int packets;
- s->max_payload_length = amdtp_stream_get_max_payload(s);
-
/*
* For in-stream, first packet has come.
* For out-stream, prepared to transmit first packet
@@ -879,6 +877,9 @@ int amdtp_stream_start(struct amdtp_stream *s, int channel, int speed)
amdtp_stream_update(s);
+ if (s->direction == AMDTP_IN_STREAM)
+ s->max_payload_length = amdtp_stream_get_max_payload(s);
+
if (s->flags & CIP_NO_HEADER)
s->tag = TAG_NO_CIP_HEADER;
else
diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c
index b0c8c79848a9..a0c93b9c9a28 100644
--- a/sound/pci/hda/hda_intel.c
+++ b/sound/pci/hda/hda_intel.c
@@ -2210,6 +2210,8 @@ static struct snd_pci_quirk power_save_blacklist[] = {
SND_PCI_QUIRK(0x1849, 0x0c0c, "Asrock B85M-ITX", 0),
/* https://bugzilla.redhat.com/show_bug.cgi?id=1525104 */
SND_PCI_QUIRK(0x1043, 0x8733, "Asus Prime X370-Pro", 0),
+ /* https://bugzilla.redhat.com/show_bug.cgi?id=1572975 */
+ SND_PCI_QUIRK(0x17aa, 0x36a7, "Lenovo C50 All in one", 0),
/* https://bugzilla.kernel.org/show_bug.cgi?id=198611 */
SND_PCI_QUIRK(0x17aa, 0x2227, "Lenovo X1 Carbon 3rd Gen", 0),
{}
diff --git a/sound/pci/hda/hda_local.h b/sound/pci/hda/hda_local.h
index 321e78baa63c..9bd935216c18 100644
--- a/sound/pci/hda/hda_local.h
+++ b/sound/pci/hda/hda_local.h
@@ -622,8 +622,10 @@ snd_hda_check_power_state(struct hda_codec *codec, hda_nid_t nid,
{
return snd_hdac_check_power_state(&codec->core, nid, target_state);
}
-static inline bool snd_hda_sync_power_state(struct hda_codec *codec,
- hda_nid_t nid, unsigned int target_state)
+
+static inline unsigned int snd_hda_sync_power_state(struct hda_codec *codec,
+ hda_nid_t nid,
+ unsigned int target_state)
{
return snd_hdac_sync_power_state(&codec->core, nid, target_state);
}
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 8c238e51bb5a..01a6643fc7d4 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -2363,6 +2363,7 @@ static const struct snd_pci_quirk alc882_fixup_tbl[] = {
SND_PCI_QUIRK_VENDOR(0x1462, "MSI", ALC882_FIXUP_GPIO3),
SND_PCI_QUIRK(0x147b, 0x107a, "Abit AW9D-MAX", ALC882_FIXUP_ABIT_AW9D_MAX),
SND_PCI_QUIRK(0x1558, 0x9501, "Clevo P950HR", ALC1220_FIXUP_CLEVO_P950),
+ SND_PCI_QUIRK(0x1558, 0x95e2, "Clevo P950ER", ALC1220_FIXUP_CLEVO_P950),
SND_PCI_QUIRK_VENDOR(0x1558, "Clevo laptop", ALC882_FIXUP_EAPD),
SND_PCI_QUIRK(0x161f, 0x2054, "Medion laptop", ALC883_FIXUP_EAPD),
SND_PCI_QUIRK(0x17aa, 0x3a0d, "Lenovo Y530", ALC882_FIXUP_LENOVO_Y530),
@@ -3832,7 +3833,7 @@ static void alc280_fixup_hp_gpio4(struct hda_codec *codec,
}
}
-#if IS_REACHABLE(INPUT)
+#if IS_REACHABLE(CONFIG_INPUT)
static void gpio2_mic_hotkey_event(struct hda_codec *codec,
struct hda_jack_callback *event)
{
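The small-looking patch_realtek.c fix matters because IS_REACHABLE(), like IS_ENABLED(), keys off the full CONFIG_ symbol; handed a bare token it quietly evaluates to 0 and the guarded code is compiled out of every configuration. Illustrated:

    #if IS_REACHABLE(INPUT)         /* wrong: not a config symbol, always 0;
                                     * the hotkey code vanishes from all builds */
    #endif

    #if IS_REACHABLE(CONFIG_INPUT)  /* right: true for =y, and for =m when
                                     * the caller can reach modular code */
    #endif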
diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c
index 344d7b069d59..bb5ab7a7dfa5 100644
--- a/sound/usb/mixer.c
+++ b/sound/usb/mixer.c
@@ -967,6 +967,14 @@ static void volume_control_quirks(struct usb_mixer_elem_info *cval,
}
break;
+ case USB_ID(0x0d8c, 0x0103):
+ if (!strcmp(kctl->id.name, "PCM Playback Volume")) {
+ usb_audio_info(chip,
+ "set volume quirk for CM102-A+/102S+\n");
+ cval->min = -256;
+ }
+ break;
+
case USB_ID(0x0471, 0x0101):
case USB_ID(0x0471, 0x0104):
case USB_ID(0x0471, 0x0105):
diff --git a/sound/usb/stream.c b/sound/usb/stream.c
index 956be9f7c72a..5ed334575fc7 100644
--- a/sound/usb/stream.c
+++ b/sound/usb/stream.c
@@ -576,7 +576,7 @@ static int parse_uac_endpoint_attributes(struct snd_usb_audio *chip,
if (protocol == UAC_VERSION_1) {
attributes = csep->bmAttributes;
- } else {
+ } else if (protocol == UAC_VERSION_2) {
struct uac2_iso_endpoint_descriptor *csep2 =
(struct uac2_iso_endpoint_descriptor *) csep;
@@ -585,6 +585,13 @@ static int parse_uac_endpoint_attributes(struct snd_usb_audio *chip,
/* emulate the endpoint attributes of a v1 device */
if (csep2->bmControls & UAC2_CONTROL_PITCH)
attributes |= UAC_EP_CS_ATTR_PITCH_CONTROL;
+ } else { /* UAC_VERSION_3 */
+ struct uac3_iso_endpoint_descriptor *csep3 =
+ (struct uac3_iso_endpoint_descriptor *) csep;
+
+ /* emulate the endpoint attributes of a v1 device */
+ if (le32_to_cpu(csep3->bmControls) & UAC2_CONTROL_PITCH)
+ attributes |= UAC_EP_CS_ATTR_PITCH_CONTROL;
}
return attributes;
diff --git a/tools/arch/arm/include/uapi/asm/kvm.h b/tools/arch/arm/include/uapi/asm/kvm.h
index 2ba95d6fe852..caae4843cb70 100644
--- a/tools/arch/arm/include/uapi/asm/kvm.h
+++ b/tools/arch/arm/include/uapi/asm/kvm.h
@@ -195,6 +195,12 @@ struct kvm_arch_memory_slot {
#define KVM_REG_ARM_VFP_FPINST 0x1009
#define KVM_REG_ARM_VFP_FPINST2 0x100A
+/* KVM-as-firmware specific pseudo-registers */
+#define KVM_REG_ARM_FW (0x0014 << KVM_REG_ARM_COPROC_SHIFT)
+#define KVM_REG_ARM_FW_REG(r) (KVM_REG_ARM | KVM_REG_SIZE_U64 | \
+ KVM_REG_ARM_FW | ((r) & 0xffff))
+#define KVM_REG_ARM_PSCI_VERSION KVM_REG_ARM_FW_REG(0)
+
/* Device Control API: ARM VGIC */
#define KVM_DEV_ARM_VGIC_GRP_ADDR 0
#define KVM_DEV_ARM_VGIC_GRP_DIST_REGS 1
diff --git a/tools/arch/arm64/include/uapi/asm/kvm.h b/tools/arch/arm64/include/uapi/asm/kvm.h
index 9abbf3044654..04b3256f8e6d 100644
--- a/tools/arch/arm64/include/uapi/asm/kvm.h
+++ b/tools/arch/arm64/include/uapi/asm/kvm.h
@@ -206,6 +206,12 @@ struct kvm_arch_memory_slot {
#define KVM_REG_ARM_TIMER_CNT ARM64_SYS_REG(3, 3, 14, 3, 2)
#define KVM_REG_ARM_TIMER_CVAL ARM64_SYS_REG(3, 3, 14, 0, 2)
+/* KVM-as-firmware specific pseudo-registers */
+#define KVM_REG_ARM_FW (0x0014 << KVM_REG_ARM_COPROC_SHIFT)
+#define KVM_REG_ARM_FW_REG(r) (KVM_REG_ARM64 | KVM_REG_SIZE_U64 | \
+ KVM_REG_ARM_FW | ((r) & 0xffff))
+#define KVM_REG_ARM_PSCI_VERSION KVM_REG_ARM_FW_REG(0)
+
/* Device Control API: ARM VGIC */
#define KVM_DEV_ARM_VGIC_GRP_ADDR 0
#define KVM_DEV_ARM_VGIC_GRP_DIST_REGS 1
diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h
index d554c11e01ff..fb00a2fca990 100644
--- a/tools/arch/x86/include/asm/cpufeatures.h
+++ b/tools/arch/x86/include/asm/cpufeatures.h
@@ -198,7 +198,6 @@
#define X86_FEATURE_CAT_L2 ( 7*32+ 5) /* Cache Allocation Technology L2 */
#define X86_FEATURE_CDP_L3 ( 7*32+ 6) /* Code and Data Prioritization L3 */
#define X86_FEATURE_INVPCID_SINGLE ( 7*32+ 7) /* Effectively INVPCID && CR4.PCIDE=1 */
-
#define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */
#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
#define X86_FEATURE_SME ( 7*32+10) /* AMD Secure Memory Encryption */
@@ -207,13 +206,19 @@
#define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* "" AMD Retpoline mitigation for Spectre variant 2 */
#define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */
#define X86_FEATURE_CDP_L2 ( 7*32+15) /* Code and Data Prioritization L2 */
-
+#define X86_FEATURE_MSR_SPEC_CTRL ( 7*32+16) /* "" MSR SPEC_CTRL is implemented */
+#define X86_FEATURE_SSBD ( 7*32+17) /* Speculative Store Bypass Disable */
#define X86_FEATURE_MBA ( 7*32+18) /* Memory Bandwidth Allocation */
#define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* "" Fill RSB on context switches */
#define X86_FEATURE_SEV ( 7*32+20) /* AMD Secure Encrypted Virtualization */
-
#define X86_FEATURE_USE_IBPB ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */
#define X86_FEATURE_USE_IBRS_FW ( 7*32+22) /* "" Use IBRS during runtime firmware calls */
+#define X86_FEATURE_SPEC_STORE_BYPASS_DISABLE ( 7*32+23) /* "" Disable Speculative Store Bypass. */
+#define X86_FEATURE_LS_CFG_SSBD ( 7*32+24) /* "" AMD SSBD implementation via LS_CFG MSR */
+#define X86_FEATURE_IBRS ( 7*32+25) /* Indirect Branch Restricted Speculation */
+#define X86_FEATURE_IBPB ( 7*32+26) /* Indirect Branch Prediction Barrier */
+#define X86_FEATURE_STIBP ( 7*32+27) /* Single Thread Indirect Branch Predictors */
+#define X86_FEATURE_ZEN ( 7*32+28) /* "" CPU is AMD family 0x17 (Zen) */
/* Virtualization flags: Linux defined, word 8 */
#define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */
@@ -274,9 +279,10 @@
#define X86_FEATURE_CLZERO (13*32+ 0) /* CLZERO instruction */
#define X86_FEATURE_IRPERF (13*32+ 1) /* Instructions Retired Count */
#define X86_FEATURE_XSAVEERPTR (13*32+ 2) /* Always save/restore FP error pointers */
-#define X86_FEATURE_IBPB (13*32+12) /* Indirect Branch Prediction Barrier */
-#define X86_FEATURE_IBRS (13*32+14) /* Indirect Branch Restricted Speculation */
-#define X86_FEATURE_STIBP (13*32+15) /* Single Thread Indirect Branch Predictors */
+#define X86_FEATURE_AMD_IBPB (13*32+12) /* "" Indirect Branch Prediction Barrier */
+#define X86_FEATURE_AMD_IBRS (13*32+14) /* "" Indirect Branch Restricted Speculation */
+#define X86_FEATURE_AMD_STIBP (13*32+15) /* "" Single Thread Indirect Branch Predictors */
+#define X86_FEATURE_VIRT_SSBD (13*32+25) /* Virtualized Speculative Store Bypass Disable */
/* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */
#define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */
@@ -320,6 +326,7 @@
#define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* POPCNT for vectors of DW/QW */
#define X86_FEATURE_LA57 (16*32+16) /* 5-level page tables */
#define X86_FEATURE_RDPID (16*32+22) /* RDPID instruction */
+#define X86_FEATURE_CLDEMOTE (16*32+25) /* CLDEMOTE instruction */
/* AMD-defined CPU features, CPUID level 0x80000007 (EBX), word 17 */
#define X86_FEATURE_OVERFLOW_RECOV (17*32+ 0) /* MCA overflow recovery support */
@@ -333,6 +340,7 @@
#define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */
#define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */
#define X86_FEATURE_ARCH_CAPABILITIES (18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */
+#define X86_FEATURE_SPEC_CTRL_SSBD (18*32+31) /* "" Speculative Store Bypass Disable */
/*
* BUG word(s)
@@ -362,5 +370,6 @@
#define X86_BUG_CPU_MELTDOWN X86_BUG(14) /* CPU is affected by meltdown attack and needs kernel page table isolation */
#define X86_BUG_SPECTRE_V1 X86_BUG(15) /* CPU is affected by Spectre variant 1 attack with conditional branches */
#define X86_BUG_SPECTRE_V2 X86_BUG(16) /* CPU is affected by Spectre variant 2 attack with indirect branches */
+#define X86_BUG_SPEC_STORE_BYPASS X86_BUG(17) /* CPU is affected by speculative store bypass attack */
#endif /* _ASM_X86_CPUFEATURES_H */
diff --git a/tools/bpf/Makefile b/tools/bpf/Makefile
index 1ea545965ee3..53b60ad452f5 100644
--- a/tools/bpf/Makefile
+++ b/tools/bpf/Makefile
@@ -76,6 +76,8 @@ $(OUTPUT)bpf_asm: $(OUTPUT)bpf_asm.o $(OUTPUT)bpf_exp.yacc.o $(OUTPUT)bpf_exp.le
$(QUIET_LINK)$(CC) $(CFLAGS) -o $@ $^
$(OUTPUT)bpf_exp.lex.c: $(OUTPUT)bpf_exp.yacc.c
+$(OUTPUT)bpf_exp.yacc.o: $(OUTPUT)bpf_exp.yacc.c
+$(OUTPUT)bpf_exp.lex.o: $(OUTPUT)bpf_exp.lex.c
clean: bpftool_clean
$(call QUIET_CLEAN, bpf-progs)
diff --git a/tools/bpf/bpf_dbg.c b/tools/bpf/bpf_dbg.c
index 4f254bcc4423..61b9aa5d6415 100644
--- a/tools/bpf/bpf_dbg.c
+++ b/tools/bpf/bpf_dbg.c
@@ -1063,7 +1063,7 @@ static int cmd_load_pcap(char *file)
static int cmd_load(char *arg)
{
- char *subcmd, *cont, *tmp = strdup(arg);
+ char *subcmd, *cont = NULL, *tmp = strdup(arg);
int ret = CMD_OK;
subcmd = strtok_r(tmp, " ", &cont);
@@ -1073,7 +1073,10 @@ static int cmd_load(char *arg)
bpf_reset();
bpf_reset_breakpoints();
- ret = cmd_load_bpf(cont);
+ if (!cont)
+ ret = CMD_ERR;
+ else
+ ret = cmd_load_bpf(cont);
} else if (matches(subcmd, "pcap") == 0) {
ret = cmd_load_pcap(cont);
} else {
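The bpf_dbg.c change guards against a missing argument: the save pointer handed to strtok_r() is only guaranteed meaningful after a successful call, so it is initialized to NULL and tested before use. The guarded pattern, condensed from the hunk:

    char *cont = NULL;      /* defined value even if strtok_r() finds nothing */
    char *subcmd = strtok_r(tmp, " ", &cont);

    if (subcmd && matches(subcmd, "bpf") == 0) {
            if (!cont)      /* "load bpf" with no file argument */
                    return CMD_ERR;
            return cmd_load_bpf(cont);
    }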
diff --git a/tools/include/linux/compiler-gcc.h b/tools/include/linux/compiler-gcc.h
index a3a4427441bf..70fe61295733 100644
--- a/tools/include/linux/compiler-gcc.h
+++ b/tools/include/linux/compiler-gcc.h
@@ -21,6 +21,9 @@
/* &a[0] degrades to a pointer: a different type from an array */
#define __must_be_array(a) BUILD_BUG_ON_ZERO(__same_type((a), &(a)[0]))
+#ifndef __pure
+#define __pure __attribute__((pure))
+#endif
#define noinline __attribute__((noinline))
#ifndef __packed
#define __packed __attribute__((packed))
diff --git a/tools/include/linux/spinlock.h b/tools/include/linux/spinlock.h
index b21b586b9854..1738c0391da4 100644
--- a/tools/include/linux/spinlock.h
+++ b/tools/include/linux/spinlock.h
@@ -6,8 +6,9 @@
#include <stdbool.h>
#define spinlock_t pthread_mutex_t
-#define DEFINE_SPINLOCK(x) pthread_mutex_t x = PTHREAD_MUTEX_INITIALIZER;
+#define DEFINE_SPINLOCK(x) pthread_mutex_t x = PTHREAD_MUTEX_INITIALIZER
#define __SPIN_LOCK_UNLOCKED(x) (pthread_mutex_t)PTHREAD_MUTEX_INITIALIZER
+#define spin_lock_init(x) pthread_mutex_init(x, NULL)
#define spin_lock_irqsave(x, f) (void)f, pthread_mutex_lock(x)
#define spin_unlock_irqrestore(x, f) (void)f, pthread_mutex_unlock(x)
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index c5ec89732a8d..8c317737ba3f 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -1017,6 +1017,7 @@ struct bpf_prog_info {
__aligned_u64 map_ids;
char name[BPF_OBJ_NAME_LEN];
__u32 ifindex;
+ __u32 :32;
__u64 netns_dev;
__u64 netns_ino;
} __attribute__((aligned(8)));
@@ -1030,6 +1031,7 @@ struct bpf_map_info {
__u32 map_flags;
char name[BPF_OBJ_NAME_LEN];
__u32 ifindex;
+ __u32 :32;
__u64 netns_dev;
__u64 netns_ino;
} __attribute__((aligned(8)));
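The anonymous `__u32 :32;` bitfields make a structure hole explicit: a 64-bit compiler inserts 4 bytes of padding before the first __u64 anyway, while a 32-bit one need not, so without a named hole a compat task and the kernel disagree about the offset of netns_dev and everything after it. Spelling the hole out pins one layout for both ABIs (types as in linux/types.h):

    struct info_sketch {
            __u32 ifindex;
            __u32 :32;        /* explicit pad: identical layout on 32/64 bit */
            __u64 netns_dev;  /* now at offset 8 everywhere */
    };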
diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h
index 1065006c9bf5..b02c41e53d56 100644
--- a/tools/include/uapi/linux/kvm.h
+++ b/tools/include/uapi/linux/kvm.h
@@ -676,6 +676,13 @@ struct kvm_ioeventfd {
__u8 pad[36];
};
+#define KVM_X86_DISABLE_EXITS_MWAIT (1 << 0)
+#define KVM_X86_DISABLE_EXITS_HTL (1 << 1)
+#define KVM_X86_DISABLE_EXITS_PAUSE (1 << 2)
+#define KVM_X86_DISABLE_VALID_EXITS (KVM_X86_DISABLE_EXITS_MWAIT | \
+ KVM_X86_DISABLE_EXITS_HTL | \
+ KVM_X86_DISABLE_EXITS_PAUSE)
+
/* for KVM_ENABLE_CAP */
struct kvm_enable_cap {
/* in */
diff --git a/tools/include/uapi/linux/prctl.h b/tools/include/uapi/linux/prctl.h
index af5f8c2df87a..db9f15f5db04 100644
--- a/tools/include/uapi/linux/prctl.h
+++ b/tools/include/uapi/linux/prctl.h
@@ -207,4 +207,16 @@ struct prctl_mm_map {
# define PR_SVE_VL_LEN_MASK 0xffff
# define PR_SVE_VL_INHERIT (1 << 17) /* inherit across exec */
+/* Per task speculation control */
+#define PR_GET_SPECULATION_CTRL 52
+#define PR_SET_SPECULATION_CTRL 53
+/* Speculation control variants */
+# define PR_SPEC_STORE_BYPASS 0
+/* Return and control values for PR_SET/GET_SPECULATION_CTRL */
+# define PR_SPEC_NOT_AFFECTED 0
+# define PR_SPEC_PRCTL (1UL << 0)
+# define PR_SPEC_ENABLE (1UL << 1)
+# define PR_SPEC_DISABLE (1UL << 2)
+# define PR_SPEC_FORCE_DISABLE (1UL << 3)
+
#endif /* _LINUX_PRCTL_H */
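These tools/ headers track the new per-task speculation controls. From userspace the interface is the usual five-argument prctl() form; a minimal caller, assuming a kernel with the feature (older kernels return -EINVAL, unaffected CPUs report PR_SPEC_NOT_AFFECTED), with fallback defines taken from the header above for builds against older installed headers:

    #include <stdio.h>
    #include <sys/prctl.h>

    #ifndef PR_SET_SPECULATION_CTRL
    #define PR_GET_SPECULATION_CTRL 52
    #define PR_SET_SPECULATION_CTRL 53
    #define PR_SPEC_STORE_BYPASS    0
    #define PR_SPEC_DISABLE         (1UL << 2)
    #endif

    int main(void)
    {
            /* Opt this task out of speculative store bypass. */
            if (prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS,
                      PR_SPEC_DISABLE, 0, 0))
                    perror("PR_SET_SPECULATION_CTRL");

            /* Read the state back as a PR_SPEC_* bitmask. */
            printf("ssb: 0x%x\n", prctl(PR_GET_SPECULATION_CTRL,
                                        PR_SPEC_STORE_BYPASS, 0, 0, 0));
            return 0;
    }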
diff --git a/tools/lib/api/fs/tracing_path.c b/tools/lib/api/fs/tracing_path.c
index 7b7fd0b18551..120037496f77 100644
--- a/tools/lib/api/fs/tracing_path.c
+++ b/tools/lib/api/fs/tracing_path.c
@@ -13,11 +13,9 @@
#include "tracing_path.h"
-
-char tracing_mnt[PATH_MAX] = "/sys/kernel/debug";
-char tracing_path[PATH_MAX] = "/sys/kernel/debug/tracing";
-char tracing_events_path[PATH_MAX] = "/sys/kernel/debug/tracing/events";
-
+static char tracing_mnt[PATH_MAX] = "/sys/kernel/debug";
+static char tracing_path[PATH_MAX] = "/sys/kernel/debug/tracing";
+static char tracing_events_path[PATH_MAX] = "/sys/kernel/debug/tracing/events";
static void __tracing_path_set(const char *tracing, const char *mountpoint)
{
@@ -76,7 +74,7 @@ char *get_tracing_file(const char *name)
{
char *file;
- if (asprintf(&file, "%s/%s", tracing_path, name) < 0)
+ if (asprintf(&file, "%s/%s", tracing_path_mount(), name) < 0)
return NULL;
return file;
@@ -87,6 +85,34 @@ void put_tracing_file(char *file)
free(file);
}
+char *get_events_file(const char *name)
+{
+ char *file;
+
+ if (asprintf(&file, "%s/events/%s", tracing_path_mount(), name) < 0)
+ return NULL;
+
+ return file;
+}
+
+void put_events_file(char *file)
+{
+ free(file);
+}
+
+DIR *tracing_events__opendir(void)
+{
+ DIR *dir = NULL;
+ char *path = get_tracing_file("events");
+
+ if (path) {
+ dir = opendir(path);
+ put_events_file(path);
+ }
+
+ return dir;
+}
+
int tracing_path__strerror_open_tp(int err, char *buf, size_t size,
const char *sys, const char *name)
{
@@ -129,7 +155,7 @@ int tracing_path__strerror_open_tp(int err, char *buf, size_t size,
snprintf(buf, size,
"Error:\tNo permissions to read %s/%s\n"
"Hint:\tTry 'sudo mount -o remount,mode=755 %s'\n",
- tracing_events_path, filename, tracing_mnt);
+ tracing_events_path, filename, tracing_path_mount());
}
break;
default:
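get_events_file()/put_events_file() and tracing_events__opendir() replace direct poking at the now-static path variables; callers compose paths against whatever debugfs/tracefs mount is actually present. A sketch of a consumer, using the helpers added above:

    char *path = get_events_file("sched/sched_switch/id");
    if (path) {
            /* open and read the event id file here */
            put_events_file(path);          /* pairs with the get */
    }

    DIR *dir = tracing_events__opendir();   /* .../tracing/events */
    if (dir) {
            /* iterate event subsystems with readdir() */
            closedir(dir);
    }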
diff --git a/tools/lib/api/fs/tracing_path.h b/tools/lib/api/fs/tracing_path.h
index 0066f06cc381..a19136b086dc 100644
--- a/tools/lib/api/fs/tracing_path.h
+++ b/tools/lib/api/fs/tracing_path.h
@@ -3,9 +3,9 @@
#define __API_FS_TRACING_PATH_H
#include <linux/types.h>
+#include <dirent.h>
-extern char tracing_path[];
-extern char tracing_events_path[];
+DIR *tracing_events__opendir(void);
void tracing_path_set(const char *mountpoint);
const char *tracing_path_mount(void);
@@ -13,5 +13,10 @@ const char *tracing_path_mount(void);
char *get_tracing_file(const char *name);
void put_tracing_file(char *file);
+char *get_events_file(const char *name);
+void put_events_file(char *file);
+
+#define zput_events_file(ptr) ({ free(*ptr); *ptr = NULL; })
+
int tracing_path__strerror_open_tp(int err, char *buf, size_t size, const char *sys, const char *name);
#endif /* __API_FS_TRACING_PATH_H */
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 5922443063f0..0f9f06df49bc 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -2035,7 +2035,7 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
return -EINVAL;
obj = bpf_object__open(attr->file);
- if (IS_ERR(obj))
+ if (IS_ERR_OR_NULL(obj))
return -ENOENT;
bpf_object__for_each_program(prog, obj) {
diff --git a/tools/lib/symbol/kallsyms.c b/tools/lib/symbol/kallsyms.c
index 689b6a130dd7..96d830545bbb 100644
--- a/tools/lib/symbol/kallsyms.c
+++ b/tools/lib/symbol/kallsyms.c
@@ -10,6 +10,12 @@ u8 kallsyms2elf_type(char type)
return (type == 't' || type == 'w') ? STT_FUNC : STT_OBJECT;
}
+bool kallsyms__is_function(char symbol_type)
+{
+ symbol_type = toupper(symbol_type);
+ return symbol_type == 'T' || symbol_type == 'W';
+}
+
int kallsyms__parse(const char *filename, void *arg,
int (*process_symbol)(void *arg, const char *name,
char type, u64 start))
diff --git a/tools/lib/symbol/kallsyms.h b/tools/lib/symbol/kallsyms.h
index bc40101d72c1..72ab9870454b 100644
--- a/tools/lib/symbol/kallsyms.h
+++ b/tools/lib/symbol/kallsyms.h
@@ -20,6 +20,8 @@ static inline u8 kallsyms2elf_binding(char type)
u8 kallsyms2elf_type(char type);
+bool kallsyms__is_function(char symbol_type);
+
int kallsyms__parse(const char *filename, void *arg,
int (*process_symbol)(void *arg, const char *name,
char type, u64 start));
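kallsyms__is_function() folds the 't'/'T'/'w'/'W' nm-type test into one helper for kallsyms__parse() callbacks. A small counter, as one might write it against the interface above:

    static int count_functions(void *arg, const char *name,
                               char type, u64 start)
    {
            int *n = arg;

            if (kallsyms__is_function(type))
                    (*n)++;
            return 0;
    }

    /* ... */
    int n = 0;
    kallsyms__parse("/proc/kallsyms", &n, count_functions);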
diff --git a/tools/memory-model/Documentation/cheatsheet.txt b/tools/memory-model/Documentation/cheatsheet.txt
index 956b1ae4aafb..33ba98d72b16 100644
--- a/tools/memory-model/Documentation/cheatsheet.txt
+++ b/tools/memory-model/Documentation/cheatsheet.txt
@@ -1,6 +1,6 @@
Prior Operation Subsequent Operation
--------------- ---------------------------
- C Self R W RWM Self R W DR DW RMW SV
+ C Self R W RMW Self R W DR DW RMW SV
-- ---- - - --- ---- - - -- -- --- --
Store, e.g., WRITE_ONCE() Y Y
@@ -14,7 +14,7 @@ smp_wmb() Y W Y Y W
smp_mb() & synchronize_rcu() CP Y Y Y Y Y Y Y Y
Successful full non-void RMW CP Y Y Y Y Y Y Y Y Y Y Y
smp_mb__before_atomic() CP Y Y Y a a a a Y
-smp_mb__after_atomic() CP a a Y Y Y Y Y
+smp_mb__after_atomic() CP a a Y Y Y Y Y Y
Key: C: Ordering is cumulative
@@ -26,4 +26,5 @@ Key: C: Ordering is cumulative
DR: Dependent read (address dependency)
DW: Dependent write (address, data, or control dependency)
RMW: Atomic read-modify-write operation
- SV Same-variable access
+ SELF: Orders self, as opposed to accesses before and/or after
+ SV: Orders later accesses to the same variable
diff --git a/tools/memory-model/Documentation/explanation.txt b/tools/memory-model/Documentation/explanation.txt
index a727c82bd434..1b09f3175a1f 100644
--- a/tools/memory-model/Documentation/explanation.txt
+++ b/tools/memory-model/Documentation/explanation.txt
@@ -27,7 +27,7 @@ Explanation of the Linux-Kernel Memory Consistency Model
19. AND THEN THERE WAS ALPHA
20. THE HAPPENS-BEFORE RELATION: hb
21. THE PROPAGATES-BEFORE RELATION: pb
- 22. RCU RELATIONS: link, gp-link, rscs-link, and rcu-path
+ 22. RCU RELATIONS: rcu-link, gp, rscs, rcu-fence, and rb
23. ODDS AND ENDS
@@ -1451,8 +1451,8 @@ they execute means that it cannot have cycles. This requirement is
the content of the LKMM's "propagation" axiom.
-RCU RELATIONS: link, gp-link, rscs-link, and rcu-path
------------------------------------------------------
+RCU RELATIONS: rcu-link, gp, rscs, rcu-fence, and rb
+----------------------------------------------------
RCU (Read-Copy-Update) is a powerful synchronization mechanism. It
rests on two concepts: grace periods and read-side critical sections.
@@ -1509,8 +1509,8 @@ y, which occurs before the end of the critical section, did not
propagate to P1 before the end of the grace period, violating the
Guarantee.
-In the kernel's implementations of RCU, the business about stores
-propagating to every CPU is realized by placing strong fences at
+In the kernel's implementations of RCU, the requirements for stores
+to propagate to every CPU are fulfilled by placing strong fences at
suitable places in the RCU-related code. Thus, if a critical section
starts before a grace period does then the critical section's CPU will
execute an smp_mb() fence after the end of the critical section and
@@ -1523,72 +1523,124 @@ executes.
What exactly do we mean by saying that a critical section "starts
before" or "ends after" a grace period? Some aspects of the meaning
are pretty obvious, as in the example above, but the details aren't
-entirely clear. The LKMM formalizes this notion by means of a
-relation with the unfortunately generic name "link". It is a very
-general relation; among other things, X ->link Z includes cases where
-X happens-before or is equal to some event Y which is equal to or
-comes before Z in the coherence order. Taking Y = Z, this says that
-X ->rfe Z implies X ->link Z, and taking Y = X, it says that X ->fr Z
-and X ->co Z each imply X ->link Z.
-
-The formal definition of the link relation is more than a little
+entirely clear. The LKMM formalizes this notion by means of the
+rcu-link relation. rcu-link encompasses a very general notion of
+"before": Among other things, X ->rcu-link Z includes cases where X
+happens-before or is equal to some event Y which is equal to or comes
+before Z in the coherence order. When Y = Z this says that X ->rfe Z
+implies X ->rcu-link Z. In addition, when Y = X it says that X ->fr Z
+and X ->co Z each imply X ->rcu-link Z.
+
+The formal definition of the rcu-link relation is more than a little
obscure, and we won't give it here. It is closely related to the pb
relation, and the details don't matter unless you want to comb through
a somewhat lengthy formal proof. Pretty much all you need to know
-about link is the information in the preceding paragraph.
-
-The LKMM goes on to define the gp-link and rscs-link relations. They
-bring grace periods and read-side critical sections into the picture,
-in the following way:
-
- E ->gp-link F means there is a synchronize_rcu() fence event S
- and an event X such that E ->po S, either S ->po X or S = X,
- and X ->link F. In other words, E and F are connected by a
- grace period followed by an instance of link.
-
- E ->rscs-link F means there is a critical section delimited by
- an rcu_read_lock() fence L and an rcu_read_unlock() fence U,
- and an event X such that E ->po U, either L ->po X or L = X,
- and X ->link F. Roughly speaking, this says that some event
- in the same critical section as E is connected by link to F.
-
-If we think of the link relation as standing for an extended "before",
-then E ->gp-link F says that E executes before a grace period which
-ends before F executes. (In fact it says more than this, because it
-includes cases where E executes before a grace period and some store
-propagates to F's CPU before F executes and doesn't propagate to some
-other CPU until after the grace period ends.) Similarly,
-E ->rscs-link F says that E is part of (or before the start of) a
-critical section which starts before F executes.
+about rcu-link is the information in the preceding paragraph.
+
+The LKMM also defines the gp and rscs relations. They bring grace
+periods and read-side critical sections into the picture, in the
+following way:
+
+ E ->gp F means there is a synchronize_rcu() fence event S such
+ that E ->po S and either S ->po F or S = F. In simple terms,
+ there is a grace period po-between E and F.
+
+ E ->rscs F means there is a critical section delimited by an
+ rcu_read_lock() fence L and an rcu_read_unlock() fence U, such
+ that E ->po U and either L ->po F or L = F. You can think of
+ this as saying that E and F are in the same critical section
+ (in fact, it also allows E to be po-before the start of the
+ critical section and F to be po-after the end).
+
+If we think of the rcu-link relation as standing for an extended
+"before", then X ->gp Y ->rcu-link Z says that X executes before a
+grace period which ends before Z executes. (In fact it covers more
+than this, because it also includes cases where X executes before a
+grace period and some store propagates to Z's CPU before Z executes
+but doesn't propagate to some other CPU until after the grace period
+ends.) Similarly, X ->rscs Y ->rcu-link Z says that X is part of (or
+before the start of) a critical section which starts before Z
+executes.
+
+The LKMM goes on to define the rcu-fence relation as a sequence of gp
+and rscs links separated by rcu-link links, in which the number of gp
+links is >= the number of rscs links. For example:
+
+ X ->gp Y ->rcu-link Z ->rscs T ->rcu-link U ->gp V
+
+would imply that X ->rcu-fence V, because this sequence contains two
+gp links and only one rscs link. (It also implies that X ->rcu-fence T
+and Z ->rcu-fence V.) On the other hand:
+
+ X ->rscs Y ->rcu-link Z ->rscs T ->rcu-link U ->gp V
+
+does not imply X ->rcu-fence V, because the sequence contains only
+one gp link but two rscs links.
+
+The rcu-fence relation is important because the Grace Period Guarantee
+means that rcu-fence acts kind of like a strong fence. In particular,
+if W is a write and we have W ->rcu-fence Z, the Guarantee says that W
+will propagate to every CPU before Z executes.
+
+To prove this in full generality requires some intellectual effort.
+We'll consider just a very simple case:
+
+ W ->gp X ->rcu-link Y ->rscs Z.
+
+This formula means that there is a grace period G and a critical
+section C such that:
+
+ 1. W is po-before G;
+
+ 2. X is equal to or po-after G;
+
+ 3. X comes "before" Y in some sense;
+
+ 4. Y is po-before the end of C;
+
+ 5. Z is equal to or po-after the start of C.
+
+From 2 - 4 we deduce that the grace period G ends before the critical
+section C. Then the second part of the Grace Period Guarantee says
+not only that G starts before C does, but also that W (which executes
+on G's CPU before G starts) must propagate to every CPU before C
+starts. In particular, W propagates to every CPU before Z executes
+(or finishes executing, in the case where Z is equal to the
+rcu_read_lock() fence event which starts C). This sort of reasoning
+can be expanded to handle all the situations covered by rcu-fence.
+
+Finally, the LKMM defines the RCU-before (rb) relation in terms of
+rcu-fence. This is done in essentially the same way as the pb
+relation was defined in terms of strong-fence. We will omit the
+details; the end result is that E ->rb F implies E must execute before
+F, just as E ->pb F does (and for much the same reasons).
Putting this all together, the LKMM expresses the Grace Period
-Guarantee by requiring that there are no cycles consisting of gp-link
-and rscs-link connections in which the number of gp-link instances is
->= the number of rscs-link instances. It does this by defining the
-rcu-path relation to link events E and F whenever it is possible to
-pass from E to F by a sequence of gp-link and rscs-link connections
-with at least as many of the former as the latter. The LKMM's "rcu"
-axiom then says that there are no events E such that E ->rcu-path E.
-
-Justifying this axiom takes some intellectual effort, but it is in
-fact a valid formalization of the Grace Period Guarantee. We won't
-attempt to go through the detailed argument, but the following
-analysis gives a taste of what is involved. Suppose we have a
-violation of the first part of the Guarantee: A critical section
-starts before a grace period, and some store propagates to the
-critical section's CPU before the end of the critical section but
-doesn't propagate to some other CPU until after the end of the grace
-period.
+Guarantee by requiring that the rb relation does not contain a cycle.
+Equivalently, this "rcu" axiom requires that there are no events E and
+F with E ->rcu-link F ->rcu-fence E. Or to put it a third way, the
+axiom requires that there are no cycles consisting of gp and rscs
+alternating with rcu-link, where the number of gp links is >= the
+number of rscs links.
+
+Justifying the axiom isn't easy, but it is in fact a valid
+formalization of the Grace Period Guarantee. We won't attempt to go
+through the detailed argument, but the following analysis gives a
+taste of what is involved. Suppose we have a violation of the first
+part of the Guarantee: A critical section starts before a grace
+period, and some store propagates to the critical section's CPU before
+the end of the critical section but doesn't propagate to some other
+CPU until after the end of the grace period.
Putting symbols to these ideas, let L and U be the rcu_read_lock() and
rcu_read_unlock() fence events delimiting the critical section in
question, and let S be the synchronize_rcu() fence event for the grace
period. Saying that the critical section starts before S means there
are events E and F where E is po-after L (which marks the start of the
-critical section), E is "before" F in the sense of the link relation,
-and F is po-before the grace period S:
+critical section), E is "before" F in the sense of the rcu-link
+relation, and F is po-before the grace period S:
- L ->po E ->link F ->po S.
+ L ->po E ->rcu-link F ->po S.
Let W be the store mentioned above, let Z come before the end of the
critical section and witness that W propagates to the critical
@@ -1600,16 +1652,19 @@ some event X which is po-after S. Symbolically, this amounts to:
The fr link from Y to W indicates that W has not propagated to Y's CPU
at the time that Y executes. From this, it can be shown (see the
-discussion of the link relation earlier) that X and Z are connected by
-link, yielding:
+discussion of the rcu-link relation earlier) that X and Z are related
+by rcu-link, yielding:
+
+ S ->po X ->rcu-link Z ->po U.
+
+The formulas say that S is po-between F and X, hence F ->gp X. They
+also say that Z comes before the end of the critical section and E
+comes after its start, hence Z ->rscs E. From all this we obtain:
- S ->po X ->link Z ->po U.
+ F ->gp X ->rcu-link Z ->rscs E ->rcu-link F,
-These formulas say that S is po-between F and X, hence F ->gp-link Z
-via X. They also say that Z comes before the end of the critical
-section and E comes after its start, hence Z ->rscs-link F via E. But
-now we have a forbidden cycle: F ->gp-link Z ->rscs-link F. Thus the
-"rcu" axiom rules out this violation of the Grace Period Guarantee.
+a forbidden cycle. Thus the "rcu" axiom rules out this violation of
+the Grace Period Guarantee.
For something a little more down-to-earth, let's see how the axiom
works out in practice. Consider the RCU code example from above, this
@@ -1635,18 +1690,18 @@ time with statement labels added to the memory access instructions:
}
-If r2 = 0 at the end then P0's store at X overwrites the value
-that P1's load at Z reads from, so we have Z ->fre X and thus
-Z ->link X. In addition, there is a synchronize_rcu() between Y and
-Z, so therefore we have Y ->gp-link X.
+If r2 = 0 at the end then P0's store at X overwrites the value that
+P1's load at Z reads from, so we have Z ->fre X and thus Z ->rcu-link X.
+In addition, there is a synchronize_rcu() between Y and Z, so therefore
+we have Y ->gp Z.
If r1 = 1 at the end then P1's load at Y reads from P0's store at W,
-so we have W ->link Y. In addition, W and X are in the same critical
-section, so therefore we have X ->rscs-link Y.
+so we have W ->rcu-link Y. In addition, W and X are in the same critical
+section, so therefore we have X ->rscs W.
-This gives us a cycle, Y ->gp-link X ->rscs-link Y, with one gp-link
-and one rscs-link, violating the "rcu" axiom. Hence the outcome is
-not allowed by the LKMM, as we would expect.
+Then X ->rscs W ->rcu-link Y ->gp Z ->rcu-link X is a forbidden cycle,
+violating the "rcu" axiom. Hence the outcome is not allowed by the
+LKMM, as we would expect.
For contrast, let's see what can happen in a more complicated example:
@@ -1682,15 +1737,11 @@ For contrast, let's see what can happen in a more complicated example:
}
If r0 = r1 = r2 = 1 at the end, then similar reasoning to before shows
-that W ->rscs-link Y via X, Y ->gp-link U via Z, and U ->rscs-link W
-via V. And just as before, this gives a cycle:
-
- W ->rscs-link Y ->gp-link U ->rscs-link W.
-
-However, this cycle has fewer gp-link instances than rscs-link
-instances, and consequently the outcome is not forbidden by the LKMM.
-The following instruction timing diagram shows how it might actually
-occur:
+that W ->rscs X ->rcu-link Y ->gp Z ->rcu-link U ->rscs V ->rcu-link W.
+However this cycle is not forbidden, because the sequence of relations
+contains fewer instances of gp (one) than of rscs (two). Consequently
+the outcome is allowed by the LKMM. The following instruction timing
+diagram shows how it might actually occur:
P0 P1 P2
-------------------- -------------------- --------------------
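
As a companion to the two examples above, here is a hypothetical litmus-style
sketch (not one of the tests shipped in this patch) of the simplest forbidden
case: one grace period against one critical section, so the gp count is >=
the rscs count and the cycle is ruled out.

	C RCU-mp-sketch

	(* Result: Never (expected under the rcu axiom) *)

	{}

	P0(int *x, int *y)
	{
		WRITE_ONCE(*x, 1);
		synchronize_rcu();
		WRITE_ONCE(*y, 1);
	}

	P1(int *x, int *y)
	{
		int r1;
		int r2;

		rcu_read_lock();
		r1 = READ_ONCE(*y);
		r2 = READ_ONCE(*x);
		rcu_read_unlock();
	}

	exists (1:r1=1 /\ 1:r2=0)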
diff --git a/tools/memory-model/Documentation/references.txt b/tools/memory-model/Documentation/references.txt
index ba2e34c2ec3f..b177f3e4a614 100644
--- a/tools/memory-model/Documentation/references.txt
+++ b/tools/memory-model/Documentation/references.txt
@@ -63,15 +63,22 @@ o Shaked Flur, Susmit Sarkar, Christopher Pulte, Kyndylan Nienhuis,
Principles of Programming Languages (POPL 2017). ACM, New York,
NY, USA, 429–442.
+o Christopher Pulte, Shaked Flur, Will Deacon, Jon French,
+ Susmit Sarkar, and Peter Sewell. 2018. "Simplifying ARM concurrency:
+ multicopy-atomic axiomatic and operational models for ARMv8". In
+ Proceedings of the ACM on Programming Languages, Volume 2, Issue
+ POPL, Article No. 19. ACM, New York, NY, USA.
+
Linux-kernel memory model
=========================
-o Andrea Parri, Alan Stern, Luc Maranget, Paul E. McKenney,
- and Jade Alglave. 2017. "A formal model of
- Linux-kernel memory ordering - companion webpage".
- http://moscova.inria.fr/∼maranget/cats7/linux/. (2017). [Online;
- accessed 30-January-2017].
+o Jade Alglave, Luc Maranget, Paul E. McKenney, Andrea Parri, and
+ Alan Stern. 2018. "Frightening small children and disconcerting
+ grown-ups: Concurrency in the Linux kernel". In Proceedings of
+ the 23rd International Conference on Architectural Support for
+ Programming Languages and Operating Systems (ASPLOS 2018). ACM,
+ New York, NY, USA, 405-418. Webpage: http://diy.inria.fr/linux/.
o Jade Alglave, Luc Maranget, Paul E. McKenney, Andrea Parri, and
Alan Stern. 2017. "A formal kernel memory-ordering model (part 1)"
diff --git a/tools/memory-model/README b/tools/memory-model/README
index 0b3a5f3c9ccd..734f7feaa5dc 100644
--- a/tools/memory-model/README
+++ b/tools/memory-model/README
@@ -20,7 +20,7 @@ that litmus test to be exercised within the Linux kernel.
REQUIREMENTS
============
-Version 7.48 of the "herd7" and "klitmus7" tools must be downloaded
+Version 7.49 of the "herd7" and "klitmus7" tools must be downloaded
separately:
https://github.com/herd/herdtools7
diff --git a/tools/memory-model/linux-kernel.bell b/tools/memory-model/linux-kernel.bell
index 432c7cf71b23..64f5740e0e75 100644
--- a/tools/memory-model/linux-kernel.bell
+++ b/tools/memory-model/linux-kernel.bell
@@ -5,10 +5,10 @@
* Copyright (C) 2017 Alan Stern <stern@rowland.harvard.edu>,
* Andrea Parri <parri.andrea@gmail.com>
*
- * An earlier version of this file appears in the companion webpage for
+ * An earlier version of this file appeared in the companion webpage for
* "Frightening small children and disconcerting grown-ups: Concurrency
* in the Linux kernel" by Alglave, Maranget, McKenney, Parri, and Stern,
- * which is to appear in ASPLOS 2018.
+ * which appeared in ASPLOS 2018.
*)
"Linux-kernel memory consistency model"
diff --git a/tools/memory-model/linux-kernel.cat b/tools/memory-model/linux-kernel.cat
index df97db03b6c2..59b5cbe6b624 100644
--- a/tools/memory-model/linux-kernel.cat
+++ b/tools/memory-model/linux-kernel.cat
@@ -5,10 +5,10 @@
* Copyright (C) 2017 Alan Stern <stern@rowland.harvard.edu>,
* Andrea Parri <parri.andrea@gmail.com>
*
- * An earlier version of this file appears in the companion webpage for
+ * An earlier version of this file appeared in the companion webpage for
* "Frightening small children and disconcerting grown-ups: Concurrency
* in the Linux kernel" by Alglave, Maranget, McKenney, Parri, and Stern,
- * which is to appear in ASPLOS 2018.
+ * which appeared in ASPLOS 2018.
*)
"Linux-kernel memory consistency model"
@@ -100,22 +100,29 @@ let rscs = po ; crit^-1 ; po?
* one but two non-rf relations, but only in conjunction with an RCU
* read-side critical section.
*)
-let link = hb* ; pb* ; prop
+let rcu-link = hb* ; pb* ; prop
-(* Chains that affect the RCU grace-period guarantee *)
-let gp-link = gp ; link
-let rscs-link = rscs ; link
+(*
+ * Any sequence containing at least as many grace periods as RCU read-side
+ * critical sections (joined by rcu-link) acts as a generalized strong fence.
+ *)
+let rec rcu-fence = gp |
+ (gp ; rcu-link ; rscs) |
+ (rscs ; rcu-link ; gp) |
+ (gp ; rcu-link ; rcu-fence ; rcu-link ; rscs) |
+ (rscs ; rcu-link ; rcu-fence ; rcu-link ; gp) |
+ (rcu-fence ; rcu-link ; rcu-fence)
+
+(* rb orders instructions just as pb does *)
+let rb = prop ; rcu-fence ; hb* ; pb*
+
+irreflexive rb as rcu
(*
- * A cycle containing at least as many grace periods as RCU read-side
- * critical sections is forbidden.
+ * The happens-before, propagation, and rcu constraints are all
+ * expressions of temporal ordering. They could be replaced by
+ * a single constraint on an "executes-before" relation, xb:
+ *
+ * let xb = hb | pb | rb
+ * acyclic xb as executes-before
*)
-let rec rcu-path =
- gp-link |
- (gp-link ; rscs-link) |
- (rscs-link ; gp-link) |
- (rcu-path ; rcu-path) |
- (gp-link ; rcu-path ; rscs-link) |
- (rscs-link ; rcu-path ; gp-link)
-
-irreflexive rcu-path as rcu
diff --git a/tools/memory-model/linux-kernel.def b/tools/memory-model/linux-kernel.def
index 397e4e67e8c8..6fa3eb28d40b 100644
--- a/tools/memory-model/linux-kernel.def
+++ b/tools/memory-model/linux-kernel.def
@@ -1,9 +1,9 @@
// SPDX-License-Identifier: GPL-2.0+
//
-// An earlier version of this file appears in the companion webpage for
+// An earlier version of this file appeared in the companion webpage for
// "Frightening small children and disconcerting grown-ups: Concurrency
// in the Linux kernel" by Alglave, Maranget, McKenney, Parri, and Stern,
-// which is to appear in ASPLOS 2018.
+// which appeared in ASPLOS 2018.
// ONCE
READ_ONCE(X) __load{once}(X)
@@ -14,14 +14,15 @@ smp_store_release(X,V) { __store{release}(*X,V); }
smp_load_acquire(X) __load{acquire}(*X)
rcu_assign_pointer(X,V) { __store{release}(X,V); }
rcu_dereference(X) __load{once}(X)
+smp_store_mb(X,V) { __store{once}(X,V); __fence{mb}; }
// Fences
-smp_mb() { __fence{mb} ; }
-smp_rmb() { __fence{rmb} ; }
-smp_wmb() { __fence{wmb} ; }
-smp_mb__before_atomic() { __fence{before-atomic} ; }
-smp_mb__after_atomic() { __fence{after-atomic} ; }
-smp_mb__after_spinlock() { __fence{after-spinlock} ; }
+smp_mb() { __fence{mb}; }
+smp_rmb() { __fence{rmb}; }
+smp_wmb() { __fence{wmb}; }
+smp_mb__before_atomic() { __fence{before-atomic}; }
+smp_mb__after_atomic() { __fence{after-atomic}; }
+smp_mb__after_spinlock() { __fence{after-spinlock}; }
// Exchange
xchg(X,V) __xchg{mb}(X,V)
@@ -34,26 +35,27 @@ cmpxchg_acquire(X,V,W) __cmpxchg{acquire}(X,V,W)
cmpxchg_release(X,V,W) __cmpxchg{release}(X,V,W)
// Spinlocks
-spin_lock(X) { __lock(X) ; }
-spin_unlock(X) { __unlock(X) ; }
+spin_lock(X) { __lock(X); }
+spin_unlock(X) { __unlock(X); }
spin_trylock(X) __trylock(X)
+spin_is_locked(X) __islocked(X)
// RCU
rcu_read_lock() { __fence{rcu-lock}; }
-rcu_read_unlock() { __fence{rcu-unlock};}
+rcu_read_unlock() { __fence{rcu-unlock}; }
synchronize_rcu() { __fence{sync-rcu}; }
synchronize_rcu_expedited() { __fence{sync-rcu}; }
// Atomic
atomic_read(X) READ_ONCE(*X)
-atomic_set(X,V) { WRITE_ONCE(*X,V) ; }
+atomic_set(X,V) { WRITE_ONCE(*X,V); }
atomic_read_acquire(X) smp_load_acquire(X)
atomic_set_release(X,V) { smp_store_release(X,V); }
-atomic_add(V,X) { __atomic_op(X,+,V) ; }
-atomic_sub(V,X) { __atomic_op(X,-,V) ; }
-atomic_inc(X) { __atomic_op(X,+,1) ; }
-atomic_dec(X) { __atomic_op(X,-,1) ; }
+atomic_add(V,X) { __atomic_op(X,+,V); }
+atomic_sub(V,X) { __atomic_op(X,-,V); }
+atomic_inc(X) { __atomic_op(X,+,1); }
+atomic_dec(X) { __atomic_op(X,-,1); }
atomic_add_return(V,X) __atomic_op_return{mb}(X,+,V)
atomic_add_return_relaxed(V,X) __atomic_op_return{once}(X,+,V)
diff --git a/tools/memory-model/litmus-tests/.gitignore b/tools/memory-model/litmus-tests/.gitignore
new file mode 100644
index 000000000000..6e2ddc54152f
--- /dev/null
+++ b/tools/memory-model/litmus-tests/.gitignore
@@ -0,0 +1 @@
+*.litmus.out
diff --git a/tools/memory-model/litmus-tests/IRIW+mbonceonces+OnceOnce.litmus b/tools/memory-model/litmus-tests/IRIW+mbonceonces+OnceOnce.litmus
index 50d5db9ea983..98a3716efa37 100644
--- a/tools/memory-model/litmus-tests/IRIW+mbonceonces+OnceOnce.litmus
+++ b/tools/memory-model/litmus-tests/IRIW+mbonceonces+OnceOnce.litmus
@@ -7,7 +7,7 @@ C IRIW+mbonceonces+OnceOnce
 * between each pair of reads. In other words, is smp_mb() sufficient to
* cause two different reading processes to agree on the order of a pair
* of writes, where each write is to a different variable by a different
- * process?
+ * process? This litmus test exercises LKMM's "propagation" rule.
*)
{}
diff --git a/tools/memory-model/litmus-tests/MP+polockmbonce+poacquiresilsil.litmus b/tools/memory-model/litmus-tests/MP+polockmbonce+poacquiresilsil.litmus
new file mode 100644
index 000000000000..50f4d62bbf0e
--- /dev/null
+++ b/tools/memory-model/litmus-tests/MP+polockmbonce+poacquiresilsil.litmus
@@ -0,0 +1,35 @@
+C MP+polockmbonce+poacquiresilsil
+
+(*
+ * Result: Never
+ *
+ * Do spinlocks combined with smp_mb__after_spinlock() provide order
+ * to outside observers using spin_is_locked() to sense the lock-held
+ * state, ordered by acquire? Note that when the first spin_is_locked()
+ * returns false and the second true, we know that the smp_load_acquire()
+ * executed before the lock was acquired (loosely speaking).
+ *)
+
+{
+}
+
+P0(spinlock_t *lo, int *x)
+{
+ spin_lock(lo);
+ smp_mb__after_spinlock();
+ WRITE_ONCE(*x, 1);
+ spin_unlock(lo);
+}
+
+P1(spinlock_t *lo, int *x)
+{
+ int r1;
+ int r2;
+ int r3;
+
+ r1 = smp_load_acquire(x);
+ r2 = spin_is_locked(lo);
+ r3 = spin_is_locked(lo);
+}
+
+exists (1:r1=1 /\ 1:r2=0 /\ 1:r3=1)
diff --git a/tools/memory-model/litmus-tests/MP+polockonce+poacquiresilsil.litmus b/tools/memory-model/litmus-tests/MP+polockonce+poacquiresilsil.litmus
new file mode 100644
index 000000000000..abf81e7a0895
--- /dev/null
+++ b/tools/memory-model/litmus-tests/MP+polockonce+poacquiresilsil.litmus
@@ -0,0 +1,34 @@
+C MP+polockonce+poacquiresilsil
+
+(*
+ * Result: Sometimes
+ *
+ * Do spinlocks provide order to outside observers using spin_is_locked()
+ * to sense the lock-held state, ordered by acquire? Note that when the
+ * first spin_is_locked() returns false and the second true, we know that
+ * the smp_load_acquire() executed before the lock was acquired (loosely
+ * speaking).
+ *)
+
+{
+}
+
+P0(spinlock_t *lo, int *x)
+{
+ spin_lock(lo);
+ WRITE_ONCE(*x, 1);
+ spin_unlock(lo);
+}
+
+P1(spinlock_t *lo, int *x)
+{
+ int r1;
+ int r2;
+ int r3;
+
+ r1 = smp_load_acquire(x);
+ r2 = spin_is_locked(lo);
+ r3 = spin_is_locked(lo);
+}
+
+exists (1:r1=1 /\ 1:r2=0 /\ 1:r3=1)
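
Read together with MP+polockmbonce+poacquiresilsil.litmus above: the two
tests differ only in the smp_mb__after_spinlock() call in P0, yet the Result
changes from Never to Sometimes, so that single fence is what forbids the
exists clause in the stronger variant.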
diff --git a/tools/memory-model/litmus-tests/README b/tools/memory-model/litmus-tests/README
index 04096fb8b8d9..17eb9a8c222d 100644
--- a/tools/memory-model/litmus-tests/README
+++ b/tools/memory-model/litmus-tests/README
@@ -23,7 +23,8 @@ IRIW+mbonceonces+OnceOnce.litmus
	between each pair of reads. In other words, is smp_mb()
sufficient to cause two different reading processes to agree on
the order of a pair of writes, where each write is to a different
- variable by a different process?
+ variable by a different process? This litmus test is forbidden
+ by LKMM's propagation rule.
IRIW+poonceonces+OnceOnce.litmus
Test of independent reads from independent writes with nothing
@@ -63,6 +64,16 @@ LB+poonceonces.litmus
MP+onceassign+derefonce.litmus
As below, but with rcu_assign_pointer() and an rcu_dereference().
+MP+polockmbonce+poacquiresilsil.litmus
+ Protect the access with a lock and an smp_mb__after_spinlock()
+ in one process, and use an acquire load followed by a pair of
+ spin_is_locked() calls in the other process.
+
+MP+polockonce+poacquiresilsil.litmus
+ Protect the access with a lock in one process, and use an
+ acquire load followed by a pair of spin_is_locked() calls
+ in the other process.
+
MP+polocks.litmus
As below, but with the second access of the writer process
and the first access of reader process protected by a lock.
@@ -109,8 +120,10 @@ S+wmbonceonce+poacquireonce.litmus
WRC+poonceonces+Once.litmus
WRC+pooncerelease+rmbonceonce+Once.litmus
- These two are members of an extension of the MP litmus-test class
- in which the first write is moved to a separate process.
+ These two are members of an extension of the MP litmus-test
+ class in which the first write is moved to a separate process.
+ The second is forbidden because smp_store_release() is
+ A-cumulative in LKMM.
Z6.0+pooncelock+pooncelock+pombonce.litmus
Is the ordering provided by a spin_unlock() and a subsequent
diff --git a/tools/memory-model/litmus-tests/WRC+pooncerelease+rmbonceonce+Once.litmus b/tools/memory-model/litmus-tests/WRC+pooncerelease+rmbonceonce+Once.litmus
index 97fcbffde9a0..ad3448b941e6 100644
--- a/tools/memory-model/litmus-tests/WRC+pooncerelease+rmbonceonce+Once.litmus
+++ b/tools/memory-model/litmus-tests/WRC+pooncerelease+rmbonceonce+Once.litmus
@@ -5,7 +5,9 @@ C WRC+pooncerelease+rmbonceonce+Once
*
* This litmus test is an extension of the message-passing pattern, where
* the first write is moved to a separate process. Because it features
- * a release and a read memory barrier, it should be forbidden.
+ * a release and a read memory barrier, it should be forbidden. More
+ * specifically, this litmus test is forbidden because smp_store_release()
+ * is A-cumulative in LKMM.
*)
{}
diff --git a/tools/memory-model/lock.cat b/tools/memory-model/lock.cat
index ba4a4ec6d313..305ded17e741 100644
--- a/tools/memory-model/lock.cat
+++ b/tools/memory-model/lock.cat
@@ -4,46 +4,72 @@
* Copyright (C) 2017 Alan Stern <stern@rowland.harvard.edu>
*)
-(* Generate coherence orders and handle lock operations *)
+(*
+ * Generate coherence orders and handle lock operations
+ *
+ * Warning: spin_is_locked() crashes herd7 versions strictly before 7.48.
+ * spin_is_locked() is functional from herd7 version 7.49.
+ *)
include "cross.cat"
-(* From lock reads to their partner lock writes *)
-let lk-rmw = ([LKR] ; po-loc ; [LKW]) \ (po ; po)
-let rmw = rmw | lk-rmw
-
(*
- * A paired LKR must always see an unlocked value; spin_lock() calls nested
- * inside a critical section (for the same lock) always deadlock.
+ * The lock-related events generated by herd are as follows:
+ *
+ * LKR Lock-Read: the read part of a spin_lock() or successful
+ * spin_trylock() read-modify-write event pair
+ * LKW Lock-Write: the write part of a spin_lock() or successful
+ * spin_trylock() RMW event pair
+ * UL Unlock: a spin_unlock() event
+ * LF Lock-Fail: a failed spin_trylock() event
+ * RL Read-Locked: a spin_is_locked() event which returns True
+ * RU Read-Unlocked: a spin_is_locked() event which returns False
+ *
+ * LKR and LKW events always come paired, like all RMW event sequences.
+ *
+ * LKR, LF, RL, and RU are read events; LKR has Acquire ordering.
+ * LKW and UL are write events; UL has Release ordering.
+ * LKW, LF, RL, and RU have no ordering properties.
*)
-empty ([LKW] ; po-loc ; [domain(lk-rmw)]) \ (po-loc ; [UL] ; po-loc)
- as lock-nest
-(* The litmus test is invalid if an LKW event is not part of an RMW pair *)
-flag ~empty LKW \ range(lk-rmw) as unpaired-LKW
+(* Backward compatibility *)
+let RL = try RL with emptyset
+let RU = try RU with emptyset
-(* This will be allowed if we implement spin_is_locked() *)
-flag ~empty LKR \ domain(lk-rmw) as unpaired-LKR
+(* Treat RL as a kind of LF: a read with no ordering properties *)
+let LF = LF | RL
-(* There should be no R or W accesses to spinlocks *)
-let ALL-LOCKS = LKR | LKW | UL | LF
+(* There should be no ordinary R or W accesses to spinlocks *)
+let ALL-LOCKS = LKR | LKW | UL | LF | RU
flag ~empty [M \ IW] ; loc ; [ALL-LOCKS] as mixed-lock-accesses
+(* Link Lock-Reads to their RMW-partner Lock-Writes *)
+let lk-rmw = ([LKR] ; po-loc ; [LKW]) \ (po ; po)
+let rmw = rmw | lk-rmw
+
+(* The litmus test is invalid if an LKR/LKW event is not part of an RMW pair *)
+flag ~empty LKW \ range(lk-rmw) as unpaired-LKW
+flag ~empty LKR \ domain(lk-rmw) as unpaired-LKR
+
+(*
+ * An LKR must always see an unlocked value; spin_lock() calls nested
+ * inside a critical section (for the same lock) always deadlock.
+ *)
+empty ([LKW] ; po-loc ; [LKR]) \ (po-loc ; [UL] ; po-loc) as lock-nest
+
(* The final value of a spinlock should not be tested *)
flag ~empty [FW] ; loc ; [ALL-LOCKS] as lock-final
-
(*
* Put lock operations in their appropriate classes, but leave UL out of W
* until after the co relation has been generated.
*)
-let R = R | LKR | LF
+let R = R | LKR | LF | RU
let W = W | LKW
let Release = Release | UL
let Acquire = Acquire | LKR
-
(* Match LKW events to their corresponding UL events *)
let critical = ([LKW] ; po-loc ; [UL]) \ (po-loc ; [LKW | UL] ; po-loc)
@@ -53,27 +79,48 @@ flag ~empty UL \ range(critical) as unmatched-unlock
let UNMATCHED-LKW = LKW \ domain(critical)
empty ([UNMATCHED-LKW] ; loc ; [UNMATCHED-LKW]) \ id as unmatched-locks
-
(* rfi for LF events: link each LKW to the LF events in its critical section *)
let rfi-lf = ([LKW] ; po-loc ; [LF]) \ ([LKW] ; po-loc ; [UL] ; po-loc)
(* rfe for LF events *)
let all-possible-rfe-lf =
- (*
- * Given an LF event r, compute the possible rfe edges for that event
- * (all those starting from LKW events in other threads),
- * and then convert that relation to a set of single-edge relations.
- *)
- let possible-rfe-lf r =
- let pair-to-relation p = p ++ 0
- in map pair-to-relation ((LKW * {r}) & loc & ext)
- (* Do this for each LF event r that isn't in rfi-lf *)
- in map possible-rfe-lf (LF \ range(rfi-lf))
+ (*
+ * Given an LF event r, compute the possible rfe edges for that event
+ * (all those starting from LKW events in other threads),
+ * and then convert that relation to a set of single-edge relations.
+ *)
+ let possible-rfe-lf r =
+ let pair-to-relation p = p ++ 0
+ in map pair-to-relation ((LKW * {r}) & loc & ext)
+ (* Do this for each LF event r that isn't in rfi-lf *)
+ in map possible-rfe-lf (LF \ range(rfi-lf))
(* Generate all rf relations for LF events *)
with rfe-lf from cross(all-possible-rfe-lf)
-let rf = rf | rfi-lf | rfe-lf
+let rf-lf = rfe-lf | rfi-lf
+
+(*
+ * RU, i.e., spin_is_locked() returning False, is slightly different.
+ * We rely on the memory model to rule out cases where spin_is_locked()
+ * within one of the lock's critical sections returns False.
+ *)
+
+(* rfi for RU events: an RU may read from the last po-previous UL *)
+let rfi-ru = ([UL] ; po-loc ; [RU]) \ ([UL] ; po-loc ; [LKW] ; po-loc)
+
+(* rfe for RU events: an RU may read from an external UL or the initial write *)
+let all-possible-rfe-ru =
+ let possible-rfe-ru r =
+ let pair-to-relation p = p ++ 0
+ in map pair-to-relation (((UL | IW) * {r}) & loc & ext)
+ in map possible-rfe-ru RU
+
+(* Generate all rf relations for RU events *)
+with rfe-ru from cross(all-possible-rfe-ru)
+let rf-ru = rfe-ru | rfi-ru
+(* Final rf relation *)
+let rf = rf | rf-lf | rf-ru
(* Generate all co relations, including LKW events but not UL *)
let co0 = co0 | ([IW] ; loc ; [LKW]) |
diff --git a/tools/memory-model/scripts/checkalllitmus.sh b/tools/memory-model/scripts/checkalllitmus.sh
new file mode 100644
index 000000000000..af0aa15ab84e
--- /dev/null
+++ b/tools/memory-model/scripts/checkalllitmus.sh
@@ -0,0 +1,73 @@
+#!/bin/sh
+#
+# Run herd tests on all .litmus files in the specified directory (which
+# defaults to litmus-tests) and check each file's result against a "Result:"
+# comment within that litmus test. If the verification result does not
+# match that specified in the litmus test, this script prints an error
+# message prefixed with "^^^". It also outputs verification results to
+# a file whose name is that of the specified litmus test, but with ".out"
+# appended.
+#
+# Usage:
+# sh checkalllitmus.sh [ directory ]
+#
+# The LINUX_HERD_OPTIONS environment variable may be used to specify
+# arguments to herd, whose default is defined by the checklitmus.sh script.
+# Thus, one would normally run this in the directory containing the memory
+# model, optionally specifying the directory holding the litmus tests.
+#
+# This script makes no attempt to run the litmus tests concurrently.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# Copyright IBM Corporation, 2018
+#
+# Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+
+litmusdir=${1-litmus-tests}
+if test -d "$litmusdir" -a -r "$litmusdir" -a -x "$litmusdir"
+then
+ :
+else
+ echo ' --- ' error: $litmusdir is not an accessible directory
+ exit 255
+fi
+
+# Find the checklitmus script. If it is not where we expect it, then
+# assume that the caller has the PATH environment variable set
+# appropriately.
+if test -x scripts/checklitmus.sh
+then
+ clscript=scripts/checklitmus.sh
+else
+ clscript=checklitmus.sh
+fi
+
+# Run the script on all the litmus tests in the specified directory
+ret=0
+for i in $litmusdir/*.litmus
+do
+ if ! $clscript $i
+ then
+ ret=1
+ fi
+done
+if test "$ret" -ne 0
+then
+ echo " ^^^ VERIFICATION MISMATCHES"
+else
+	echo All litmus tests verified as expected.
+fi
+exit $ret
diff --git a/tools/memory-model/scripts/checklitmus.sh b/tools/memory-model/scripts/checklitmus.sh
new file mode 100644
index 000000000000..e2e477472844
--- /dev/null
+++ b/tools/memory-model/scripts/checklitmus.sh
@@ -0,0 +1,86 @@
+#!/bin/sh
+#
+# Run a herd test and check the result against a "Result:" comment within
+# the litmus test. If the verification result does not match that specified
+# in the litmus test, this script prints an error message prefixed with
+# "^^^" and exits with a non-zero status. It also outputs verification
+# results to a file whose name is that of the specified litmus test, but
+# with ".out" appended.
+#
+# Usage:
+# sh checklitmus.sh file.litmus
+#
+# The LINUX_HERD_OPTIONS environment variable may be used to specify
+# arguments to herd, which default to "-conf linux-kernel.cfg". Thus,
+# one would normally run this in the directory containing the memory model,
+# specifying the pathname of the litmus test to check.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# Copyright IBM Corporation, 2018
+#
+# Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+
+litmus=$1
+herdoptions=${LINUX_HERD_OPTIONS--conf linux-kernel.cfg}
+
+if test -f "$litmus" -a -r "$litmus"
+then
+ :
+else
+ echo ' --- ' error: \"$litmus\" is not a readable file
+ exit 255
+fi
+if grep -q '^ \* Result: ' $litmus
+then
+ outcome=`grep -m 1 '^ \* Result: ' $litmus | awk '{ print $3 }'`
+else
+ outcome=specified
+fi
+
+echo Herd options: $herdoptions > $litmus.out
+/usr/bin/time herd7 -o ~/tmp $herdoptions $litmus >> $litmus.out 2>&1
+grep "Herd options:" $litmus.out
+grep '^Observation' $litmus.out
+if grep -q '^Observation' $litmus.out
+then
+ :
+else
+ cat $litmus.out
+ echo ' ^^^ Verification error'
+ echo ' ^^^ Verification error' >> $litmus.out 2>&1
+ exit 255
+fi
+if test "$outcome" = DEADLOCK
+then
+ if grep '^Observation' $litmus.out | grep -q 'Never 0 0$'
+ then
+ ret=0
+ else
+ echo " ^^^ Unexpected non-$outcome verification"
+ echo " ^^^ Unexpected non-$outcome verification" >> $litmus.out 2>&1
+ ret=1
+ fi
+elif grep '^Observation' $litmus.out | grep -q $outcome || test "$outcome" = Maybe
+then
+ ret=0
+else
+ echo " ^^^ Unexpected non-$outcome verification"
+ echo " ^^^ Unexpected non-$outcome verification" >> $litmus.out 2>&1
+ ret=1
+fi
+tail -2 $litmus.out | head -1
+exit $ret
diff --git a/tools/objtool/arch/x86/include/asm/insn.h b/tools/objtool/arch/x86/include/asm/insn.h
index b3e32b010ab1..c2c01f84df75 100644
--- a/tools/objtool/arch/x86/include/asm/insn.h
+++ b/tools/objtool/arch/x86/include/asm/insn.h
@@ -208,4 +208,22 @@ static inline int insn_offset_immediate(struct insn *insn)
return insn_offset_displacement(insn) + insn->displacement.nbytes;
}
+#define POP_SS_OPCODE 0x1f
+#define MOV_SREG_OPCODE 0x8e
+
+/*
+ * Intel SDM Vol.3A 6.8.3 states:
+ * "Any single-step trap that would be delivered following the MOV to SS
+ * instruction or POP to SS instruction (because EFLAGS.TF is 1) is
+ * suppressed."
+ * This function returns true if @insn is MOV SS or POP SS. On these
+ * instructions, single stepping is suppressed.
+ */
+static inline int insn_masking_exception(struct insn *insn)
+{
+ return insn->opcode.bytes[0] == POP_SS_OPCODE ||
+ (insn->opcode.bytes[0] == MOV_SREG_OPCODE &&
+ X86_MODRM_REG(insn->modrm.bytes[0]) == 2);
+}
+
#endif /* _ASM_X86_INSN_H */
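
A sketch of how a caller might use the new helper when deciding whether it is
safe to single-step a probed instruction; insn_init() and insn_get_length()
are the decoder's usual entry points, while the surrounding probe logic is
illustrative:

	/* kaddr is assumed to point at the instruction bytes */
	struct insn insn;

	insn_init(&insn, kaddr, MAX_INSN_SIZE, 1 /* x86-64 */);
	insn_get_length(&insn);		/* decodes opcode and ModRM fields */

	if (insn_masking_exception(&insn))
		return -EBUSY;		/* the single-step trap would be masked */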
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 5409f6f6c48d..3a31b238f885 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -59,6 +59,31 @@ static struct instruction *next_insn_same_sec(struct objtool_file *file,
return next;
}
+static struct instruction *next_insn_same_func(struct objtool_file *file,
+ struct instruction *insn)
+{
+ struct instruction *next = list_next_entry(insn, list);
+ struct symbol *func = insn->func;
+
+ if (!func)
+ return NULL;
+
+ if (&next->list != &file->insn_list && next->func == func)
+ return next;
+
+ /* Check if we're already in the subfunction: */
+ if (func == func->cfunc)
+ return NULL;
+
+ /* Move to the subfunction: */
+ return find_insn(file, func->cfunc->sec, func->cfunc->offset);
+}
+
+#define func_for_each_insn_all(file, func, insn) \
+ for (insn = find_insn(file, func->sec, func->offset); \
+ insn; \
+ insn = next_insn_same_func(file, insn))
+
#define func_for_each_insn(file, func, insn) \
for (insn = find_insn(file, func->sec, func->offset); \
insn && &insn->list != &file->insn_list && \
@@ -149,10 +174,14 @@ static int __dead_end_function(struct objtool_file *file, struct symbol *func,
if (!strcmp(func->name, global_noreturns[i]))
return 1;
- if (!func->sec)
+ if (!func->len)
return 0;
- func_for_each_insn(file, func, insn) {
+ insn = find_insn(file, func->sec, func->offset);
+ if (!insn->func)
+ return 0;
+
+ func_for_each_insn_all(file, func, insn) {
empty = false;
if (insn->type == INSN_RETURN)
@@ -167,35 +196,28 @@ static int __dead_end_function(struct objtool_file *file, struct symbol *func,
* case, the function's dead-end status depends on whether the target
* of the sibling call returns.
*/
- func_for_each_insn(file, func, insn) {
- if (insn->sec != func->sec ||
- insn->offset >= func->offset + func->len)
- break;
-
+ func_for_each_insn_all(file, func, insn) {
if (insn->type == INSN_JUMP_UNCONDITIONAL) {
struct instruction *dest = insn->jump_dest;
- struct symbol *dest_func;
if (!dest)
/* sibling call to another file */
return 0;
- if (dest->sec != func->sec ||
- dest->offset < func->offset ||
- dest->offset >= func->offset + func->len) {
- /* local sibling call */
- dest_func = find_symbol_by_offset(dest->sec,
- dest->offset);
- if (!dest_func)
- continue;
+ if (dest->func && dest->func->pfunc != insn->func->pfunc) {
+ /* local sibling call */
if (recursion == 5) {
- WARN_FUNC("infinite recursion (objtool bug!)",
- dest->sec, dest->offset);
- return -1;
+ /*
+ * Infinite recursion: two functions
+ * have sibling calls to each other.
+ * This is a very rare case. It means
+ * they aren't dead ends.
+ */
+ return 0;
}
- return __dead_end_function(file, dest_func,
+ return __dead_end_function(file, dest->func,
recursion + 1);
}
}
@@ -422,7 +444,7 @@ static void add_ignores(struct objtool_file *file)
if (!ignore_func(file, func))
continue;
- func_for_each_insn(file, func, insn)
+ func_for_each_insn_all(file, func, insn)
insn->ignore = true;
}
}
@@ -782,30 +804,35 @@ out:
return ret;
}
-static int add_switch_table(struct objtool_file *file, struct symbol *func,
- struct instruction *insn, struct rela *table,
- struct rela *next_table)
+static int add_switch_table(struct objtool_file *file, struct instruction *insn,
+ struct rela *table, struct rela *next_table)
{
struct rela *rela = table;
struct instruction *alt_insn;
struct alternative *alt;
+ struct symbol *pfunc = insn->func->pfunc;
+ unsigned int prev_offset = 0;
list_for_each_entry_from(rela, &file->rodata->rela->rela_list, list) {
if (rela == next_table)
break;
- if (rela->sym->sec != insn->sec ||
- rela->addend <= func->offset ||
- rela->addend >= func->offset + func->len)
+ /* Make sure the switch table entries are consecutive: */
+ if (prev_offset && rela->offset != prev_offset + 8)
break;
- alt_insn = find_insn(file, insn->sec, rela->addend);
- if (!alt_insn) {
- WARN("%s: can't find instruction at %s+0x%x",
- file->rodata->rela->name, insn->sec->name,
- rela->addend);
- return -1;
- }
+ /* Detect function pointers from contiguous objects: */
+ if (rela->sym->sec == pfunc->sec &&
+ rela->addend == pfunc->offset)
+ break;
+
+ alt_insn = find_insn(file, rela->sym->sec, rela->addend);
+ if (!alt_insn)
+ break;
+
+ /* Make sure the jmp dest is in the function or subfunction: */
+ if (alt_insn->func->pfunc != pfunc)
+ break;
alt = malloc(sizeof(*alt));
if (!alt) {
@@ -815,6 +842,13 @@ static int add_switch_table(struct objtool_file *file, struct symbol *func,
alt->insn = alt_insn;
list_add_tail(&alt->list, &insn->alts);
+ prev_offset = rela->offset;
+ }
+
+ if (!prev_offset) {
+ WARN_FUNC("can't find switch jump table",
+ insn->sec, insn->offset);
+ return -1;
}
return 0;
@@ -869,40 +903,21 @@ static struct rela *find_switch_table(struct objtool_file *file,
{
struct rela *text_rela, *rodata_rela;
struct instruction *orig_insn = insn;
+ unsigned long table_offset;
- text_rela = find_rela_by_dest_range(insn->sec, insn->offset, insn->len);
- if (text_rela && text_rela->sym == file->rodata->sym) {
- /* case 1 */
- rodata_rela = find_rela_by_dest(file->rodata,
- text_rela->addend);
- if (rodata_rela)
- return rodata_rela;
-
- /* case 2 */
- rodata_rela = find_rela_by_dest(file->rodata,
- text_rela->addend + 4);
- if (!rodata_rela)
- return NULL;
-
- file->ignore_unreachables = true;
- return rodata_rela;
- }
-
- /* case 3 */
/*
	 * Backward search using the @first_jump_src links; these help avoid
	 * much of the 'in between' code, which could otherwise confuse the
	 * search.
*/
- for (insn = list_prev_entry(insn, list);
-
+ for (;
&insn->list != &file->insn_list &&
insn->sec == func->sec &&
insn->offset >= func->offset;
insn = insn->first_jump_src ?: list_prev_entry(insn, list)) {
- if (insn->type == INSN_JUMP_DYNAMIC)
+ if (insn != orig_insn && insn->type == INSN_JUMP_DYNAMIC)
break;
/* allow small jumps within the range */
@@ -918,18 +933,29 @@ static struct rela *find_switch_table(struct objtool_file *file,
if (!text_rela || text_rela->sym != file->rodata->sym)
continue;
+ table_offset = text_rela->addend;
+ if (text_rela->type == R_X86_64_PC32)
+ table_offset += 4;
+
/*
* Make sure the .rodata address isn't associated with a
* symbol. gcc jump tables are anonymous data.
*/
- if (find_symbol_containing(file->rodata, text_rela->addend))
+ if (find_symbol_containing(file->rodata, table_offset))
continue;
- rodata_rela = find_rela_by_dest(file->rodata, text_rela->addend);
- if (!rodata_rela)
- continue;
+ rodata_rela = find_rela_by_dest(file->rodata, table_offset);
+ if (rodata_rela) {
+ /*
+ * Use of RIP-relative switch jumps is quite rare, and
+			 * indicates a GCC quirk/bug which can leave dead
+ * code behind.
+ */
+ if (text_rela->type == R_X86_64_PC32)
+ file->ignore_unreachables = true;
- return rodata_rela;
+ return rodata_rela;
+ }
}
return NULL;
@@ -943,7 +969,7 @@ static int add_func_switch_tables(struct objtool_file *file,
struct rela *rela, *prev_rela = NULL;
int ret;
- func_for_each_insn(file, func, insn) {
+ func_for_each_insn_all(file, func, insn) {
if (!last)
last = insn;
@@ -974,8 +1000,7 @@ static int add_func_switch_tables(struct objtool_file *file,
* the beginning of another switch table in the same function.
*/
if (prev_jump) {
- ret = add_switch_table(file, func, prev_jump, prev_rela,
- rela);
+ ret = add_switch_table(file, prev_jump, prev_rela, rela);
if (ret)
return ret;
}
@@ -985,7 +1010,7 @@ static int add_func_switch_tables(struct objtool_file *file,
}
if (prev_jump) {
- ret = add_switch_table(file, func, prev_jump, prev_rela, NULL);
+ ret = add_switch_table(file, prev_jump, prev_rela, NULL);
if (ret)
return ret;
}
@@ -1749,15 +1774,13 @@ static int validate_branch(struct objtool_file *file, struct instruction *first,
while (1) {
next_insn = next_insn_same_sec(file, insn);
-
- if (file->c_file && func && insn->func && func != insn->func) {
+ if (file->c_file && func && insn->func && func != insn->func->pfunc) {
WARN("%s() falls through to next function %s()",
func->name, insn->func->name);
return 1;
}
- if (insn->func)
- func = insn->func;
+ func = insn->func ? insn->func->pfunc : NULL;
if (func && insn->ignore) {
WARN_FUNC("BUG: why am I validating an ignored function?",
@@ -1778,7 +1801,7 @@ static int validate_branch(struct objtool_file *file, struct instruction *first,
i = insn;
save_insn = NULL;
- func_for_each_insn_continue_reverse(file, func, i) {
+ func_for_each_insn_continue_reverse(file, insn->func, i) {
if (i->save) {
save_insn = i;
break;
@@ -1865,7 +1888,7 @@ static int validate_branch(struct objtool_file *file, struct instruction *first,
case INSN_JUMP_UNCONDITIONAL:
if (insn->jump_dest &&
(!func || !insn->jump_dest->func ||
- func == insn->jump_dest->func)) {
+ insn->jump_dest->func->pfunc == func)) {
ret = validate_branch(file, insn->jump_dest,
state);
if (ret)
@@ -2060,7 +2083,7 @@ static int validate_functions(struct objtool_file *file)
for_each_sec(file, sec) {
list_for_each_entry(func, &sec->symbol_list, list) {
- if (func->type != STT_FUNC)
+ if (func->type != STT_FUNC || func->pfunc != func)
continue;
insn = find_insn(file, sec, func->offset);
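
For intuition, the switch-table handling above is aimed at constructs like
the following illustrative function, which GCC typically lowers to an
indirect jump (INSN_JUMP_DYNAMIC) through an anonymous table in .rodata:

	/* Dense switch: usually compiled to a .rodata jump table */
	int classify(int x)
	{
		switch (x) {
		case 0: return 10;
		case 1: return 11;
		case 2: return 12;
		case 3: return 13;
		case 4: return 14;
		default: return -1;
		}
	}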
diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c
index c1c338661699..4e60e105583e 100644
--- a/tools/objtool/elf.c
+++ b/tools/objtool/elf.c
@@ -79,6 +79,19 @@ struct symbol *find_symbol_by_offset(struct section *sec, unsigned long offset)
return NULL;
}
+struct symbol *find_symbol_by_name(struct elf *elf, const char *name)
+{
+ struct section *sec;
+ struct symbol *sym;
+
+ list_for_each_entry(sec, &elf->sections, list)
+ list_for_each_entry(sym, &sec->symbol_list, list)
+ if (!strcmp(sym->name, name))
+ return sym;
+
+ return NULL;
+}
+
struct symbol *find_symbol_containing(struct section *sec, unsigned long offset)
{
struct symbol *sym;
@@ -203,10 +216,11 @@ static int read_sections(struct elf *elf)
static int read_symbols(struct elf *elf)
{
- struct section *symtab;
- struct symbol *sym;
+ struct section *symtab, *sec;
+ struct symbol *sym, *pfunc;
struct list_head *entry, *tmp;
int symbols_nr, i;
+ char *coldstr;
symtab = find_section_by_name(elf, ".symtab");
if (!symtab) {
@@ -281,6 +295,30 @@ static int read_symbols(struct elf *elf)
hash_add(sym->sec->symbol_hash, &sym->hash, sym->idx);
}
+ /* Create parent/child links for any cold subfunctions */
+ list_for_each_entry(sec, &elf->sections, list) {
+ list_for_each_entry(sym, &sec->symbol_list, list) {
+ if (sym->type != STT_FUNC)
+ continue;
+ sym->pfunc = sym->cfunc = sym;
+ coldstr = strstr(sym->name, ".cold.");
+ if (coldstr) {
+ coldstr[0] = '\0';
+ pfunc = find_symbol_by_name(elf, sym->name);
+ coldstr[0] = '.';
+
+ if (!pfunc) {
+ WARN("%s(): can't find parent function",
+ sym->name);
+ goto err;
+ }
+
+ sym->pfunc = pfunc;
+ pfunc->cfunc = sym;
+ }
+ }
+ }
+
return 0;
err:
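
The parent/child linking above targets GCC's hot/cold splitting: with
optimization enabled, a function like the sketch below may be emitted as two
symbols, for example foo and foo.cold.0 (names illustrative), and objtool now
treats the pair as a single function:

	extern void report_error(int x);	/* hypothetical cold-path helper */

	int foo(int x)
	{
		if (__builtin_expect(x < 0, 0)) {
			/* unlikely path; GCC may split this out as foo.cold.* */
			report_error(x);
			return -1;
		}
		return x * 2;
	}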
diff --git a/tools/objtool/elf.h b/tools/objtool/elf.h
index d86e2ff14466..de5cd2ddded9 100644
--- a/tools/objtool/elf.h
+++ b/tools/objtool/elf.h
@@ -61,6 +61,7 @@ struct symbol {
unsigned char bind, type;
unsigned long offset;
unsigned int len;
+ struct symbol *pfunc, *cfunc;
};
struct rela {
@@ -86,6 +87,7 @@ struct elf {
struct elf *elf_open(const char *name, int flags);
struct section *find_section_by_name(struct elf *elf, const char *name);
struct symbol *find_symbol_by_offset(struct section *sec, unsigned long offset);
+struct symbol *find_symbol_by_name(struct elf *elf, const char *name);
struct symbol *find_symbol_containing(struct section *sec, unsigned long offset);
struct rela *find_rela_by_dest(struct section *sec, unsigned long offset);
struct rela *find_rela_by_dest_range(struct section *sec, unsigned long offset,
diff --git a/tools/perf/Documentation/Makefile b/tools/perf/Documentation/Makefile
index db11478e30b4..42261a9b280e 100644
--- a/tools/perf/Documentation/Makefile
+++ b/tools/perf/Documentation/Makefile
@@ -47,7 +47,8 @@ man5dir=$(mandir)/man5
man7dir=$(mandir)/man7
ASCIIDOC=asciidoc
-ASCIIDOC_EXTRA = --unsafe
+ASCIIDOC_EXTRA = --unsafe -f asciidoc.conf
+ASCIIDOC_HTML = xhtml11
MANPAGE_XSL = manpage-normal.xsl
XMLTO_EXTRA =
INSTALL?=install
@@ -55,6 +56,14 @@ RM ?= rm -f
DOC_REF = origin/man
HTML_REF = origin/html
+ifdef USE_ASCIIDOCTOR
+ASCIIDOC = asciidoctor
+ASCIIDOC_EXTRA = -a compat-mode
+ASCIIDOC_EXTRA += -I. -rasciidoctor-extensions
+ASCIIDOC_EXTRA += -a mansource="perf" -a manmanual="perf Manual"
+ASCIIDOC_HTML = xhtml5
+endif
+
infodir?=$(prefix)/share/info
MAKEINFO=makeinfo
INSTALL_INFO=install-info
@@ -73,10 +82,12 @@ ifeq ($(_tmp_tool_path),)
missing_tools = $(ASCIIDOC)
endif
+ifndef USE_ASCIIDOCTOR
_tmp_tool_path := $(call get-executable,$(XMLTO))
ifeq ($(_tmp_tool_path),)
missing_tools += $(XMLTO)
endif
+endif
#
# For asciidoc ...
@@ -264,9 +275,17 @@ clean:
$(MAN_HTML): $(OUTPUT)%.html : %.txt
$(QUIET_ASCIIDOC)$(RM) $@+ $@ && \
- $(ASCIIDOC) -b xhtml11 -d manpage -f asciidoc.conf \
+ $(ASCIIDOC) -b $(ASCIIDOC_HTML) -d manpage \
+ $(ASCIIDOC_EXTRA) -aperf_version=$(PERF_VERSION) -o $@+ $< && \
+ mv $@+ $@
+
+ifdef USE_ASCIIDOCTOR
+$(OUTPUT)%.1 $(OUTPUT)%.5 $(OUTPUT)%.7 : $(OUTPUT)%.txt
+ $(QUIET_ASCIIDOC)$(RM) $@+ $@ && \
+ $(ASCIIDOC) -b manpage -d manpage \
$(ASCIIDOC_EXTRA) -aperf_version=$(PERF_VERSION) -o $@+ $< && \
mv $@+ $@
+endif
$(OUTPUT)%.1 $(OUTPUT)%.5 $(OUTPUT)%.7 : $(OUTPUT)%.xml
$(QUIET_XMLTO)$(RM) $@ && \
@@ -274,7 +293,7 @@ $(OUTPUT)%.1 $(OUTPUT)%.5 $(OUTPUT)%.7 : $(OUTPUT)%.xml
$(OUTPUT)%.xml : %.txt
$(QUIET_ASCIIDOC)$(RM) $@+ $@ && \
- $(ASCIIDOC) -b docbook -d manpage -f asciidoc.conf \
+ $(ASCIIDOC) -b docbook -d manpage \
$(ASCIIDOC_EXTRA) -aperf_version=$(PERF_VERSION) -o $@+ $< && \
mv $@+ $@
@@ -321,13 +340,13 @@ howto-index.txt: howto-index.sh $(wildcard howto/*.txt)
mv $@+ $@
$(patsubst %,%.html,$(ARTICLES)) : %.html : %.txt
- $(QUIET_ASCIIDOC)$(ASCIIDOC) -b xhtml11 $*.txt
+ $(QUIET_ASCIIDOC)$(ASCIIDOC) -b $(ASCIIDOC_HTML) $*.txt
WEBDOC_DEST = /pub/software/tools/perf/docs
$(patsubst %.txt,%.html,$(wildcard howto/*.txt)): %.html : %.txt
$(QUIET_ASCIIDOC)$(RM) $@+ $@ && \
- sed -e '1,/^$$/d' $< | $(ASCIIDOC) -b xhtml11 - >$@+ && \
+ sed -e '1,/^$$/d' $< | $(ASCIIDOC) -b $(ASCIIDOC_HTML) - >$@+ && \
mv $@+ $@
# UNIMPLEMENTED
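
Given the ifdef blocks above, the Asciidoctor path is presumably selected by
passing USE_ASCIIDOCTOR=1 on the make command line; without it, the build
falls back to the existing asciidoc plus xmlto pipeline.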
diff --git a/tools/perf/Documentation/asciidoctor-extensions.rb b/tools/perf/Documentation/asciidoctor-extensions.rb
new file mode 100644
index 000000000000..d148fe95c0c4
--- /dev/null
+++ b/tools/perf/Documentation/asciidoctor-extensions.rb
@@ -0,0 +1,29 @@
+require 'asciidoctor'
+require 'asciidoctor/extensions'
+
+module Perf
+ module Documentation
+ class LinkPerfProcessor < Asciidoctor::Extensions::InlineMacroProcessor
+ use_dsl
+
+ named :chrome
+
+ def process(parent, target, attrs)
+ if parent.document.basebackend? 'html'
+ %(<a href="#{target}.html">#{target}(#{attrs[1]})</a>\n)
+ elsif parent.document.basebackend? 'manpage'
+ "#{target}(#{attrs[1]})"
+ elsif parent.document.basebackend? 'docbook'
+ "<citerefentry>\n" \
+ "<refentrytitle>#{target}</refentrytitle>" \
+ "<manvolnum>#{attrs[1]}</manvolnum>\n" \
+ "</citerefentry>\n"
+ end
+ end
+ end
+ end
+end
+
+Asciidoctor::Extensions.register do
+ inline_macro Perf::Documentation::LinkPerfProcessor, :linkperf
+end
diff --git a/tools/perf/Documentation/perf-buildid-cache.txt b/tools/perf/Documentation/perf-buildid-cache.txt
index 73c2650bd0db..f6de0952ff3c 100644
--- a/tools/perf/Documentation/perf-buildid-cache.txt
+++ b/tools/perf/Documentation/perf-buildid-cache.txt
@@ -48,6 +48,9 @@ OPTIONS
--purge=::
Purge all cached binaries including older caches which have specified
path from the cache.
+-P::
+--purge-all::
+	Purge all cached binaries. This will flush out the entire cache.
-M::
--missing=::
List missing build ids in the cache for the specified file.
@@ -59,7 +62,9 @@ OPTIONS
exactly same build-id, that is replaced by new one. It can be used
to update kallsyms and kernel dso to vmlinux in order to support
annotation.
-
+-l::
+--list::
+ List all valid binaries from cache.
-v::
--verbose::
Be more verbose.
diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index e6c3b4e555c2..3a822f308e6d 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -116,6 +116,22 @@ Do not aggregate counts across all monitored CPUs.
print counts using a CSV-style output to make it easy to import directly into
spreadsheets. Columns are separated by the string specified in SEP.
+--table:: Display time for each run (-r option), in a table format, e.g.:
+
+ $ perf stat --null -r 5 --table perf bench sched pipe
+
+ Performance counter stats for 'perf bench sched pipe' (5 runs):
+
+ # Table of individual measurements:
+ 5.189 (-0.293) #
+ 5.189 (-0.294) #
+ 5.186 (-0.296) #
+ 5.663 (+0.181) ##
+ 6.186 (+0.703) ####
+
+ # Final result:
+ 5.483 +- 0.198 seconds time elapsed ( +- 3.62% )
+
-G name::
--cgroup name::
monitor only in the container (cgroup) called "name". This option is available only
diff --git a/tools/perf/Documentation/perf.data-file-format.txt b/tools/perf/Documentation/perf.data-file-format.txt
index d00f0d51cab8..dfb218feaad9 100644
--- a/tools/perf/Documentation/perf.data-file-format.txt
+++ b/tools/perf/Documentation/perf.data-file-format.txt
@@ -111,8 +111,8 @@ A perf_header_string with the CPU architecture (uname -m)
A structure defining the number of CPUs.
struct nr_cpus {
- uint32_t nr_cpus_online;
uint32_t nr_cpus_available; /* CPUs not yet onlined */
+ uint32_t nr_cpus_online;
};
HEADER_CPUDESC = 8,
@@ -153,10 +153,18 @@ struct {
HEADER_CPU_TOPOLOGY = 13,
String lists defining the core and CPU threads topology.
+The string lists are followed by a variable-length array
+which contains the core_id and socket_id of each CPU.
+The number of entries can be determined by the size of the
+section minus the sizes of both string lists.
struct {
struct perf_header_string_list cores; /* Variable length */
struct perf_header_string_list threads; /* Variable length */
+ struct {
+ uint32_t core_id;
+ uint32_t socket_id;
+ } cpus[nr]; /* Variable length records */
};
Example:
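A sketch of how a reader could size the new cpus[] array; sec_size,
cores_len and threads_len are illustrative names for the section size
and the byte lengths of the two string lists:

    /* illustrative only: entries = remaining bytes / record size */
    uint64_t nr = (sec_size - cores_len - threads_len) /
                  (2 * sizeof(uint32_t)); /* core_id + socket_id */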
diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config
index ae7dc46e8f8a..b5ac356ba323 100644
--- a/tools/perf/Makefile.config
+++ b/tools/perf/Makefile.config
@@ -885,6 +885,8 @@ endif
# Among the variables below, these:
# perfexecdir
+# perf_include_dir
+# perf_examples_dir
# template_dir
# mandir
# infodir
@@ -904,6 +906,8 @@ bindir = $(abspath $(prefix)/$(bindir_relative))
mandir = share/man
infodir = share/info
perfexecdir = libexec/perf-core
+perf_include_dir = lib/include/perf
+perf_examples_dir = lib/examples/perf
sharedir = $(prefix)/share
template_dir = share/perf-core/templates
STRACE_GROUPS_DIR = share/perf-core/strace/groups
@@ -934,6 +938,8 @@ bindir_SQ = $(subst ','\'',$(bindir))
mandir_SQ = $(subst ','\'',$(mandir))
infodir_SQ = $(subst ','\'',$(infodir))
perfexecdir_SQ = $(subst ','\'',$(perfexecdir))
+perf_include_dir_SQ = $(subst ','\'',$(perf_include_dir))
+perf_examples_dir_SQ = $(subst ','\'',$(perf_examples_dir))
template_dir_SQ = $(subst ','\'',$(template_dir))
htmldir_SQ = $(subst ','\'',$(htmldir))
tipdir_SQ = $(subst ','\'',$(tipdir))
@@ -944,14 +950,20 @@ srcdir_SQ = $(subst ','\'',$(srcdir))
ifneq ($(filter /%,$(firstword $(perfexecdir))),)
perfexec_instdir = $(perfexecdir)
+perf_include_instdir = $(perf_include_dir)
+perf_examples_instdir = $(perf_examples_dir)
STRACE_GROUPS_INSTDIR = $(STRACE_GROUPS_DIR)
tip_instdir = $(tipdir)
else
perfexec_instdir = $(prefix)/$(perfexecdir)
+perf_include_instdir = $(prefix)/$(perf_include_dir)
+perf_examples_instdir = $(prefix)/$(perf_examples_dir)
STRACE_GROUPS_INSTDIR = $(prefix)/$(STRACE_GROUPS_DIR)
tip_instdir = $(prefix)/$(tipdir)
endif
perfexec_instdir_SQ = $(subst ','\'',$(perfexec_instdir))
+perf_include_instdir_SQ = $(subst ','\'',$(perf_include_instdir))
+perf_examples_instdir_SQ = $(subst ','\'',$(perf_examples_instdir))
STRACE_GROUPS_INSTDIR_SQ = $(subst ','\'',$(STRACE_GROUPS_INSTDIR))
tip_instdir_SQ = $(subst ','\'',$(tip_instdir))
@@ -999,6 +1011,8 @@ $(call detected_var,ETC_PERFCONFIG_SQ)
$(call detected_var,STRACE_GROUPS_DIR_SQ)
$(call detected_var,prefix_SQ)
$(call detected_var,perfexecdir_SQ)
+$(call detected_var,perf_include_dir_SQ)
+$(call detected_var,perf_examples_dir_SQ)
$(call detected_var,tipdir_SQ)
$(call detected_var,srcdir_SQ)
$(call detected_var,LIBDIR)
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index 83e453de36f8..ecc9fc952655 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -767,6 +767,16 @@ ifndef NO_JVMTI
endif
$(call QUIET_INSTALL, libexec) \
$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)'
+ifndef NO_LIBBPF
+ $(call QUIET_INSTALL, lib) \
+ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perf_include_instdir_SQ)/bpf'
+ $(call QUIET_INSTALL, include/bpf) \
+ $(INSTALL) include/bpf/*.h '$(DESTDIR_SQ)$(perf_include_instdir_SQ)/bpf'
+ $(call QUIET_INSTALL, lib) \
+ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perf_examples_instdir_SQ)/bpf'
+ $(call QUIET_INSTALL, examples/bpf) \
+ $(INSTALL) examples/bpf/*.c '$(DESTDIR_SQ)$(perf_examples_instdir_SQ)/bpf'
+endif
$(call QUIET_INSTALL, perf-archive) \
$(INSTALL) $(OUTPUT)perf-archive -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)'
$(call QUIET_INSTALL, perf-with-kcore) \
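Assuming the default prefix (the user's home directory), an install
with libbpf support enabled would now also place, roughly:

    $HOME/lib/include/perf/bpf/bpf.h
    $HOME/lib/examples/perf/bpf/5sec.c
    $HOME/lib/examples/perf/bpf/empty.c

per the perf_include_dir/perf_examples_dir defaults added in
Makefile.config above.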
diff --git a/tools/perf/arch/arm/tests/dwarf-unwind.c b/tools/perf/arch/arm/tests/dwarf-unwind.c
index 8cb347760233..9a0242e74cfc 100644
--- a/tools/perf/arch/arm/tests/dwarf-unwind.c
+++ b/tools/perf/arch/arm/tests/dwarf-unwind.c
@@ -25,7 +25,7 @@ static int sample_ustack(struct perf_sample *sample,
sp = (unsigned long) regs[PERF_REG_ARM_SP];
- map = map_groups__find(thread->mg, MAP__VARIABLE, (u64) sp);
+ map = map_groups__find(thread->mg, (u64)sp);
if (!map) {
pr_debug("failed to get stack map\n");
free(buf);
diff --git a/tools/perf/arch/arm64/tests/dwarf-unwind.c b/tools/perf/arch/arm64/tests/dwarf-unwind.c
index e907f0f4c20c..5522ce384723 100644
--- a/tools/perf/arch/arm64/tests/dwarf-unwind.c
+++ b/tools/perf/arch/arm64/tests/dwarf-unwind.c
@@ -25,7 +25,7 @@ static int sample_ustack(struct perf_sample *sample,
sp = (unsigned long) regs[PERF_REG_ARM64_SP];
- map = map_groups__find(thread->mg, MAP__VARIABLE, (u64) sp);
+ map = map_groups__find(thread->mg, (u64)sp);
if (!map) {
pr_debug("failed to get stack map\n");
free(buf);
diff --git a/tools/perf/arch/powerpc/tests/dwarf-unwind.c b/tools/perf/arch/powerpc/tests/dwarf-unwind.c
index 30cbbd6d5be0..5f39efef0856 100644
--- a/tools/perf/arch/powerpc/tests/dwarf-unwind.c
+++ b/tools/perf/arch/powerpc/tests/dwarf-unwind.c
@@ -26,7 +26,7 @@ static int sample_ustack(struct perf_sample *sample,
sp = (unsigned long) regs[PERF_REG_POWERPC_R1];
- map = map_groups__find(thread->mg, MAP__VARIABLE, (u64) sp);
+ map = map_groups__find(thread->mg, (u64)sp);
if (!map) {
pr_debug("failed to get stack map\n");
free(buf);
diff --git a/tools/perf/arch/powerpc/util/skip-callchain-idx.c b/tools/perf/arch/powerpc/util/skip-callchain-idx.c
index 0c370f81e002..3598b8b75d27 100644
--- a/tools/perf/arch/powerpc/util/skip-callchain-idx.c
+++ b/tools/perf/arch/powerpc/util/skip-callchain-idx.c
@@ -248,8 +248,7 @@ int arch_skip_callchain_idx(struct thread *thread, struct ip_callchain *chain)
ip = chain->ips[2];
- thread__find_addr_location(thread, PERF_RECORD_MISC_USER,
- MAP__FUNCTION, ip, &al);
+ thread__find_symbol(thread, PERF_RECORD_MISC_USER, ip, &al);
if (al.map)
dso = al.map->dso;
diff --git a/tools/perf/arch/x86/tests/dwarf-unwind.c b/tools/perf/arch/x86/tests/dwarf-unwind.c
index 95036c7a59e8..7879df34569a 100644
--- a/tools/perf/arch/x86/tests/dwarf-unwind.c
+++ b/tools/perf/arch/x86/tests/dwarf-unwind.c
@@ -26,7 +26,7 @@ static int sample_ustack(struct perf_sample *sample,
sp = (unsigned long) regs[PERF_REG_X86_SP];
- map = map_groups__find(thread->mg, MAP__VARIABLE, (u64) sp);
+ map = map_groups__find(thread->mg, (u64)sp);
if (!map) {
pr_debug("failed to get stack map\n");
free(buf);
diff --git a/tools/perf/arch/x86/util/Build b/tools/perf/arch/x86/util/Build
index f95e6f46ef0d..844b8f335532 100644
--- a/tools/perf/arch/x86/util/Build
+++ b/tools/perf/arch/x86/util/Build
@@ -4,6 +4,8 @@ libperf-y += pmu.o
libperf-y += kvm-stat.o
libperf-y += perf_regs.o
libperf-y += group.o
+libperf-y += machine.o
+libperf-y += event.o
libperf-$(CONFIG_DWARF) += dwarf-regs.o
libperf-$(CONFIG_BPF_PROLOGUE) += dwarf-regs.o
diff --git a/tools/perf/arch/x86/util/event.c b/tools/perf/arch/x86/util/event.c
new file mode 100644
index 000000000000..675a0213044d
--- /dev/null
+++ b/tools/perf/arch/x86/util/event.c
@@ -0,0 +1,76 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/types.h>
+#include <linux/string.h>
+
+#include "../../util/machine.h"
+#include "../../util/tool.h"
+#include "../../util/map.h"
+#include "../../util/util.h"
+#include "../../util/debug.h"
+
+#if defined(__x86_64__)
+
+int perf_event__synthesize_extra_kmaps(struct perf_tool *tool,
+ perf_event__handler_t process,
+ struct machine *machine)
+{
+ int rc = 0;
+ struct map *pos;
+ struct map_groups *kmaps = &machine->kmaps;
+ struct maps *maps = &kmaps->maps;
+ union perf_event *event = zalloc(sizeof(event->mmap) +
+ machine->id_hdr_size);
+
+ if (!event) {
+ pr_debug("Not enough memory synthesizing mmap event "
+ "for extra kernel maps\n");
+ return -1;
+ }
+
+ for (pos = maps__first(maps); pos; pos = map__next(pos)) {
+ struct kmap *kmap;
+ size_t size;
+
+ if (!__map__is_extra_kernel_map(pos))
+ continue;
+
+ kmap = map__kmap(pos);
+
+ size = sizeof(event->mmap) - sizeof(event->mmap.filename) +
+ PERF_ALIGN(strlen(kmap->name) + 1, sizeof(u64)) +
+ machine->id_hdr_size;
+
+ memset(event, 0, size);
+
+ event->mmap.header.type = PERF_RECORD_MMAP;
+
+ /*
+ * kernel uses 0 for user space maps, see kernel/perf_event.c
+ * __perf_event_mmap
+ */
+ if (machine__is_host(machine))
+ event->header.misc = PERF_RECORD_MISC_KERNEL;
+ else
+ event->header.misc = PERF_RECORD_MISC_GUEST_KERNEL;
+
+ event->mmap.header.size = size;
+
+ event->mmap.start = pos->start;
+ event->mmap.len = pos->end - pos->start;
+ event->mmap.pgoff = pos->pgoff;
+ event->mmap.pid = machine->pid;
+
+ strlcpy(event->mmap.filename, kmap->name, PATH_MAX);
+
+ if (perf_tool__process_synth_event(tool, event, machine,
+ process) != 0) {
+ rc = -1;
+ break;
+ }
+ }
+
+ free(event);
+ return rc;
+}
+
+#endif
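A worked example of the size computation above, using the
"_entry_trampoline" name from the companion machine.c change below and
an id_hdr_size of 0 (figures illustrative):

    /* strlen("_entry_trampoline") + 1 == 18, PERF_ALIGN(18, 8) == 24,
     * so size == sizeof(event->mmap) - sizeof(event->mmap.filename) + 24
     */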
diff --git a/tools/perf/arch/x86/util/machine.c b/tools/perf/arch/x86/util/machine.c
new file mode 100644
index 000000000000..4520ac53caa9
--- /dev/null
+++ b/tools/perf/arch/x86/util/machine.c
@@ -0,0 +1,103 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/types.h>
+#include <linux/string.h>
+#include <stdlib.h>
+
+#include "../../util/machine.h"
+#include "../../util/map.h"
+#include "../../util/symbol.h"
+#include "../../util/sane_ctype.h"
+
+#include <symbol/kallsyms.h>
+
+#if defined(__x86_64__)
+
+struct extra_kernel_map_info {
+ int cnt;
+ int max_cnt;
+ struct extra_kernel_map *maps;
+ bool get_entry_trampolines;
+ u64 entry_trampoline;
+};
+
+static int add_extra_kernel_map(struct extra_kernel_map_info *mi, u64 start,
+ u64 end, u64 pgoff, const char *name)
+{
+ if (mi->cnt >= mi->max_cnt) {
+ void *buf;
+ size_t sz;
+
+ mi->max_cnt = mi->max_cnt ? mi->max_cnt * 2 : 32;
+ sz = sizeof(struct extra_kernel_map) * mi->max_cnt;
+ buf = realloc(mi->maps, sz);
+ if (!buf)
+ return -1;
+ mi->maps = buf;
+ }
+
+ mi->maps[mi->cnt].start = start;
+ mi->maps[mi->cnt].end = end;
+ mi->maps[mi->cnt].pgoff = pgoff;
+ strlcpy(mi->maps[mi->cnt].name, name, KMAP_NAME_LEN);
+
+ mi->cnt += 1;
+
+ return 0;
+}
+
+static int find_extra_kernel_maps(void *arg, const char *name, char type,
+ u64 start)
+{
+ struct extra_kernel_map_info *mi = arg;
+
+ if (!mi->entry_trampoline && kallsyms2elf_binding(type) == STB_GLOBAL &&
+ !strcmp(name, "_entry_trampoline")) {
+ mi->entry_trampoline = start;
+ return 0;
+ }
+
+ if (is_entry_trampoline(name)) {
+ u64 end = start + page_size;
+
+ return add_extra_kernel_map(mi, start, end, 0, name);
+ }
+
+ return 0;
+}
+
+int machine__create_extra_kernel_maps(struct machine *machine,
+ struct dso *kernel)
+{
+ struct extra_kernel_map_info mi = { .cnt = 0, };
+ char filename[PATH_MAX];
+ int ret;
+ int i;
+
+ machine__get_kallsyms_filename(machine, filename, PATH_MAX);
+
+ if (symbol__restricted_filename(filename, "/proc/kallsyms"))
+ return 0;
+
+ ret = kallsyms__parse(filename, &mi, find_extra_kernel_maps);
+ if (ret)
+ goto out_free;
+
+ if (!mi.entry_trampoline)
+ goto out_free;
+
+ for (i = 0; i < mi.cnt; i++) {
+ struct extra_kernel_map *xm = &mi.maps[i];
+
+ xm->pgoff = mi.entry_trampoline;
+ ret = machine__create_extra_kernel_map(machine, kernel, xm);
+ if (ret)
+ goto out_free;
+ }
+
+ machine->trampolines_mapped = mi.cnt;
+out_free:
+ free(mi.maps);
+ return ret;
+}
+
+#endif
diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c
index 944070e98a2c..63eb49082774 100644
--- a/tools/perf/bench/numa.c
+++ b/tools/perf/bench/numa.c
@@ -175,7 +175,7 @@ static const struct option options[] = {
OPT_UINTEGER('s', "nr_secs" , &p0.nr_secs, "max number of seconds to run (default: 5 secs)"),
OPT_UINTEGER('u', "usleep" , &p0.sleep_usecs, "usecs to sleep per loop iteration"),
- OPT_BOOLEAN('R', "data_reads" , &p0.data_reads, "access the data via writes (can be mixed with -W)"),
+ OPT_BOOLEAN('R', "data_reads" , &p0.data_reads, "access the data via reads (can be mixed with -W)"),
OPT_BOOLEAN('W', "data_writes" , &p0.data_writes, "access the data via writes (can be mixed with -R)"),
OPT_BOOLEAN('B', "data_backwards", &p0.data_backwards, "access the data backwards as well"),
OPT_BOOLEAN('Z', "data_zero_memset", &p0.data_zero_memset,"access the data via glibc bzero only"),
diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index 51709a961496..da5704240239 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -45,6 +45,7 @@ struct perf_annotate {
bool print_line;
bool skip_missing;
bool has_br_stack;
+ bool group_set;
const char *sym_hist_filter;
const char *cpu_list;
DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
@@ -228,7 +229,7 @@ static int perf_evsel__add_sample(struct perf_evsel *evsel,
*/
if (al->sym != NULL) {
rb_erase(&al->sym->rb_node,
- &al->map->dso->symbols[al->map->type]);
+ &al->map->dso->symbols);
symbol__delete(al->sym);
dso__reset_find_symbol_cache(al->map->dso);
}
@@ -508,6 +509,9 @@ int cmd_annotate(int argc, const char **argv)
"Don't shorten the displayed pathnames"),
OPT_BOOLEAN(0, "skip-missing", &annotate.skip_missing,
"Skip symbols that cannot be annotated"),
+ OPT_BOOLEAN_SET(0, "group", &symbol_conf.event_group,
+ &annotate.group_set,
+ "Show event group information together"),
OPT_STRING('C', "cpu", &annotate.cpu_list, "cpu", "list of cpus to profile"),
OPT_CALLBACK(0, "symfs", NULL, "directory",
"Look for files with symbols relative to this directory",
@@ -570,6 +574,9 @@ int cmd_annotate(int argc, const char **argv)
annotate.has_br_stack = perf_header__has_feat(&annotate.session->header,
HEADER_BRANCH_STACK);
+ if (annotate.group_set)
+ perf_evlist__force_leader(annotate.session->evlist);
+
ret = symbol__annotation_init();
if (ret < 0)
goto out_delete;
diff --git a/tools/perf/builtin-buildid-cache.c b/tools/perf/builtin-buildid-cache.c
index 41db2cba77eb..115110a4796a 100644
--- a/tools/perf/builtin-buildid-cache.c
+++ b/tools/perf/builtin-buildid-cache.c
@@ -25,6 +25,7 @@
#include "util/session.h"
#include "util/symbol.h"
#include "util/time-utils.h"
+#include "util/probe-file.h"
static int build_id_cache__kcore_buildid(const char *proc_dir, char *sbuildid)
{
@@ -239,6 +240,34 @@ out:
return err;
}
+static int build_id_cache__purge_all(void)
+{
+ struct strlist *list;
+ struct str_node *pos;
+ int err = 0;
+ char *buf;
+
+ list = build_id_cache__list_all(false);
+ if (!list) {
+ pr_debug("Failed to get buildids: -%d\n", errno);
+ return -EINVAL;
+ }
+
+ strlist__for_each_entry(pos, list) {
+ buf = build_id_cache__origname(pos->s);
+ err = build_id_cache__remove_s(pos->s);
+ pr_debug("Removing %s (%s): %s\n", buf, pos->s,
+ err ? "FAIL" : "Ok");
+ free(buf);
+ if (err)
+ break;
+ }
+ strlist__delete(list);
+
+ pr_debug("Purged all: %s\n", err ? "FAIL" : "Ok");
+ return err;
+}
+
static bool dso__missing_buildid_cache(struct dso *dso, int parm __maybe_unused)
{
char filename[PATH_MAX];
@@ -297,6 +326,26 @@ static int build_id_cache__update_file(const char *filename, struct nsinfo *nsi)
return err;
}
+static int build_id_cache__show_all(void)
+{
+ struct strlist *bidlist;
+ struct str_node *nd;
+ char *buf;
+
+ bidlist = build_id_cache__list_all(true);
+ if (!bidlist) {
+ pr_debug("Failed to get buildids: -%d\n", errno);
+ return -1;
+ }
+ strlist__for_each_entry(nd, bidlist) {
+ buf = build_id_cache__origname(nd->s);
+ fprintf(stdout, "%s %s\n", nd->s, buf);
+ free(buf);
+ }
+ strlist__delete(bidlist);
+ return 0;
+}
+
int cmd_buildid_cache(int argc, const char **argv)
{
struct strlist *list;
@@ -304,6 +353,9 @@ int cmd_buildid_cache(int argc, const char **argv)
int ret = 0;
int ns_id = -1;
bool force = false;
+ bool list_files = false;
+ bool opts_flag = false;
+ bool purge_all = false;
char const *add_name_list_str = NULL,
*remove_name_list_str = NULL,
*purge_name_list_str = NULL,
@@ -327,6 +379,8 @@ int cmd_buildid_cache(int argc, const char **argv)
"file(s) to remove"),
OPT_STRING('p', "purge", &purge_name_list_str, "file list",
"file(s) to remove (remove old caches too)"),
+ OPT_BOOLEAN('P', "purge-all", &purge_all, "purge all cached files"),
+ OPT_BOOLEAN('l', "list", &list_files, "list all cached files"),
OPT_STRING('M', "missing", &missing_filename, "file",
"to find missing build ids in the cache"),
OPT_BOOLEAN('f', "force", &force, "don't complain, do it"),
@@ -344,11 +398,20 @@ int cmd_buildid_cache(int argc, const char **argv)
argc = parse_options(argc, argv, buildid_cache_options,
buildid_cache_usage, 0);
- if (argc || (!add_name_list_str && !kcore_filename &&
- !remove_name_list_str && !purge_name_list_str &&
- !missing_filename && !update_name_list_str))
+ opts_flag = add_name_list_str || kcore_filename ||
+ remove_name_list_str || purge_name_list_str ||
+ missing_filename || update_name_list_str ||
+ purge_all;
+
+ if (argc || !(list_files || opts_flag))
usage_with_options(buildid_cache_usage, buildid_cache_options);
+ /* -l is exclusive. It can not be used with other options. */
+ if (list_files && opts_flag) {
+ usage_with_options_msg(buildid_cache_usage,
+ buildid_cache_options, "-l is exclusive.\n");
+ }
+
if (ns_id > 0)
nsi = nsinfo__new(ns_id);
@@ -366,6 +429,11 @@ int cmd_buildid_cache(int argc, const char **argv)
setup_pager();
+ if (list_files) {
+ ret = build_id_cache__show_all();
+ goto out;
+ }
+
if (add_name_list_str) {
list = strlist__new(add_name_list_str, NULL);
if (list) {
@@ -420,6 +488,13 @@ int cmd_buildid_cache(int argc, const char **argv)
}
}
+ if (purge_all) {
+ if (build_id_cache__purge_all()) {
+ pr_warning("Couldn't remove some caches. Error: %s.\n",
+ str_error_r(errno, sbuf, sizeof(sbuf)));
+ }
+ }
+
if (missing_filename)
ret = build_id_cache__fprintf_missing(session, stdout);
diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
index 40fe919bbcf3..a3b346359ba0 100644
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c
@@ -440,9 +440,7 @@ static int perf_event__inject_buildid(struct perf_tool *tool,
goto repipe;
}
- thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, sample->ip, &al);
-
- if (al.map != NULL) {
+ if (thread__find_map(thread, sample->cpumode, sample->ip, &al)) {
if (!al.map->dso->hit) {
al.map->dso->hit = 1;
if (map__load(al.map) >= 0) {
diff --git a/tools/perf/builtin-kallsyms.c b/tools/perf/builtin-kallsyms.c
index bcfb363112d3..90d1a2305b72 100644
--- a/tools/perf/builtin-kallsyms.c
+++ b/tools/perf/builtin-kallsyms.c
@@ -27,7 +27,7 @@ static int __cmd_kallsyms(int argc, const char **argv)
for (i = 0; i < argc; ++i) {
struct map *map;
- struct symbol *symbol = machine__find_kernel_function_by_name(machine, argv[i], &map);
+ struct symbol *symbol = machine__find_kernel_symbol_by_name(machine, argv[i], &map);
if (symbol == NULL) {
printf("%s: not found\n", argv[i]);
diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index ae11e4c3516a..54d3f21b0e62 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -1004,7 +1004,7 @@ static void __print_slab_result(struct rb_root *root,
if (is_caller) {
addr = data->call_site;
if (!raw_ip)
- sym = machine__find_kernel_function(machine, addr, &map);
+ sym = machine__find_kernel_symbol(machine, addr, &map);
} else
addr = data->ptr;
@@ -1068,7 +1068,7 @@ static void __print_page_alloc_result(struct perf_session *session, int n_lines)
char *caller = buf;
data = rb_entry(next, struct page_stat, node);
- sym = machine__find_kernel_function(machine, data->callsite, &map);
+ sym = machine__find_kernel_symbol(machine, data->callsite, &map);
if (sym)
caller = sym->name;
else
@@ -1110,7 +1110,7 @@ static void __print_page_caller_result(struct perf_session *session, int n_lines
char *caller = buf;
data = rb_entry(next, struct page_stat, node);
- sym = machine__find_kernel_function(machine, data->callsite, &map);
+ sym = machine__find_kernel_symbol(machine, data->callsite, &map);
if (sym)
caller = sym->name;
else
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 0f198f6d9b77..ad978e3ee2b8 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -194,20 +194,11 @@ out:
return err;
}
-/*
- * Events in data file are not collect in groups, but we still want
- * the group display. Set the artificial group and set the leader's
- * forced_leader flag to notify the display code.
- */
static void setup_forced_leader(struct report *report,
struct perf_evlist *evlist)
{
- if (report->group_set && !evlist->nr_groups) {
- struct perf_evsel *leader = perf_evlist__first(evlist);
-
- perf_evlist__set_leader(evlist);
- leader->forced_leader = true;
- }
+ if (report->group_set)
+ perf_evlist__force_leader(evlist);
}
static int process_feature_event(struct perf_tool *tool,
@@ -523,12 +514,9 @@ static void report__warn_kptr_restrict(const struct report *rep)
"As no suitable kallsyms nor vmlinux was found, kernel samples\n"
"can't be resolved.";
- if (kernel_map) {
- const struct dso *kdso = kernel_map->dso;
- if (!RB_EMPTY_ROOT(&kdso->symbols[MAP__FUNCTION])) {
- desc = "If some relocation was applied (e.g. "
- "kexec) symbols may be misresolved.";
- }
+ if (kernel_map && map__has_symbols(kernel_map)) {
+ desc = "If some relocation was applied (e.g. "
+ "kexec) symbols may be misresolved.";
}
ui__warning(
@@ -718,10 +706,7 @@ static size_t maps__fprintf_task(struct maps *maps, int indent, FILE *fp)
static int map_groups__fprintf_task(struct map_groups *mg, int indent, FILE *fp)
{
- int printed = 0, i;
- for (i = 0; i < MAP__NR_TYPES; ++i)
- printed += maps__fprintf_task(&mg->maps[i], indent, fp);
- return printed;
+ return maps__fprintf_task(&mg->maps, indent, fp);
}
static void task__print_level(struct task *task, FILE *fp, int level)
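The deleted body shows what the new helper centralizes; reconstructed
from it, perf_evlist__force_leader presumably looks roughly like this
(a sketch, not the helper's actual definition):

    void perf_evlist__force_leader(struct perf_evlist *evlist)
    {
            if (!evlist->nr_groups) {
                    struct perf_evsel *leader = perf_evlist__first(evlist);

                    perf_evlist__set_leader(evlist);
                    leader->forced_leader = true;
            }
    }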
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index e0a9845b6cbc..cefc8813e91e 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -153,8 +153,8 @@ static struct {
.fields = PERF_OUTPUT_COMM | PERF_OUTPUT_TID |
PERF_OUTPUT_CPU | PERF_OUTPUT_TIME |
PERF_OUTPUT_EVNAME | PERF_OUTPUT_IP |
- PERF_OUTPUT_SYM | PERF_OUTPUT_DSO |
- PERF_OUTPUT_PERIOD,
+ PERF_OUTPUT_SYM | PERF_OUTPUT_SYMOFFSET |
+ PERF_OUTPUT_DSO | PERF_OUTPUT_PERIOD,
.invalid_fields = PERF_OUTPUT_TRACE | PERF_OUTPUT_BPF_OUTPUT,
},
@@ -165,8 +165,9 @@ static struct {
.fields = PERF_OUTPUT_COMM | PERF_OUTPUT_TID |
PERF_OUTPUT_CPU | PERF_OUTPUT_TIME |
PERF_OUTPUT_EVNAME | PERF_OUTPUT_IP |
- PERF_OUTPUT_SYM | PERF_OUTPUT_DSO |
- PERF_OUTPUT_PERIOD | PERF_OUTPUT_BPF_OUTPUT,
+ PERF_OUTPUT_SYM | PERF_OUTPUT_SYMOFFSET |
+ PERF_OUTPUT_DSO | PERF_OUTPUT_PERIOD |
+ PERF_OUTPUT_BPF_OUTPUT,
.invalid_fields = PERF_OUTPUT_TRACE,
},
@@ -185,10 +186,10 @@ static struct {
.fields = PERF_OUTPUT_COMM | PERF_OUTPUT_TID |
PERF_OUTPUT_CPU | PERF_OUTPUT_TIME |
PERF_OUTPUT_EVNAME | PERF_OUTPUT_IP |
- PERF_OUTPUT_SYM | PERF_OUTPUT_DSO |
- PERF_OUTPUT_PERIOD | PERF_OUTPUT_ADDR |
- PERF_OUTPUT_DATA_SRC | PERF_OUTPUT_WEIGHT |
- PERF_OUTPUT_PHYS_ADDR,
+ PERF_OUTPUT_SYM | PERF_OUTPUT_SYMOFFSET |
+ PERF_OUTPUT_DSO | PERF_OUTPUT_PERIOD |
+ PERF_OUTPUT_ADDR | PERF_OUTPUT_DATA_SRC |
+ PERF_OUTPUT_WEIGHT | PERF_OUTPUT_PHYS_ADDR,
.invalid_fields = PERF_OUTPUT_TRACE | PERF_OUTPUT_BPF_OUTPUT,
},
@@ -199,8 +200,8 @@ static struct {
.fields = PERF_OUTPUT_COMM | PERF_OUTPUT_TID |
PERF_OUTPUT_CPU | PERF_OUTPUT_TIME |
PERF_OUTPUT_EVNAME | PERF_OUTPUT_IP |
- PERF_OUTPUT_SYM | PERF_OUTPUT_DSO |
- PERF_OUTPUT_PERIOD,
+ PERF_OUTPUT_SYM | PERF_OUTPUT_SYMOFFSET |
+ PERF_OUTPUT_DSO | PERF_OUTPUT_PERIOD,
.invalid_fields = PERF_OUTPUT_TRACE | PERF_OUTPUT_BPF_OUTPUT,
},
@@ -211,8 +212,8 @@ static struct {
.fields = PERF_OUTPUT_COMM | PERF_OUTPUT_TID |
PERF_OUTPUT_CPU | PERF_OUTPUT_TIME |
PERF_OUTPUT_EVNAME | PERF_OUTPUT_IP |
- PERF_OUTPUT_SYM | PERF_OUTPUT_DSO |
- PERF_OUTPUT_SYNTH,
+ PERF_OUTPUT_SYM | PERF_OUTPUT_SYMOFFSET |
+ PERF_OUTPUT_DSO | PERF_OUTPUT_SYNTH,
.invalid_fields = PERF_OUTPUT_TRACE | PERF_OUTPUT_BPF_OUTPUT,
},
@@ -544,6 +545,7 @@ static int perf_session__check_output_opt(struct perf_session *session)
if (attr->sample_type & PERF_SAMPLE_CALLCHAIN) {
output[j].fields |= PERF_OUTPUT_IP;
output[j].fields |= PERF_OUTPUT_SYM;
+ output[j].fields |= PERF_OUTPUT_SYMOFFSET;
output[j].fields |= PERF_OUTPUT_DSO;
set_print_ip_opts(attr);
goto out;
@@ -717,8 +719,8 @@ static int perf_sample__fprintf_brstack(struct perf_sample *sample,
if (PRINT_FIELD(DSO)) {
memset(&alf, 0, sizeof(alf));
memset(&alt, 0, sizeof(alt));
- thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, from, &alf);
- thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, to, &alt);
+ thread__find_map(thread, sample->cpumode, from, &alf);
+ thread__find_map(thread, sample->cpumode, to, &alt);
}
printed += fprintf(fp, " 0x%"PRIx64, from);
@@ -764,13 +766,8 @@ static int perf_sample__fprintf_brstacksym(struct perf_sample *sample,
from = br->entries[i].from;
to = br->entries[i].to;
- thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, from, &alf);
- if (alf.map)
- alf.sym = map__find_symbol(alf.map, alf.addr);
-
- thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, to, &alt);
- if (alt.map)
- alt.sym = map__find_symbol(alt.map, alt.addr);
+ thread__find_symbol(thread, sample->cpumode, from, &alf);
+ thread__find_symbol(thread, sample->cpumode, to, &alt);
printed += symbol__fprintf_symname_offs(alf.sym, &alf, fp);
if (PRINT_FIELD(DSO)) {
@@ -814,12 +811,12 @@ static int perf_sample__fprintf_brstackoff(struct perf_sample *sample,
from = br->entries[i].from;
to = br->entries[i].to;
- thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, from, &alf);
- if (alf.map && !alf.map->dso->adjust_symbols)
+ if (thread__find_map(thread, sample->cpumode, from, &alf) &&
+ !alf.map->dso->adjust_symbols)
from = map__map_ip(alf.map, from);
- thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, to, &alt);
- if (alt.map && !alt.map->dso->adjust_symbols)
+ if (thread__find_map(thread, sample->cpumode, to, &alt) &&
+ !alt.map->dso->adjust_symbols)
to = map__map_ip(alt.map, to);
printed += fprintf(fp, " 0x%"PRIx64, from);
@@ -882,8 +879,7 @@ static int grab_bb(u8 *buffer, u64 start, u64 end,
return 0;
}
- thread__find_addr_map(thread, *cpumode, MAP__FUNCTION, start, &al);
- if (!al.map || !al.map->dso) {
+ if (!thread__find_map(thread, *cpumode, start, &al) || !al.map->dso) {
pr_debug("\tcannot resolve %" PRIx64 "-%" PRIx64 "\n", start, end);
return 0;
}
@@ -933,10 +929,8 @@ static int ip__fprintf_sym(uint64_t addr, struct thread *thread,
memset(&al, 0, sizeof(al));
- thread__find_addr_map(thread, cpumode, MAP__FUNCTION, addr, &al);
- if (!al.map)
- thread__find_addr_map(thread, cpumode, MAP__VARIABLE,
- addr, &al);
+ thread__find_map(thread, cpumode, addr, &al);
+
if ((*lastsym) && al.addr >= (*lastsym)->start && al.addr < (*lastsym)->end)
return 0;
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index f17dc601b0f3..a4f662a462c6 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -164,6 +164,7 @@ static bool forever = false;
static bool metric_only = false;
static bool force_metric_only = false;
static bool no_merge = false;
+static bool walltime_run_table = false;
static struct timespec ref_time;
static struct cpu_map *aggr_map;
static aggr_get_id_t aggr_get_id;
@@ -173,6 +174,7 @@ static const char *output_name;
static int output_fd;
static int print_free_counters_hint;
static int print_mixed_hw_group_error;
+static u64 *walltime_run;
struct perf_stat {
bool record;
@@ -569,7 +571,7 @@ static struct perf_evsel *perf_evsel__reset_weak_group(struct perf_evsel *evsel)
return leader;
}
-static int __run_perf_stat(int argc, const char **argv)
+static int __run_perf_stat(int argc, const char **argv, int run_idx)
{
int interval = stat_config.interval;
int times = stat_config.times;
@@ -752,6 +754,9 @@ try_again:
t1 = rdclock();
+ if (walltime_run_table)
+ walltime_run[run_idx] = t1 - t0;
+
update_stats(&walltime_nsecs_stats, t1 - t0);
/*
@@ -766,7 +771,7 @@ try_again:
return WEXITSTATUS(status);
}
-static int run_perf_stat(int argc, const char **argv)
+static int run_perf_stat(int argc, const char **argv, int run_idx)
{
int ret;
@@ -779,7 +784,7 @@ static int run_perf_stat(int argc, const char **argv)
if (sync_run)
sync();
- ret = __run_perf_stat(argc, argv);
+ ret = __run_perf_stat(argc, argv, run_idx);
if (ret)
return ret;
@@ -1764,19 +1769,67 @@ static void print_header(int argc, const char **argv)
}
}
+static int get_precision(double num)
+{
+ if (num > 1)
+ return 0;
+
+ return lround(ceil(-log10(num)));
+}
+
+static void print_table(FILE *output, int precision, double avg)
+{
+ char tmp[64];
+ int idx, indent = 0;
+
+ scnprintf(tmp, 64, " %17.*f", precision, avg);
+ while (tmp[indent] == ' ')
+ indent++;
+
+ fprintf(output, "%*s# Table of individual measurements:\n", indent, "");
+
+ for (idx = 0; idx < run_count; idx++) {
+ double run = (double) walltime_run[idx] / NSEC_PER_SEC;
+ int h, n = 1 + abs((int) (100.0 * (run - avg)/run) / 5);
+
+ fprintf(output, " %17.*f (%+.*f) ",
+ precision, run, precision, run - avg);
+
+ for (h = 0; h < n; h++)
+ fprintf(output, "#");
+
+ fprintf(output, "\n");
+ }
+
+ fprintf(output, "\n%*s# Final result:\n", indent, "");
+}
+
static void print_footer(void)
{
+ double avg = avg_stats(&walltime_nsecs_stats) / NSEC_PER_SEC;
FILE *output = stat_config.output;
int n;
if (!null_run)
fprintf(output, "\n");
- fprintf(output, " %17.9f seconds time elapsed",
- avg_stats(&walltime_nsecs_stats) / NSEC_PER_SEC);
- if (run_count > 1) {
- fprintf(output, " ");
- print_noise_pct(stddev_stats(&walltime_nsecs_stats),
- avg_stats(&walltime_nsecs_stats));
+
+ if (run_count == 1) {
+ fprintf(output, " %17.9f seconds time elapsed", avg);
+ } else {
+ double sd = stddev_stats(&walltime_nsecs_stats) / NSEC_PER_SEC;
+ /*
+ * Display at most 2 more significant
+ * digits than the stddev inaccuracy.
+ */
+ int precision = get_precision(sd) + 2;
+
+ if (walltime_run_table)
+ print_table(output, precision, avg);
+
+ fprintf(output, " %17.*f +- %.*f seconds time elapsed",
+ precision, avg, precision, sd);
+
+ print_noise_pct(sd, avg);
}
fprintf(output, "\n\n");
@@ -1952,6 +2005,8 @@ static const struct option stat_options[] = {
"be more verbose (show counter open errors, etc)"),
OPT_INTEGER('r', "repeat", &run_count,
"repeat command and print average + stddev (max: 100, forever: 0)"),
+ OPT_BOOLEAN(0, "table", &walltime_run_table,
+ "display details about each run (only with -r option)"),
OPT_BOOLEAN('n', "null", &null_run,
"null run - dont start any counters"),
OPT_INCR('d', "detailed", &detailed_run,
@@ -2843,6 +2898,13 @@ int cmd_stat(int argc, const char **argv)
goto out;
}
+ if (walltime_run_table && run_count <= 1) {
+ fprintf(stderr, "--table is only supported with -r\n");
+ parse_options_usage(stat_usage, stat_options, "r", 1);
+ parse_options_usage(NULL, stat_options, "table", 0);
+ goto out;
+ }
+
if (output_fd < 0) {
fprintf(stderr, "argument to --log-fd must be a > 0\n");
parse_options_usage(stat_usage, stat_options, "log-fd", 0);
@@ -2897,6 +2959,14 @@ int cmd_stat(int argc, const char **argv)
run_count = 1;
}
+ if (walltime_run_table) {
+ walltime_run = zalloc(run_count * sizeof(walltime_run[0]));
+ if (!walltime_run) {
+ pr_err("failed to setup -r option");
+ goto out;
+ }
+ }
+
if ((stat_config.aggr_mode == AGGR_THREAD) &&
!target__has_task(&target)) {
if (!target.system_wide || target.cpu_list) {
@@ -3012,7 +3082,7 @@ int cmd_stat(int argc, const char **argv)
fprintf(output, "[ perf stat: executing run #%d ... ]\n",
run_idx + 1);
- status = run_perf_stat(argc, argv);
+ status = run_perf_stat(argc, argv, run_idx);
if (forever && status != -1) {
print_counters(NULL, argc, argv);
perf_stat__reset_stats();
@@ -3060,6 +3130,8 @@ int cmd_stat(int argc, const char **argv)
perf_stat__exit_aggr_mode();
perf_evlist__free_stats(evsel_list);
out:
+ free(walltime_run);
+
if (smi_cost && smi_reset)
sysfs__write_int(FREEZE_ON_SMI_PATH, 0);
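A worked example of the precision logic above, matching the --table
output quoted in the perf-stat.txt hunk earlier (stddev 0.198 s):

    /* -log10(0.198) ~= 0.70, so ceil() gives 1 and
     * get_precision(0.198) == 1; the footer then prints 1 + 2 == 3
     * fractional digits: "5.483 +- 0.198 seconds time elapsed"
     */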
diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c
index 813698a9b8c7..a827919c6263 100644
--- a/tools/perf/builtin-timechart.c
+++ b/tools/perf/builtin-timechart.c
@@ -533,12 +533,8 @@ static const char *cat_backtrace(union perf_event *event,
}
tal.filtered = 0;
- thread__find_addr_location(al.thread, cpumode,
- MAP__FUNCTION, ip, &tal);
-
- if (tal.sym)
- fprintf(f, "..... %016" PRIx64 " %s\n", ip,
- tal.sym->name);
+ if (thread__find_symbol(al.thread, cpumode, ip, &tal))
+ fprintf(f, "..... %016" PRIx64 " %s\n", ip, tal.sym->name);
else
fprintf(f, "..... %016" PRIx64 "\n", ip);
}
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index f39bd60d2708..7a349fcd3864 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -742,7 +742,7 @@ static void perf_event__process_sample(struct perf_tool *tool,
"Kernel address maps (/proc/{kallsyms,modules}) are restricted.\n\n"
"Check /proc/sys/kernel/kptr_restrict.\n\n"
"Kernel%s samples will not be resolved.\n",
- al.map && !RB_EMPTY_ROOT(&al.map->dso->symbols[MAP__FUNCTION]) ?
+ al.map && map__has_symbols(al.map) ?
" modules" : "");
if (use_browser <= 0)
sleep(5);
@@ -750,7 +750,7 @@ static void perf_event__process_sample(struct perf_tool *tool,
machine->kptr_restrict_warned = true;
}
- if (al.sym == NULL) {
+ if (al.sym == NULL && al.map != NULL) {
const char *msg = "Kernel samples will not be resolved.\n";
/*
* As we do lazy loading of symtabs we only will know if the
@@ -764,8 +764,7 @@ static void perf_event__process_sample(struct perf_tool *tool,
* invalid --vmlinux ;-)
*/
if (!machine->kptr_restrict_warned && !top->vmlinux_warned &&
- al.map == machine->vmlinux_maps[MAP__FUNCTION] &&
- RB_EMPTY_ROOT(&al.map->dso->symbols[MAP__FUNCTION])) {
+ __map__is_kernel(al.map) && map__has_symbols(al.map)) {
if (symbol_conf.vmlinux_name) {
char serr[256];
dso__strerror_load(al.map->dso, serr, sizeof(serr));
@@ -1265,7 +1264,7 @@ int cmd_top(int argc, const char **argv)
.proc_map_timeout = 500,
.overwrite = 1,
},
- .max_stack = sysctl_perf_event_max_stack,
+ .max_stack = sysctl__max_stack(),
.sym_pcnt_filter = 5,
.nr_threads_synthesize = UINT_MAX,
};
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 3ad17ee89403..560aed7da36a 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -2024,8 +2024,7 @@ static int trace__pgfault(struct trace *trace,
if (trace->summary_only)
goto out;
- thread__find_addr_location(thread, sample->cpumode, MAP__FUNCTION,
- sample->ip, &al);
+ thread__find_symbol(thread, sample->cpumode, sample->ip, &al);
trace__fprintf_entry_head(trace, thread, 0, true, sample->time, trace->output);
@@ -2037,12 +2036,10 @@ static int trace__pgfault(struct trace *trace,
fprintf(trace->output, "] => ");
- thread__find_addr_location(thread, sample->cpumode, MAP__VARIABLE,
- sample->addr, &al);
+ thread__find_symbol(thread, sample->cpumode, sample->addr, &al);
if (!al.map) {
- thread__find_addr_location(thread, sample->cpumode,
- MAP__FUNCTION, sample->addr, &al);
+ thread__find_symbol(thread, sample->cpumode, sample->addr, &al);
if (al.map)
map_type = 'x';
@@ -3165,7 +3162,7 @@ int cmd_trace(int argc, const char **argv)
mmap_pages_user_set = false;
if (trace.max_stack == UINT_MAX) {
- trace.max_stack = input_name ? PERF_MAX_STACK_DEPTH : sysctl_perf_event_max_stack;
+ trace.max_stack = input_name ? PERF_MAX_STACK_DEPTH : sysctl__max_stack();
max_stack_user_set = false;
}
diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh
index 9aff89bc7535..10f333e2e825 100755
--- a/tools/perf/check-headers.sh
+++ b/tools/perf/check-headers.sh
@@ -55,22 +55,26 @@ include/uapi/asm-generic/ioctls.h
include/uapi/asm-generic/mman-common.h
'
-check () {
- file=$1
+check_2 () {
+ file1=$1
+ file2=$2
shift
- opts=
- while [ -n "$*" ]; do
- opts="$opts \"$1\""
- shift
- done
+ shift
- cmd="diff $opts ../$file ../../$file > /dev/null"
+ cmd="diff $* $file1 $file2 > /dev/null"
- test -f ../../$file &&
+ test -f $file2 &&
eval $cmd || echo "Warning: Kernel ABI header at 'tools/$file' differs from latest version at '$file'" >&2
}
+check () {
+ file=$1
+
+ shift
+
+ check_2 ../$file ../../$file $*
+}
# Check if we have the kernel headers (tools/perf/../../include), else
# we're probably on a detached tarball, so no point in trying to check
@@ -83,7 +87,7 @@ for i in $HEADERS; do
done
# diff with extra ignore lines
-check arch/x86/lib/memcpy_64.S -I "^EXPORT_SYMBOL" -I "^#include <asm/export.h>"
-check arch/x86/lib/memset_64.S -I "^EXPORT_SYMBOL" -I "^#include <asm/export.h>"
-check include/uapi/asm-generic/mman.h -I "^#include <\(uapi/\)*asm-generic/mman-common.h>"
-check include/uapi/linux/mman.h -I "^#include <\(uapi/\)*asm/mman.h>"
+check arch/x86/lib/memcpy_64.S '-I "^EXPORT_SYMBOL" -I "^#include <asm/export.h>"'
+check arch/x86/lib/memset_64.S '-I "^EXPORT_SYMBOL" -I "^#include <asm/export.h>"'
+check include/uapi/asm-generic/mman.h '-I "^#include <\(uapi/\)*asm-generic/mman-common.h>"'
+check include/uapi/linux/mman.h '-I "^#include <\(uapi/\)*asm/mman.h>"'
diff --git a/tools/perf/examples/bpf/5sec.c b/tools/perf/examples/bpf/5sec.c
new file mode 100644
index 000000000000..b9c203219691
--- /dev/null
+++ b/tools/perf/examples/bpf/5sec.c
@@ -0,0 +1,49 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ Description:
+
+ . Disable strace-like syscall tracing (--no-syscalls), or try tracing
+ just some (-e *sleep).
+
+ . Attach a filter function to a kernel function, returning when it should
+ be considered, i.e. appear on the output.
+
+ . Run it system wide, so that any sleep of >= 5 seconds and < 6
+ seconds gets caught.
+
+ . Ask for callgraphs using DWARF info, so that userspace can be unwound
+
+ . While this is running, run something like "sleep 5s".
+
+ . If we decide to add tv_nsec as well, then it becomes:
+
+ int probe(hrtimer_nanosleep, rqtp->tv_sec rqtp->tv_nsec)(void *ctx, int err, long sec, long nsec)
+
+ I.e. add where it comes from (rqtp->tv_nsec) and where it will be
+ accessible in the function body (nsec)
+
+ # perf trace --no-syscalls -e tools/perf/examples/bpf/5sec.c/call-graph=dwarf/
+ 0.000 perf_bpf_probe:func:(ffffffff9811b5f0) tv_sec=5
+ hrtimer_nanosleep ([kernel.kallsyms])
+ __x64_sys_nanosleep ([kernel.kallsyms])
+ do_syscall_64 ([kernel.kallsyms])
+ entry_SYSCALL_64 ([kernel.kallsyms])
+ __GI___nanosleep (/usr/lib64/libc-2.26.so)
+ rpl_nanosleep (/usr/bin/sleep)
+ xnanosleep (/usr/bin/sleep)
+ main (/usr/bin/sleep)
+ __libc_start_main (/usr/lib64/libc-2.26.so)
+ _start (/usr/bin/sleep)
+ ^C#
+
+ Copyright (C) 2018 Red Hat, Inc., Arnaldo Carvalho de Melo <acme@redhat.com>
+*/
+
+#include <bpf.h>
+
+int probe(hrtimer_nanosleep, rqtp->tv_sec)(void *ctx, int err, long sec)
+{
+ return sec == 5;
+}
+
+license(GPL);
diff --git a/tools/perf/examples/bpf/empty.c b/tools/perf/examples/bpf/empty.c
new file mode 100644
index 000000000000..3776d26db9e7
--- /dev/null
+++ b/tools/perf/examples/bpf/empty.c
@@ -0,0 +1,3 @@
+#include <bpf.h>
+
+license(GPL);
diff --git a/tools/perf/include/bpf/bpf.h b/tools/perf/include/bpf/bpf.h
new file mode 100644
index 000000000000..dd764ad5efdf
--- /dev/null
+++ b/tools/perf/include/bpf/bpf.h
@@ -0,0 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0
+#ifndef _PERF_BPF_H
+#define _PERF_BPF_H
+#define SEC(NAME) __attribute__((section(NAME), used))
+
+#define probe(function, vars) \
+ SEC(#function "=" #function " " #vars) function
+
+#define license(name) \
+char _license[] SEC("license") = #name; \
+int _version SEC("version") = LINUX_VERSION_CODE;
+
+#endif /* _PERF_BPF_H */
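For reference, the probe() invocation in the 5sec.c example above
expands, modulo whitespace, to:

    int __attribute__((section("hrtimer_nanosleep=hrtimer_nanosleep rqtp->tv_sec"), used))
    hrtimer_nanosleep(void *ctx, int err, long sec)
    {
            return sec == 5;
    }

i.e. the section name carries the probe definition for perf to parse,
and the function body is the filter.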
diff --git a/tools/perf/perf.c b/tools/perf/perf.c
index 20a08cb32332..51c81509a315 100644
--- a/tools/perf/perf.c
+++ b/tools/perf/perf.c
@@ -238,7 +238,7 @@ static int handle_options(const char ***argv, int *argc, int *envchanged)
(*argc)--;
} else if (strstarts(cmd, CMD_DEBUGFS_DIR)) {
tracing_path_set(cmd + strlen(CMD_DEBUGFS_DIR));
- fprintf(stderr, "dir: %s\n", tracing_path);
+ fprintf(stderr, "dir: %s\n", tracing_path_mount());
if (envchanged)
*envchanged = 1;
} else if (!strcmp(cmd, "--list-cmds")) {
@@ -421,22 +421,11 @@ void pthread__unblock_sigwinch(void)
pthread_sigmask(SIG_UNBLOCK, &set, NULL);
}
-#ifdef _SC_LEVEL1_DCACHE_LINESIZE
-#define cache_line_size(cacheline_sizep) *cacheline_sizep = sysconf(_SC_LEVEL1_DCACHE_LINESIZE)
-#else
-static void cache_line_size(int *cacheline_sizep)
-{
- if (sysfs__read_int("devices/system/cpu/cpu0/cache/index0/coherency_line_size", cacheline_sizep))
- pr_debug("cannot determine cache line size");
-}
-#endif
-
int main(int argc, const char **argv)
{
int err;
const char *cmd;
char sbuf[STRERR_BUFSIZE];
- int value;
/* libsubcmd init */
exec_cmd_init("perf", PREFIX, PERF_EXEC_PATH, EXEC_PATH_ENVIRONMENT);
@@ -444,13 +433,6 @@ int main(int argc, const char **argv)
/* The page_size is placed in util object. */
page_size = sysconf(_SC_PAGE_SIZE);
- cache_line_size(&cacheline_size);
-
- if (sysctl__read_int("kernel/perf_event_max_stack", &value) == 0)
- sysctl_perf_event_max_stack = value;
-
- if (sysctl__read_int("kernel/perf_event_max_contexts_per_stack", &value) == 0)
- sysctl_perf_event_max_contexts_per_stack = value;
cmd = extract_argv0_path(argv[0]);
if (!cmd)
@@ -458,15 +440,11 @@ int main(int argc, const char **argv)
srandom(time(NULL));
- perf_config__init();
err = perf_config(perf_default_config, NULL);
if (err)
return err;
set_buildid_dir(NULL);
- /* get debugfs/tracefs mount point from /proc/mounts */
- tracing_path_mount();
-
/*
* "perf-xxxx" is the same as "perf xxxx", but we obviously:
*
diff --git a/tools/perf/pmu-events/arch/x86/mapfile.csv b/tools/perf/pmu-events/arch/x86/mapfile.csv
index 93656f2fd53a..7e3cce3bcf3b 100644
--- a/tools/perf/pmu-events/arch/x86/mapfile.csv
+++ b/tools/perf/pmu-events/arch/x86/mapfile.csv
@@ -29,7 +29,6 @@ GenuineIntel-6-4D,v13,silvermont,core
GenuineIntel-6-4C,v13,silvermont,core
GenuineIntel-6-2A,v15,sandybridge,core
GenuineIntel-6-2C,v2,westmereep-dp,core
-GenuineIntel-6-2C,v2,westmereep-dp,core
GenuineIntel-6-25,v2,westmereep-sp,core
GenuineIntel-6-2F,v2,westmereex,core
GenuineIntel-6-55,v1,skylakex,core
diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c
index cac8f8889bc3..2bde505e2e7e 100644
--- a/tools/perf/tests/builtin-test.c
+++ b/tools/perf/tests/builtin-test.c
@@ -654,6 +654,15 @@ static int perf_test__list(int argc, const char **argv)
continue;
pr_info("%2d: %s\n", i, t->desc);
+
+ if (t->subtest.get_nr) {
+ int subn = t->subtest.get_nr();
+ int subi;
+
+ for (subi = 0; subi < subn; subi++)
+ pr_info("%2d:%1d: %s\n", i, subi + 1,
+ t->subtest.get_desc(subi));
+ }
}
perf_test__list_shell(argc, argv, i);
diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c
index 99936352df4f..afa4ce21ba7c 100644
--- a/tools/perf/tests/code-reading.c
+++ b/tools/perf/tests/code-reading.c
@@ -236,14 +236,13 @@ static int read_object_code(u64 addr, size_t len, u8 cpumode,
pr_debug("Reading object code for memory address: %#"PRIx64"\n", addr);
- thread__find_addr_map(thread, cpumode, MAP__FUNCTION, addr, &al);
- if (!al.map || !al.map->dso) {
+ if (!thread__find_map(thread, cpumode, addr, &al) || !al.map->dso) {
if (cpumode == PERF_RECORD_MISC_HYPERVISOR) {
pr_debug("Hypervisor address can not be resolved - skipping\n");
return 0;
}
- pr_debug("thread__find_addr_map failed\n");
+ pr_debug("thread__find_map failed\n");
return -1;
}
diff --git a/tools/perf/tests/hists_common.c b/tools/perf/tests/hists_common.c
index f7c5b613d667..b889a28fd80b 100644
--- a/tools/perf/tests/hists_common.c
+++ b/tools/perf/tests/hists_common.c
@@ -131,20 +131,20 @@ struct machine *setup_fake_machine(struct machines *machines)
goto out;
/* emulate dso__load() */
- dso__set_loaded(dso, MAP__FUNCTION);
+ dso__set_loaded(dso);
for (k = 0; k < fake_symbols[i].nr_syms; k++) {
struct symbol *sym;
struct fake_sym *fsym = &fake_symbols[i].syms[k];
sym = symbol__new(fsym->start, fsym->length,
- STB_GLOBAL, fsym->name);
+ STB_GLOBAL, STT_FUNC, fsym->name);
if (sym == NULL) {
dso__put(dso);
goto out;
}
- symbols__insert(&dso->symbols[MAP__FUNCTION], sym);
+ symbols__insert(&dso->symbols, sym);
}
dso__put(dso);
diff --git a/tools/perf/tests/mmap-thread-lookup.c b/tools/perf/tests/mmap-thread-lookup.c
index 868d82b501f4..b1af2499a3c9 100644
--- a/tools/perf/tests/mmap-thread-lookup.c
+++ b/tools/perf/tests/mmap-thread-lookup.c
@@ -188,9 +188,8 @@ static int mmap_events(synth_cb synth)
pr_debug("looking for map %p\n", td->map);
- thread__find_addr_map(thread,
- PERF_RECORD_MISC_USER, MAP__FUNCTION,
- (unsigned long) (td->map + 1), &al);
+ thread__find_map(thread, PERF_RECORD_MISC_USER,
+ (unsigned long) (td->map + 1), &al);
thread__put(thread);
@@ -218,7 +217,7 @@ static int mmap_events(synth_cb synth)
* perf_event__synthesize_threads (global)
*
* We test we can find all memory maps via:
- * thread__find_addr_map
+ * thread__find_map
*
* by using all thread objects.
*/
diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c
index 18b06444f230..b9ebe15afb13 100644
--- a/tools/perf/tests/parse-events.c
+++ b/tools/perf/tests/parse-events.c
@@ -1309,18 +1309,26 @@ static int test__checkevent_config_cache(struct perf_evlist *evlist)
return 0;
}
+static int test__intel_pt(struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel = perf_evlist__first(evlist);
+
+ TEST_ASSERT_VAL("wrong name setting", strcmp(evsel->name, "intel_pt//u") == 0);
+ return 0;
+}
+
static int count_tracepoints(void)
{
struct dirent *events_ent;
DIR *events_dir;
int cnt = 0;
- events_dir = opendir(tracing_events_path);
+ events_dir = tracing_events__opendir();
TEST_ASSERT_VAL("Can't open events dir", events_dir);
while ((events_ent = readdir(events_dir))) {
- char sys_path[PATH_MAX];
+ char *sys_path;
struct dirent *sys_ent;
DIR *sys_dir;
@@ -1331,8 +1339,8 @@ static int count_tracepoints(void)
|| !strcmp(events_ent->d_name, "header_page"))
continue;
- scnprintf(sys_path, PATH_MAX, "%s/%s",
- tracing_events_path, events_ent->d_name);
+ sys_path = get_events_file(events_ent->d_name);
+ TEST_ASSERT_VAL("Can't get sys path", sys_path);
sys_dir = opendir(sys_path);
TEST_ASSERT_VAL("Can't open sys dir", sys_dir);
@@ -1348,6 +1356,7 @@ static int count_tracepoints(void)
}
closedir(sys_dir);
+ put_events_file(sys_path);
}
closedir(events_dir);
@@ -1637,6 +1646,11 @@ static struct evlist_test test__events[] = {
.check = test__checkevent_config_cache,
.id = 51,
},
+ {
+ .name = "intel_pt//u",
+ .check = test__intel_pt,
+ .id = 52,
+ },
};
static struct evlist_test test__events_pmu[] = {
diff --git a/tools/perf/tests/shell/record+probe_libc_inet_pton.sh b/tools/perf/tests/shell/record+probe_libc_inet_pton.sh
index 016882dbbc16..650b208f700f 100755
--- a/tools/perf/tests/shell/record+probe_libc_inet_pton.sh
+++ b/tools/perf/tests/shell/record+probe_libc_inet_pton.sh
@@ -16,18 +16,18 @@ nm -g $libc 2>/dev/null | fgrep -q inet_pton || exit 254
trace_libc_inet_pton_backtrace() {
idx=0
expected[0]="ping[][0-9 \.:]+probe_libc:inet_pton: \([[:xdigit:]]+\)"
- expected[1]=".*inet_pton[[:space:]]\($libc\)$"
+ expected[1]=".*inet_pton\+0x[[:xdigit:]]+[[:space:]]\($libc|inlined\)$"
case "$(uname -m)" in
s390x)
eventattr='call-graph=dwarf,max-stack=4'
- expected[2]="gaih_inet.*[[:space:]]\($libc|inlined\)$"
- expected[3]="(__GI_)?getaddrinfo[[:space:]]\($libc|inlined\)$"
- expected[4]="main[[:space:]]\(.*/bin/ping.*\)$"
+ expected[2]="gaih_inet.*\+0x[[:xdigit:]]+[[:space:]]\($libc|inlined\)$"
+ expected[3]="(__GI_)?getaddrinfo\+0x[[:xdigit:]]+[[:space:]]\($libc|inlined\)$"
+ expected[4]="main\+0x[[:xdigit:]]+[[:space:]]\(.*/bin/ping.*\)$"
;;
*)
eventattr='max-stack=3'
- expected[2]="getaddrinfo[[:space:]]\($libc\)$"
- expected[3]=".*\(.*/bin/ping.*\)$"
+ expected[2]="getaddrinfo\+0x[[:xdigit:]]+[[:space:]]\($libc\)$"
+ expected[3]=".*\+0x[[:xdigit:]]+[[:space:]]\(.*/bin/ping.*\)$"
;;
esac
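For reference, a backtrace line that the tightened patterns accept
would look like this (offset and path invented):

    __GI___inet_pton+0x6b (/usr/lib64/libc-2.26.so)

whereas a symbol line without the +0x<offset> suffix no longer
matches.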
diff --git a/tools/perf/tests/topology.c b/tools/perf/tests/topology.c
index 17cb1bb3448c..40e30a26b23c 100644
--- a/tools/perf/tests/topology.c
+++ b/tools/perf/tests/topology.c
@@ -70,6 +70,27 @@ static int check_cpu_topology(char *path, struct cpu_map *map)
session = perf_session__new(&data, false, NULL);
TEST_ASSERT_VAL("can't get session", session);
+ /* On platforms with large numbers of CPUs, process_cpu_topology()
+ * might issue an error while reading the perf.data file section
+ * HEADER_CPU_TOPOLOGY and the cpu_topology_map pointed to by member
+ * cpu is a NULL pointer.
+ * Example: On s390
+ * CPU 0 is on core_id 0 and physical_package_id 6
+ * CPU 1 is on core_id 1 and physical_package_id 3
+ *
+ * Core_id and physical_package_id are platform and architecture
+ * dependent and might have higher numbers than the CPU id.
+ * This actually depends on the configuration.
+ *
+ * In this case process_cpu_topology() prints error message:
+ * "socket_id number is too big. You may need to upgrade the
+ * perf tool."
+ *
+ * This is the reason why this test might be skipped.
+ */
+ if (!session->header.env.cpu)
+ return TEST_SKIP;
+
for (i = 0; i < session->header.env.nr_cpus_avail; i++) {
if (!cpu_map__has(map, i))
continue;
@@ -95,7 +116,7 @@ int test__session_topology(struct test *test __maybe_unused, int subtest __maybe
{
char path[PATH_MAX];
struct cpu_map *map;
- int ret = -1;
+ int ret = TEST_FAIL;
TEST_ASSERT_VAL("can't get templ file", !get_temp(path));
@@ -110,12 +131,9 @@ int test__session_topology(struct test *test __maybe_unused, int subtest __maybe
goto free_path;
}
- if (check_cpu_topology(path, map))
- goto free_map;
- ret = 0;
-
-free_map:
+ ret = check_cpu_topology(path, map);
cpu_map__put(map);
+
free_path:
unlink(path);
return ret;
diff --git a/tools/perf/tests/vmlinux-kallsyms.c b/tools/perf/tests/vmlinux-kallsyms.c
index 1e5adb65632a..7691980b7df1 100644
--- a/tools/perf/tests/vmlinux-kallsyms.c
+++ b/tools/perf/tests/vmlinux-kallsyms.c
@@ -19,8 +19,7 @@ int test__vmlinux_matches_kallsyms(struct test *test __maybe_unused, int subtest
struct symbol *sym;
struct map *kallsyms_map, *vmlinux_map, *map;
struct machine kallsyms, vmlinux;
- enum map_type type = MAP__FUNCTION;
- struct maps *maps = &vmlinux.kmaps.maps[type];
+ struct maps *maps = machine__kernel_maps(&vmlinux);
u64 mem_start, mem_end;
bool header_printed;
@@ -56,7 +55,7 @@ int test__vmlinux_matches_kallsyms(struct test *test __maybe_unused, int subtest
* be compacted against the list of modules found in the "vmlinux"
* code and with the one got from /proc/modules from the "kallsyms" code.
*/
- if (machine__load_kallsyms(&kallsyms, "/proc/kallsyms", type) <= 0) {
+ if (machine__load_kallsyms(&kallsyms, "/proc/kallsyms") <= 0) {
pr_debug("dso__load_kallsyms ");
goto out;
}
@@ -94,7 +93,7 @@ int test__vmlinux_matches_kallsyms(struct test *test __maybe_unused, int subtest
* maps__reloc_vmlinux will notice and set proper ->[un]map_ip routines
* to fixup the symbols.
*/
- if (machine__load_vmlinux_path(&vmlinux, type) <= 0) {
+ if (machine__load_vmlinux_path(&vmlinux) <= 0) {
pr_debug("Couldn't find a vmlinux that matches the kernel running on this machine, skipping test\n");
err = TEST_SKIP;
goto out;
@@ -108,7 +107,7 @@ int test__vmlinux_matches_kallsyms(struct test *test __maybe_unused, int subtest
* in the kallsyms dso. For the ones that are in both, check its names and
* end addresses too.
*/
- for (nd = rb_first(&vmlinux_map->dso->symbols[type]); nd; nd = rb_next(nd)) {
+ map__for_each_symbol(vmlinux_map, sym, nd) {
struct symbol *pair, *first_pair;
sym = rb_entry(nd, struct symbol, rb_node);
@@ -119,8 +118,7 @@ int test__vmlinux_matches_kallsyms(struct test *test __maybe_unused, int subtest
mem_start = vmlinux_map->unmap_ip(vmlinux_map, sym->start);
mem_end = vmlinux_map->unmap_ip(vmlinux_map, sym->end);
- first_pair = machine__find_kernel_symbol(&kallsyms, type,
- mem_start, NULL);
+ first_pair = machine__find_kernel_symbol(&kallsyms, mem_start, NULL);
pair = first_pair;
if (pair && UM(pair->start) == mem_start) {
@@ -149,7 +147,7 @@ next_pair:
*/
continue;
} else {
- pair = machine__find_kernel_symbol_by_name(&kallsyms, type, sym->name, NULL);
+ pair = machine__find_kernel_symbol_by_name(&kallsyms, sym->name, NULL);
if (pair) {
if (UM(pair->start) == mem_start)
goto next_pair;
@@ -183,7 +181,7 @@ next_pair:
* so use the short name, less descriptive but the same ("[kernel]" in
* both cases.
*/
- pair = map_groups__find_by_name(&kallsyms.kmaps, type,
+ pair = map_groups__find_by_name(&kallsyms.kmaps,
(map->dso->kernel ?
map->dso->short_name :
map->dso->name));
@@ -206,7 +204,7 @@ next_pair:
mem_start = vmlinux_map->unmap_ip(vmlinux_map, map->start);
mem_end = vmlinux_map->unmap_ip(vmlinux_map, map->end);
- pair = map_groups__find(&kallsyms.kmaps, type, mem_start);
+ pair = map_groups__find(&kallsyms.kmaps, mem_start);
if (pair == NULL || pair->priv)
continue;
@@ -228,7 +226,7 @@ next_pair:
header_printed = false;
- maps = &kallsyms.kmaps.maps[type];
+ maps = machine__kernel_maps(&kallsyms);
for (map = maps__first(maps); map; map = map__next(map)) {
if (!map->priv) {
diff --git a/tools/perf/trace/beauty/prctl_option.sh b/tools/perf/trace/beauty/prctl_option.sh
index 0be4138fbe71..f24722146ebe 100755
--- a/tools/perf/trace/beauty/prctl_option.sh
+++ b/tools/perf/trace/beauty/prctl_option.sh
@@ -1,6 +1,6 @@
#!/bin/sh
-header_dir=$1
+[ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/linux/
printf "static const char *prctl_options[] = {\n"
regex='^#define[[:space:]]+PR_([GS]ET\w+)[[:space:]]*([[:xdigit:]]+).*'
diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c
index 3781d74088a7..8be40fa903aa 100644
--- a/tools/perf/ui/browsers/annotate.c
+++ b/tools/perf/ui/browsers/annotate.c
@@ -695,6 +695,7 @@ static int annotate_browser__run(struct annotate_browser *browser,
"O Bump offset level (jump targets -> +call -> all -> cycle thru)\n"
"s Toggle source code view\n"
"t Circulate percent, total period, samples view\n"
+ "c Show min/max cycle\n"
"/ Search string\n"
"k Toggle line numbers\n"
"P Print to [symbol_name].annotation file.\n"
@@ -791,6 +792,13 @@ show_sup_ins:
notes->options->show_total_period = true;
annotation__update_column_widths(notes);
continue;
+ case 'c':
+ if (notes->options->show_minmax_cycle)
+ notes->options->show_minmax_cycle = false;
+ else
+ notes->options->show_minmax_cycle = true;
+ annotation__update_column_widths(notes);
+ continue;
case K_LEFT:
case K_ESC:
case 'q':
diff --git a/tools/perf/ui/browsers/map.c b/tools/perf/ui/browsers/map.c
index e03fa75f108a..5b8b8c637686 100644
--- a/tools/perf/ui/browsers/map.c
+++ b/tools/perf/ui/browsers/map.c
@@ -104,7 +104,7 @@ int map__browse(struct map *map)
{
struct map_browser mb = {
.b = {
- .entries = &map->dso->symbols[map->type],
+ .entries = &map->dso->symbols,
.refresh = ui_browser__rb_tree_refresh,
.seek = ui_browser__rb_tree_seek,
.write = map_browser__write,
diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c
index 6832fcb2e6ff..c1eb476da91b 100644
--- a/tools/perf/ui/stdio/hist.c
+++ b/tools/perf/ui/stdio/hist.c
@@ -819,8 +819,7 @@ size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows,
}
if (h->ms.map == NULL && verbose > 1) {
- __map_groups__fprintf_maps(h->thread->mg,
- MAP__FUNCTION, fp);
+ map_groups__fprintf(h->thread->mg, fp);
fprintf(fp, "%.10s end\n", graph_dotted_line);
}
}
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index 8052373bcd6a..5d4c45b76895 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -152,6 +152,8 @@ libperf-y += perf-hooks.o
libperf-$(CONFIG_CXX) += c++/
CFLAGS_config.o += -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))"
+CFLAGS_llvm-utils.o += -DPERF_INCLUDE_DIR="BUILD_STR($(perf_include_dir_SQ))"
+
# avoid compiler warnings in 32-bit mode
CFLAGS_genelf_debug.o += -Wno-packed
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 536ee148bff8..71897689dacf 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -760,6 +760,15 @@ static int __symbol__account_cycles(struct annotation *notes,
ch[offset].num_aggr++;
ch[offset].cycles_aggr += cycles;
+ if (cycles > ch[offset].cycles_max)
+ ch[offset].cycles_max = cycles;
+
+ if (ch[offset].cycles_min) {
+ if (cycles && cycles < ch[offset].cycles_min)
+ ch[offset].cycles_min = cycles;
+ } else
+ ch[offset].cycles_min = cycles;
+
if (!have_start && ch[offset].have_start)
return 0;
if (ch[offset].num) {
@@ -953,8 +962,11 @@ void annotation__compute_ipc(struct annotation *notes, size_t size)
if (ch->have_start)
annotation__count_and_fill(notes, ch->start, offset, ch);
al = notes->offsets[offset];
- if (al && ch->num_aggr)
+ if (al && ch->num_aggr) {
al->cycles = ch->cycles_aggr / ch->num_aggr;
+ al->cycles_max = ch->cycles_max;
+ al->cycles_min = ch->cycles_min;
+ }
notes->have_cycles = true;
}
}
@@ -1263,6 +1275,9 @@ annotation_line__print(struct annotation_line *al, struct symbol *sym, u64 start
max_percent = sample->percent;
}
+ if (al->samples_nr > nr_percent)
+ nr_percent = al->samples_nr;
+
if (max_percent < min_pcnt)
return -1;
@@ -1950,6 +1965,7 @@ int symbol__annotate_printf(struct symbol *sym, struct map *map,
u64 len;
int width = symbol_conf.show_total_period ? 12 : 8;
int graph_dotted_len;
+ char buf[512];
filename = strdup(dso->long_name);
if (!filename)
@@ -1962,8 +1978,11 @@ int symbol__annotate_printf(struct symbol *sym, struct map *map,
len = symbol__size(sym);
- if (perf_evsel__is_group_event(evsel))
+ if (perf_evsel__is_group_event(evsel)) {
width *= evsel->nr_members;
+ perf_evsel__group_desc(evsel, buf, sizeof(buf));
+ evsel_name = buf;
+ }
graph_dotted_len = printf(" %-*.*s| Source code & Disassembly of %s for %s (%" PRIu64 " samples)\n",
width, width, symbol_conf.show_total_period ? "Period" :
@@ -2483,13 +2502,38 @@ static void __annotation_line__write(struct annotation_line *al, struct annotati
else
obj__printf(obj, "%*s ", ANNOTATION__IPC_WIDTH - 1, "IPC");
- if (al->cycles)
- obj__printf(obj, "%*" PRIu64 " ",
+ if (!notes->options->show_minmax_cycle) {
+ if (al->cycles)
+ obj__printf(obj, "%*" PRIu64 " ",
ANNOTATION__CYCLES_WIDTH - 1, al->cycles);
- else if (!show_title)
- obj__printf(obj, "%*s", ANNOTATION__CYCLES_WIDTH, " ");
- else
- obj__printf(obj, "%*s ", ANNOTATION__CYCLES_WIDTH - 1, "Cycle");
+ else if (!show_title)
+ obj__printf(obj, "%*s",
+ ANNOTATION__CYCLES_WIDTH, " ");
+ else
+ obj__printf(obj, "%*s ",
+ ANNOTATION__CYCLES_WIDTH - 1,
+ "Cycle");
+ } else {
+ if (al->cycles) {
+ char str[32];
+
+ scnprintf(str, sizeof(str),
+ "%" PRIu64 "(%" PRIu64 "/%" PRIu64 ")",
+ al->cycles, al->cycles_min,
+ al->cycles_max);
+
+ obj__printf(obj, "%*s ",
+ ANNOTATION__MINMAX_CYCLES_WIDTH - 1,
+ str);
+ } else if (!show_title)
+ obj__printf(obj, "%*s",
+ ANNOTATION__MINMAX_CYCLES_WIDTH,
+ " ");
+ else
+ obj__printf(obj, "%*s ",
+ ANNOTATION__MINMAX_CYCLES_WIDTH - 1,
+ "Cycle(min/max)");
+ }
}
obj__printf(obj, " ");
diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h
index f28a9e43421d..5080b6dd98b8 100644
--- a/tools/perf/util/annotate.h
+++ b/tools/perf/util/annotate.h
@@ -61,6 +61,7 @@ bool ins__is_fused(struct arch *arch, const char *ins1, const char *ins2);
#define ANNOTATION__IPC_WIDTH 6
#define ANNOTATION__CYCLES_WIDTH 6
+#define ANNOTATION__MINMAX_CYCLES_WIDTH 19
struct annotation_options {
bool hide_src_code,
@@ -69,7 +70,8 @@ struct annotation_options {
show_linenr,
show_nr_jumps,
show_nr_samples,
- show_total_period;
+ show_total_period,
+ show_minmax_cycle;
u8 offset_level;
};
@@ -105,6 +107,8 @@ struct annotation_line {
int jump_sources;
float ipc;
u64 cycles;
+ u64 cycles_max;
+ u64 cycles_min;
size_t privsize;
char *path;
u32 idx;
@@ -186,6 +190,8 @@ struct cyc_hist {
u64 start;
u64 cycles;
u64 cycles_aggr;
+ u64 cycles_max;
+ u64 cycles_min;
u32 num;
u32 num_aggr;
u8 have_start;
@@ -239,6 +245,9 @@ struct annotation {
static inline int annotation__cycles_width(struct annotation *notes)
{
+ if (notes->have_cycles && notes->options->show_minmax_cycle)
+ return ANNOTATION__IPC_WIDTH + ANNOTATION__MINMAX_CYCLES_WIDTH;
+
return notes->have_cycles ? ANNOTATION__IPC_WIDTH + ANNOTATION__CYCLES_WIDTH : 0;
}
diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c
index 857de69a5361..d056447520a2 100644
--- a/tools/perf/util/auxtrace.c
+++ b/tools/perf/util/auxtrace.c
@@ -1679,7 +1679,7 @@ struct sym_args {
static bool kern_sym_match(struct sym_args *args, const char *name, char type)
{
/* A function with the same name, and global or the n'th found or any */
- return symbol_type__is_a(type, MAP__FUNCTION) &&
+ return kallsyms__is_function(type) &&
!strcmp(name, args->name) &&
((args->global && isupper(type)) ||
(args->selected && ++(args->cnt) == args->idx) ||
@@ -1784,7 +1784,7 @@ static int find_entire_kern_cb(void *arg, const char *name __maybe_unused,
{
struct sym_args *args = arg;
- if (!symbol_type__is_a(type, MAP__FUNCTION))
+ if (!kallsyms__is_function(type))
return 0;
if (!args->started) {
@@ -1915,7 +1915,7 @@ static void print_duplicate_syms(struct dso *dso, const char *sym_name)
pr_err("Multiple symbols with name '%s'\n", sym_name);
- sym = dso__first_symbol(dso, MAP__FUNCTION);
+ sym = dso__first_symbol(dso);
while (sym) {
if (dso_sym_match(sym, sym_name, &cnt, -1)) {
pr_err("#%d\t0x%"PRIx64"\t%c\t%s\n",
@@ -1945,7 +1945,7 @@ static int find_dso_sym(struct dso *dso, const char *sym_name, u64 *start,
*start = 0;
*size = 0;
- sym = dso__first_symbol(dso, MAP__FUNCTION);
+ sym = dso__first_symbol(dso);
while (sym) {
if (*start) {
if (!*size)
@@ -1972,8 +1972,8 @@ static int find_dso_sym(struct dso *dso, const char *sym_name, u64 *start,
static int addr_filter__entire_dso(struct addr_filter *filt, struct dso *dso)
{
- struct symbol *first_sym = dso__first_symbol(dso, MAP__FUNCTION);
- struct symbol *last_sym = dso__last_symbol(dso, MAP__FUNCTION);
+ struct symbol *first_sym = dso__first_symbol(dso);
+ struct symbol *last_sym = dso__last_symbol(dso);
if (!first_sym || !last_sym) {
pr_err("Failed to determine filter for %s\nNo symbols found.\n",
diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c
index af7ad814b2c3..cee658733e2c 100644
--- a/tools/perf/util/bpf-loader.c
+++ b/tools/perf/util/bpf-loader.c
@@ -66,7 +66,7 @@ bpf__prepare_load_buffer(void *obj_buf, size_t obj_buf_sz, const char *name)
}
obj = bpf_object__open_buffer(obj_buf, obj_buf_sz, name);
- if (IS_ERR(obj)) {
+ if (IS_ERR_OR_NULL(obj)) {
pr_debug("bpf: failed to load buffer\n");
return ERR_PTR(-EINVAL);
}
@@ -102,14 +102,14 @@ struct bpf_object *bpf__prepare_load(const char *filename, bool source)
pr_debug("bpf: successful builtin compilation\n");
obj = bpf_object__open_buffer(obj_buf, obj_buf_sz, filename);
- if (!IS_ERR(obj) && llvm_param.dump_obj)
+ if (!IS_ERR_OR_NULL(obj) && llvm_param.dump_obj)
llvm__dump_obj(filename, obj_buf, obj_buf_sz);
free(obj_buf);
} else
obj = bpf_object__open(filename);
- if (IS_ERR(obj)) {
+ if (IS_ERR_OR_NULL(obj)) {
pr_debug("bpf: failed to load %s\n", filename);
return obj;
}
diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c
index 537eadd81914..04b1d53e4bf9 100644
--- a/tools/perf/util/build-id.c
+++ b/tools/perf/util/build-id.c
@@ -47,9 +47,7 @@ int build_id__mark_dso_hit(struct perf_tool *tool __maybe_unused,
return -1;
}
- thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, sample->ip, &al);
-
- if (al.map != NULL)
+ if (thread__find_map(thread, sample->cpumode, sample->ip, &al))
al.map->dso->hit = 1;
thread__put(thread);
diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c
index 84eb9393c7db..5ac157056cdf 100644
--- a/tools/perf/util/config.c
+++ b/tools/perf/util/config.c
@@ -707,6 +707,14 @@ struct perf_config_set *perf_config_set__new(void)
return set;
}
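+
+/* Lazily allocate the global config set; returns non-zero if allocation fails. */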
+static int perf_config__init(void)
+{
+ if (config_set == NULL)
+ config_set = perf_config_set__new();
+
+ return config_set == NULL;
+}
+
int perf_config(config_fn_t fn, void *data)
{
int ret = 0;
@@ -714,7 +722,7 @@ int perf_config(config_fn_t fn, void *data)
struct perf_config_section *section;
struct perf_config_item *item;
- if (config_set == NULL)
+ if (config_set == NULL && perf_config__init())
return -1;
perf_config_set__for_each_entry(config_set, section, item) {
@@ -735,12 +743,6 @@ int perf_config(config_fn_t fn, void *data)
return ret;
}
-void perf_config__init(void)
-{
- if (config_set == NULL)
- config_set = perf_config_set__new();
-}
-
void perf_config__exit(void)
{
perf_config_set__delete(config_set);
diff --git a/tools/perf/util/config.h b/tools/perf/util/config.h
index baf82bf227ac..bd0a5897c76a 100644
--- a/tools/perf/util/config.h
+++ b/tools/perf/util/config.h
@@ -38,7 +38,6 @@ struct perf_config_set *perf_config_set__new(void);
void perf_config_set__delete(struct perf_config_set *set);
int perf_config_set__collect(struct perf_config_set *set, const char *file_name,
const char *var, const char *value);
-void perf_config__init(void);
void perf_config__exit(void);
void perf_config__refresh(void);
diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
index c8b98fa22997..4d5fc374e730 100644
--- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
+++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
@@ -96,11 +96,19 @@ int cs_etm_decoder__get_packet(struct cs_etm_decoder *decoder,
/* Nothing to do, might as well just return */
if (decoder->packet_count == 0)
return 0;
+ /*
+ * The queueing process in function cs_etm_decoder__buffer_packet()
+	 * increments the tail *before* using it. This is somewhat
+	 * counter-intuitive but it has the advantage of centralizing tail
+	 * management at a single location. Because of that we need to follow
+	 * the same heuristic with the head, i.e. we increment it before using its
+ * value. Otherwise the first element of the packet queue is not
+ * used.
+ */
+ decoder->head = (decoder->head + 1) & (MAX_BUFFER - 1);
*packet = decoder->packet_buffer[decoder->head];
- decoder->head = (decoder->head + 1) & (MAX_BUFFER - 1);
-
decoder->packet_count--;
return 1;
diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
index 40020b1ca54f..822ba915d144 100644
--- a/tools/perf/util/cs-etm.c
+++ b/tools/perf/util/cs-etm.c
@@ -239,6 +239,7 @@ static void cs_etm__free(struct perf_session *session)
for (i = 0; i < aux->num_cpu; i++)
zfree(&aux->metadata[i]);
+ thread__zput(aux->unknown_thread);
zfree(&aux->metadata);
zfree(&aux);
}
@@ -269,9 +270,7 @@ static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u64 address,
thread = etmq->etm->unknown_thread;
}
- thread__find_addr_map(thread, cpumode, MAP__FUNCTION, address, &al);
-
- if (!al.map || !al.map->dso)
+ if (!thread__find_map(thread, cpumode, address, &al) || !al.map->dso)
return 0;
if (al.map->dso->data.status == DSO_DATA_STATUS_ERROR &&
@@ -612,8 +611,8 @@ cs_etm__get_trace(struct cs_etm_buffer *buff, struct cs_etm_queue *etmq)
return buff->len;
}
-static void cs_etm__set_pid_tid_cpu(struct cs_etm_auxtrace *etm,
- struct auxtrace_queue *queue)
+static void cs_etm__set_pid_tid_cpu(struct cs_etm_auxtrace *etm,
+ struct auxtrace_queue *queue)
{
struct cs_etm_queue *etmq = queue->priv;
@@ -1357,6 +1356,23 @@ int cs_etm__process_auxtrace_info(union perf_event *event,
etm->auxtrace.free = cs_etm__free;
session->auxtrace = &etm->auxtrace;
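+	/* Obviously bogus pid/tid for decoded activity we cannot attribute to a real thread */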
+ etm->unknown_thread = thread__new(999999999, 999999999);
+ if (!etm->unknown_thread)
+ goto err_free_queues;
+
+ /*
+ * Initialize list node so that at thread__zput() we can avoid
+ * segmentation fault at list_del_init().
+ */
+ INIT_LIST_HEAD(&etm->unknown_thread->node);
+
+ err = thread__set_comm(etm->unknown_thread, "unknown", 0);
+ if (err)
+ goto err_delete_thread;
+
+ if (thread__init_map_groups(etm->unknown_thread, etm->machine))
+ goto err_delete_thread;
+
if (dump_trace) {
cs_etm__print_auxtrace_info(auxtrace_info->priv, num_cpu);
return 0;
@@ -1371,16 +1387,18 @@ int cs_etm__process_auxtrace_info(union perf_event *event,
err = cs_etm__synth_events(etm, session);
if (err)
- goto err_free_queues;
+ goto err_delete_thread;
err = auxtrace_queues__process_index(&etm->queues, session);
if (err)
- goto err_free_queues;
+ goto err_delete_thread;
etm->data_queued = etm->queues.populated;
return 0;
+err_delete_thread:
+ thread__zput(etm->unknown_thread);
err_free_queues:
auxtrace_queues__free(&etm->queues);
session->auxtrace = NULL;
diff --git a/tools/perf/util/db-export.c b/tools/perf/util/db-export.c
index b0c2b5c5d337..7123746edcf4 100644
--- a/tools/perf/util/db-export.c
+++ b/tools/perf/util/db-export.c
@@ -247,9 +247,9 @@ static int db_ids_from_al(struct db_export *dbe, struct addr_location *al,
*dso_db_id = dso->db_id;
if (!al->sym) {
- al->sym = symbol__new(al->addr, 0, 0, "unknown");
+ al->sym = symbol__new(al->addr, 0, 0, 0, "unknown");
if (al->sym)
- dso__insert_symbol(dso, al->map->type, al->sym);
+ dso__insert_symbol(dso, al->sym);
}
if (al->sym) {
@@ -315,8 +315,7 @@ static struct call_path *call_path_from_sample(struct db_export *dbe,
al.addr = node->ip;
if (al.map && !al.sym)
- al.sym = dso__find_symbol(al.map->dso, MAP__FUNCTION,
- al.addr);
+ al.sym = dso__find_symbol(al.map->dso, al.addr);
db_ids_from_al(dbe, &al, &dso_db_id, &sym_db_id, &offset);
diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c
index 36ef45b2e89d..cdfc2e5f55f5 100644
--- a/tools/perf/util/dso.c
+++ b/tools/perf/util/dso.c
@@ -1014,7 +1014,7 @@ struct map *dso__new_map(const char *name)
struct dso *dso = dso__new(name);
if (dso)
- map = map__new2(0, dso, MAP__FUNCTION);
+ map = map__new2(0, dso);
return map;
}
@@ -1176,19 +1176,19 @@ int dso__name_len(const struct dso *dso)
return dso->short_name_len;
}
-bool dso__loaded(const struct dso *dso, enum map_type type)
+bool dso__loaded(const struct dso *dso)
{
- return dso->loaded & (1 << type);
+ return dso->loaded;
}
-bool dso__sorted_by_name(const struct dso *dso, enum map_type type)
+bool dso__sorted_by_name(const struct dso *dso)
{
- return dso->sorted_by_name & (1 << type);
+ return dso->sorted_by_name;
}
-void dso__set_sorted_by_name(struct dso *dso, enum map_type type)
+void dso__set_sorted_by_name(struct dso *dso)
{
- dso->sorted_by_name |= (1 << type);
+ dso->sorted_by_name = true;
}
struct dso *dso__new(const char *name)
@@ -1196,12 +1196,10 @@ struct dso *dso__new(const char *name)
struct dso *dso = calloc(1, sizeof(*dso) + strlen(name) + 1);
if (dso != NULL) {
- int i;
strcpy(dso->name, name);
dso__set_long_name(dso, dso->name, false);
dso__set_short_name(dso, dso->name, false);
- for (i = 0; i < MAP__NR_TYPES; ++i)
- dso->symbols[i] = dso->symbol_names[i] = RB_ROOT;
+ dso->symbols = dso->symbol_names = RB_ROOT;
dso->data.cache = RB_ROOT;
dso->inlined_nodes = RB_ROOT;
dso->srclines = RB_ROOT;
@@ -1231,8 +1229,6 @@ struct dso *dso__new(const char *name)
void dso__delete(struct dso *dso)
{
- int i;
-
if (!RB_EMPTY_NODE(&dso->rb_node))
pr_err("DSO %s is still in rbtree when being deleted!\n",
dso->long_name);
@@ -1240,8 +1236,7 @@ void dso__delete(struct dso *dso)
/* free inlines first, as they reference symbols */
inlines__tree_delete(&dso->inlined_nodes);
srcline__tree_delete(&dso->srclines);
- for (i = 0; i < MAP__NR_TYPES; ++i)
- symbols__delete(&dso->symbols[i]);
+ symbols__delete(&dso->symbols);
if (dso->short_name_allocated) {
zfree((char **)&dso->short_name);
@@ -1451,9 +1446,7 @@ size_t __dsos__fprintf(struct list_head *head, FILE *fp)
size_t ret = 0;
list_for_each_entry(pos, head, node) {
- int i;
- for (i = 0; i < MAP__NR_TYPES; ++i)
- ret += dso__fprintf(pos, i, fp);
+ ret += dso__fprintf(pos, fp);
}
return ret;
@@ -1467,18 +1460,17 @@ size_t dso__fprintf_buildid(struct dso *dso, FILE *fp)
return fprintf(fp, "%s", sbuild_id);
}
-size_t dso__fprintf(struct dso *dso, enum map_type type, FILE *fp)
+size_t dso__fprintf(struct dso *dso, FILE *fp)
{
struct rb_node *nd;
size_t ret = fprintf(fp, "dso: %s (", dso->short_name);
if (dso->short_name != dso->long_name)
ret += fprintf(fp, "%s, ", dso->long_name);
- ret += fprintf(fp, "%s, %sloaded, ", map_type__name[type],
- dso__loaded(dso, type) ? "" : "NOT ");
+ ret += fprintf(fp, "%sloaded, ", dso__loaded(dso) ? "" : "NOT ");
ret += dso__fprintf_buildid(dso, fp);
ret += fprintf(fp, ")\n");
- for (nd = rb_first(&dso->symbols[type]); nd; nd = rb_next(nd)) {
+ for (nd = rb_first(&dso->symbols); nd; nd = rb_next(nd)) {
struct symbol *pos = rb_entry(nd, struct symbol, rb_node);
ret += symbol__fprintf(pos, fp);
}
diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h
index c229dbe0277a..ef69de2e69ea 100644
--- a/tools/perf/util/dso.h
+++ b/tools/perf/util/dso.h
@@ -140,14 +140,14 @@ struct dso {
struct list_head node;
struct rb_node rb_node; /* rbtree node sorted by long name */
struct rb_root *root; /* root of rbtree that rb_node is in */
- struct rb_root symbols[MAP__NR_TYPES];
- struct rb_root symbol_names[MAP__NR_TYPES];
+ struct rb_root symbols;
+ struct rb_root symbol_names;
struct rb_root inlined_nodes;
struct rb_root srclines;
struct {
u64 addr;
struct symbol *symbol;
- } last_find_result[MAP__NR_TYPES];
+ } last_find_result;
void *a2l;
char *symsrc_filename;
unsigned int a2l_fails;
@@ -164,8 +164,8 @@ struct dso {
u8 short_name_allocated:1;
u8 long_name_allocated:1;
u8 is_64_bit:1;
- u8 sorted_by_name;
- u8 loaded;
+ bool sorted_by_name;
+ bool loaded;
u8 rel;
u8 build_id[BUILD_ID_SIZE];
u64 text_offset;
@@ -202,14 +202,13 @@ struct dso {
 * @dso: the 'struct dso *' in which symbols are iterated
* @pos: the 'struct symbol *' to use as a loop cursor
* @n: the 'struct rb_node *' to use as a temporary storage
- * @type: the 'enum map_type' type of symbols
*/
-#define dso__for_each_symbol(dso, pos, n, type) \
- symbols__for_each_entry(&(dso)->symbols[(type)], pos, n)
+#define dso__for_each_symbol(dso, pos, n) \
+ symbols__for_each_entry(&(dso)->symbols, pos, n)
-static inline void dso__set_loaded(struct dso *dso, enum map_type type)
+static inline void dso__set_loaded(struct dso *dso)
{
- dso->loaded |= (1 << type);
+ dso->loaded = true;
}
struct dso *dso__new(const char *name);
@@ -231,11 +230,16 @@ static inline void __dso__zput(struct dso **dso)
#define dso__zput(dso) __dso__zput(&dso)
-bool dso__loaded(const struct dso *dso, enum map_type type);
+bool dso__loaded(const struct dso *dso);
-bool dso__sorted_by_name(const struct dso *dso, enum map_type type);
-void dso__set_sorted_by_name(struct dso *dso, enum map_type type);
-void dso__sort_by_name(struct dso *dso, enum map_type type);
+static inline bool dso__has_symbols(const struct dso *dso)
+{
+ return !RB_EMPTY_ROOT(&dso->symbols);
+}
+
+bool dso__sorted_by_name(const struct dso *dso);
+void dso__set_sorted_by_name(struct dso *dso);
+void dso__sort_by_name(struct dso *dso);
void dso__set_build_id(struct dso *dso, void *build_id);
bool dso__build_id_equal(const struct dso *dso, u8 *build_id);
@@ -349,9 +353,8 @@ size_t __dsos__fprintf_buildid(struct list_head *head, FILE *fp,
size_t __dsos__fprintf(struct list_head *head, FILE *fp);
size_t dso__fprintf_buildid(struct dso *dso, FILE *fp);
-size_t dso__fprintf_symbols_by_name(struct dso *dso,
- enum map_type type, FILE *fp);
-size_t dso__fprintf(struct dso *dso, enum map_type type, FILE *fp);
+size_t dso__fprintf_symbols_by_name(struct dso *dso, FILE *fp);
+size_t dso__fprintf(struct dso *dso, FILE *fp);
static inline bool dso__is_vmlinux(struct dso *dso)
{
diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c
index 4c842762e3f2..59f38c7693f8 100644
--- a/tools/perf/util/env.c
+++ b/tools/perf/util/env.c
@@ -93,6 +93,37 @@ int perf_env__read_cpu_topology_map(struct perf_env *env)
return 0;
}
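+
+/* Fill env->arch from uname(2) when it was not already set (e.g. from the perf.data header). */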
+static int perf_env__read_arch(struct perf_env *env)
+{
+ struct utsname uts;
+
+ if (env->arch)
+ return 0;
+
+ if (!uname(&uts))
+ env->arch = strdup(uts.machine);
+
+ return env->arch ? 0 : -ENOMEM;
+}
+
+static int perf_env__read_nr_cpus_avail(struct perf_env *env)
+{
+ if (env->nr_cpus_avail == 0)
+ env->nr_cpus_avail = cpu__max_present_cpu();
+
+ return env->nr_cpus_avail ? 0 : -ENOENT;
+}
+
+const char *perf_env__raw_arch(struct perf_env *env)
+{
+ return env && !perf_env__read_arch(env) ? env->arch : "unknown";
+}
+
+int perf_env__nr_cpus_avail(struct perf_env *env)
+{
+ return env && !perf_env__read_nr_cpus_avail(env) ? env->nr_cpus_avail : 0;
+}
+
void cpu_cache_level__free(struct cpu_cache_level *cache)
{
free(cache->type);
diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h
index c4ef2e523367..1f3ccc368530 100644
--- a/tools/perf/util/env.h
+++ b/tools/perf/util/env.h
@@ -76,4 +76,7 @@ int perf_env__read_cpu_topology_map(struct perf_env *env);
void cpu_cache_level__free(struct cpu_cache_level *cache);
const char *perf_env__arch(struct perf_env *env);
+const char *perf_env__raw_arch(struct perf_env *env);
+int perf_env__nr_cpus_avail(struct perf_env *env);
+
#endif /* __PERF_ENV_H */
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index 98ff3a6a3d50..0c8ecf0c78a4 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -88,10 +88,10 @@ static const char *perf_ns__name(unsigned int id)
return perf_ns__names[id];
}
-static int perf_tool__process_synth_event(struct perf_tool *tool,
- union perf_event *event,
- struct machine *machine,
- perf_event__handler_t process)
+int perf_tool__process_synth_event(struct perf_tool *tool,
+ union perf_event *event,
+ struct machine *machine,
+ perf_event__handler_t process)
{
struct perf_sample synth_sample = {
.pid = -1,
@@ -464,8 +464,7 @@ int perf_event__synthesize_modules(struct perf_tool *tool,
{
int rc = 0;
struct map *pos;
- struct map_groups *kmaps = &machine->kmaps;
- struct maps *maps = &kmaps->maps[MAP__FUNCTION];
+ struct maps *maps = machine__kernel_maps(machine);
union perf_event *event = zalloc((sizeof(event->mmap) +
machine->id_hdr_size));
if (event == NULL) {
@@ -488,7 +487,7 @@ int perf_event__synthesize_modules(struct perf_tool *tool,
for (pos = maps__first(maps); pos; pos = map__next(pos)) {
size_t size;
- if (__map__is_kernel(pos))
+ if (!__map__is_kmodule(pos))
continue;
size = PERF_ALIGN(pos->dso->long_name_len + 1, sizeof(u64));
@@ -869,7 +868,7 @@ static int find_symbol_cb(void *arg, const char *name, char type,
* Must be a function or at least an alias, as in PARISC64, where "_text" is
* an 'A' to the same address as "_stext".
*/
- if (!(symbol_type__is_a(type, MAP__FUNCTION) ||
+ if (!(kallsyms__is_function(type) ||
type == 'A') || strcmp(name, args->name))
return 0;
@@ -889,9 +888,16 @@ int kallsyms__get_function_start(const char *kallsyms_filename,
return 0;
}
-int perf_event__synthesize_kernel_mmap(struct perf_tool *tool,
- perf_event__handler_t process,
- struct machine *machine)
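+/* Weak default; architectures that synthesize extra kernel maps (e.g. entry trampolines) override this. */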
+int __weak perf_event__synthesize_extra_kmaps(struct perf_tool *tool __maybe_unused,
+ perf_event__handler_t process __maybe_unused,
+ struct machine *machine __maybe_unused)
+{
+ return 0;
+}
+
+static int __perf_event__synthesize_kernel_mmap(struct perf_tool *tool,
+ perf_event__handler_t process,
+ struct machine *machine)
{
size_t size;
struct map *map = machine__kernel_map(machine);
@@ -944,6 +950,19 @@ int perf_event__synthesize_kernel_mmap(struct perf_tool *tool,
return err;
}
+int perf_event__synthesize_kernel_mmap(struct perf_tool *tool,
+ perf_event__handler_t process,
+ struct machine *machine)
+{
+ int err;
+
+ err = __perf_event__synthesize_kernel_mmap(tool, process, machine);
+ if (err < 0)
+ return err;
+
+ return perf_event__synthesize_extra_kmaps(tool, process, machine);
+}
+
int perf_event__synthesize_thread_map2(struct perf_tool *tool,
struct thread_map *threads,
perf_event__handler_t process,
@@ -1489,9 +1508,8 @@ int perf_event__process(struct perf_tool *tool __maybe_unused,
return machine__process_event(machine, event, sample);
}
-void thread__find_addr_map(struct thread *thread, u8 cpumode,
- enum map_type type, u64 addr,
- struct addr_location *al)
+struct map *thread__find_map(struct thread *thread, u8 cpumode, u64 addr,
+ struct addr_location *al)
{
struct map_groups *mg = thread->mg;
struct machine *machine = mg->machine;
@@ -1505,7 +1523,7 @@ void thread__find_addr_map(struct thread *thread, u8 cpumode,
if (machine == NULL) {
al->map = NULL;
- return;
+ return NULL;
}
if (cpumode == PERF_RECORD_MISC_KERNEL && perf_host) {
@@ -1533,10 +1551,10 @@ void thread__find_addr_map(struct thread *thread, u8 cpumode,
!perf_host)
al->filtered |= (1 << HIST_FILTER__HOST);
- return;
+ return NULL;
}
try_again:
- al->map = map_groups__find(mg, type, al->addr);
+ al->map = map_groups__find(mg, al->addr);
if (al->map == NULL) {
/*
* If this is outside of all known maps, and is a negative
@@ -1563,17 +1581,17 @@ try_again:
map__load(al->map);
al->addr = al->map->map_ip(al->map, al->addr);
}
+
+ return al->map;
}
-void thread__find_addr_location(struct thread *thread,
- u8 cpumode, enum map_type type, u64 addr,
- struct addr_location *al)
+struct symbol *thread__find_symbol(struct thread *thread, u8 cpumode,
+ u64 addr, struct addr_location *al)
{
- thread__find_addr_map(thread, cpumode, type, addr, al);
- if (al->map != NULL)
+ al->sym = NULL;
+ if (thread__find_map(thread, cpumode, addr, al))
al->sym = map__find_symbol(al->map, al->addr);
- else
- al->sym = NULL;
+ return al->sym;
}
/*
@@ -1590,7 +1608,7 @@ int machine__resolve(struct machine *machine, struct addr_location *al,
return -1;
dump_printf(" ... thread: %s:%d\n", thread__comm_str(thread), thread->tid);
- thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, sample->ip, al);
+ thread__find_map(thread, sample->cpumode, sample->ip, al);
dump_printf(" ...... dso: %s\n",
al->map ? al->map->dso->long_name :
al->level == 'H' ? "[hypervisor]" : "<not found>");
@@ -1669,10 +1687,7 @@ bool sample_addr_correlates_sym(struct perf_event_attr *attr)
void thread__resolve(struct thread *thread, struct addr_location *al,
struct perf_sample *sample)
{
- thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, sample->addr, al);
- if (!al->map)
- thread__find_addr_map(thread, sample->cpumode, MAP__VARIABLE,
- sample->addr, al);
+ thread__find_map(thread, sample->cpumode, sample->addr, al);
al->cpu = sample->cpu;
al->sym = NULL;
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index 0f794744919c..bfa60bcafbde 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -750,6 +750,10 @@ int perf_event__process_exit(struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine);
+int perf_tool__process_synth_event(struct perf_tool *tool,
+ union perf_event *event,
+ struct machine *machine,
+ perf_event__handler_t process);
int perf_event__process(struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
@@ -796,6 +800,10 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool,
bool mmap_data,
unsigned int proc_map_timeout);
+int perf_event__synthesize_extra_kmaps(struct perf_tool *tool,
+ perf_event__handler_t process,
+ struct machine *machine);
+
size_t perf_event__fprintf_comm(union perf_event *event, FILE *fp);
size_t perf_event__fprintf_mmap(union perf_event *event, FILE *fp);
size_t perf_event__fprintf_mmap2(union perf_event *event, FILE *fp);
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index a59281d64368..e7a4b31a84fb 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -1795,3 +1795,18 @@ bool perf_evlist__exclude_kernel(struct perf_evlist *evlist)
return true;
}
+
+/*
+ * Events in a data file are not collected in groups, but we still want
+ * the group display. Set the artificial group and set the leader's
+ * forced_leader flag to notify the display code.
+ */
+void perf_evlist__force_leader(struct perf_evlist *evlist)
+{
+ if (!evlist->nr_groups) {
+ struct perf_evsel *leader = perf_evlist__first(evlist);
+
+ perf_evlist__set_leader(evlist);
+ leader->forced_leader = true;
+ }
+}
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index 6c41b2f78713..dc66436add98 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -309,4 +309,7 @@ struct perf_evsel *perf_evlist__event2evsel(struct perf_evlist *evlist,
union perf_event *event);
bool perf_evlist__exclude_kernel(struct perf_evlist *evlist);
+
+void perf_evlist__force_leader(struct perf_evlist *evlist);
+
#endif /* __PERF_EVLIST_H */
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 4cd2cf93f726..150db5ed7400 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -2862,7 +2862,7 @@ int perf_evsel__open_strerror(struct perf_evsel *evsel, struct target *target,
return scnprintf(msg, size,
"Not enough memory to setup event with callchain.\n"
"Hint: Try tweaking /proc/sys/kernel/perf_event_max_stack\n"
- "Hint: Current value: %d", sysctl_perf_event_max_stack);
+ "Hint: Current value: %d", sysctl__max_stack());
break;
case ENODEV:
if (target->cpu_list)
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 92ec009a292d..b13f5f234c8f 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -127,6 +127,7 @@ struct perf_evsel {
bool precise_max;
bool ignore_missing_thread;
bool forced_leader;
+ bool use_uncore_alias;
/* parse modifier helper */
int exclude_GH;
int nr_members;
diff --git a/tools/perf/util/genelf.c b/tools/perf/util/genelf.c
index c540d47583e7..aafbe54fd3fa 100644
--- a/tools/perf/util/genelf.c
+++ b/tools/perf/util/genelf.c
@@ -114,7 +114,7 @@ gen_build_id(struct buildid_note *note,
fd = open("/dev/urandom", O_RDONLY);
if (fd == -1)
- err(1, "cannot access /dev/urandom for builid");
+ err(1, "cannot access /dev/urandom for buildid");
sret = read(fd, note->build_id, sz);
diff --git a/tools/perf/util/intel-bts.c b/tools/perf/util/intel-bts.c
index 72db2744876d..7f0c83b6332b 100644
--- a/tools/perf/util/intel-bts.c
+++ b/tools/perf/util/intel-bts.c
@@ -335,8 +335,7 @@ static int intel_bts_get_next_insn(struct intel_bts_queue *btsq, u64 ip)
if (!thread)
return -1;
- thread__find_addr_map(thread, cpumode, MAP__FUNCTION, ip, &al);
- if (!al.map || !al.map->dso)
+ if (!thread__find_map(thread, cpumode, ip, &al) || !al.map->dso)
goto out_put;
len = dso__data_read_addr(al.map->dso, al.map, machine, ip, buf,
diff --git a/tools/perf/util/intel-pt-decoder/insn.h b/tools/perf/util/intel-pt-decoder/insn.h
index e23578c7b1be..2669c9f748e4 100644
--- a/tools/perf/util/intel-pt-decoder/insn.h
+++ b/tools/perf/util/intel-pt-decoder/insn.h
@@ -208,4 +208,22 @@ static inline int insn_offset_immediate(struct insn *insn)
return insn_offset_displacement(insn) + insn->displacement.nbytes;
}
+#define POP_SS_OPCODE 0x1f
+#define MOV_SREG_OPCODE 0x8e
+
+/*
+ * Intel SDM Vol.3A 6.8.3 states:
+ * "Any single-step trap that would be delivered following the MOV to SS
+ * instruction or POP to SS instruction (because EFLAGS.TF is 1) is
+ * suppressed."
+ * This function returns true if @insn is MOV SS or POP SS. On these
+ * instructions, single stepping is suppressed.
+ */
+static inline int insn_masking_exception(struct insn *insn)
+{
+ return insn->opcode.bytes[0] == POP_SS_OPCODE ||
+ (insn->opcode.bytes[0] == MOV_SREG_OPCODE &&
+ X86_MODRM_REG(insn->modrm.bytes[0]) == 2);
+}
+
#endif /* _ASM_X86_INSN_H */
diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
index 0effaff57020..492986a25ef6 100644
--- a/tools/perf/util/intel-pt.c
+++ b/tools/perf/util/intel-pt.c
@@ -442,8 +442,7 @@ static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn,
}
while (1) {
- thread__find_addr_map(thread, cpumode, MAP__FUNCTION, *ip, &al);
- if (!al.map || !al.map->dso)
+ if (!thread__find_map(thread, cpumode, *ip, &al) || !al.map->dso)
return -EINVAL;
if (al.map->dso->data.status == DSO_DATA_STATUS_ERROR &&
@@ -596,8 +595,7 @@ static int __intel_pt_pgd_ip(uint64_t ip, void *data)
if (!thread)
return -EINVAL;
- thread__find_addr_map(thread, cpumode, MAP__FUNCTION, ip, &al);
- if (!al.map || !al.map->dso)
+ if (!thread__find_map(thread, cpumode, ip, &al) || !al.map->dso)
return -EINVAL;
offset = al.map->map_ip(al.map, ip);
@@ -1565,7 +1563,7 @@ static u64 intel_pt_switch_ip(struct intel_pt *pt, u64 *ptss_ip)
if (map__load(map))
return 0;
- start = dso__first_symbol(map->dso, MAP__FUNCTION);
+ start = dso__first_symbol(map->dso);
for (sym = start; sym; sym = dso__next_symbol(sym)) {
if (sym->binding == STB_GLOBAL &&
diff --git a/tools/perf/util/llvm-utils.c b/tools/perf/util/llvm-utils.c
index 1cca0a2fa641..976e658e38dc 100644
--- a/tools/perf/util/llvm-utils.c
+++ b/tools/perf/util/llvm-utils.c
@@ -14,11 +14,12 @@
#include "config.h"
#include "util.h"
#include <sys/wait.h>
+#include <subcmd/exec-cmd.h>
#define CLANG_BPF_CMD_DEFAULT_TEMPLATE \
"$CLANG_EXEC -D__KERNEL__ -D__NR_CPUS__=$NR_CPUS "\
"-DLINUX_VERSION_CODE=$LINUX_VERSION_CODE " \
- "$CLANG_OPTIONS $KERNEL_INC_OPTIONS " \
+ "$CLANG_OPTIONS $KERNEL_INC_OPTIONS $PERF_BPF_INC_OPTIONS " \
"-Wno-unused-value -Wno-pointer-sign " \
"-working-directory $WORKING_DIR " \
"-c \"$CLANG_SOURCE\" -target bpf -O2 -o -"
@@ -212,7 +213,7 @@ version_notice(void)
" \t\thttp://llvm.org/apt\n\n"
" \tIf you are using old version of clang, change 'clang-bpf-cmd-template'\n"
" \toption in [llvm] section of ~/.perfconfig to:\n\n"
-" \t \"$CLANG_EXEC $CLANG_OPTIONS $KERNEL_INC_OPTIONS \\\n"
+" \t \"$CLANG_EXEC $CLANG_OPTIONS $KERNEL_INC_OPTIONS $PERF_BPF_INC_OPTIONS \\\n"
" \t -working-directory $WORKING_DIR -c $CLANG_SOURCE \\\n"
" \t -emit-llvm -o - | /path/to/llc -march=bpf -filetype=obj -o -\"\n"
" \t(Replace /path/to/llc with path to your llc)\n\n"
@@ -431,9 +432,11 @@ int llvm__compile_bpf(const char *path, void **p_obj_buf,
const char *clang_opt = llvm_param.clang_opt;
char clang_path[PATH_MAX], abspath[PATH_MAX], nr_cpus_avail_str[64];
char serr[STRERR_BUFSIZE];
- char *kbuild_dir = NULL, *kbuild_include_opts = NULL;
+ char *kbuild_dir = NULL, *kbuild_include_opts = NULL,
+ *perf_bpf_include_opts = NULL;
const char *template = llvm_param.clang_bpf_cmd_template;
- char *command_echo, *command_out;
+ char *command_echo = NULL, *command_out;
+ char *perf_include_dir = system_path(PERF_INCLUDE_DIR);
if (path[0] != '-' && realpath(path, abspath) == NULL) {
err = errno;
@@ -471,12 +474,14 @@ int llvm__compile_bpf(const char *path, void **p_obj_buf,
snprintf(linux_version_code_str, sizeof(linux_version_code_str),
"0x%x", kernel_version);
-
+ if (asprintf(&perf_bpf_include_opts, "-I%s/bpf", perf_include_dir) < 0)
+ goto errout;
force_set_env("NR_CPUS", nr_cpus_avail_str);
force_set_env("LINUX_VERSION_CODE", linux_version_code_str);
force_set_env("CLANG_EXEC", clang_path);
force_set_env("CLANG_OPTIONS", clang_opt);
force_set_env("KERNEL_INC_OPTIONS", kbuild_include_opts);
+ force_set_env("PERF_BPF_INC_OPTIONS", perf_bpf_include_opts);
force_set_env("WORKING_DIR", kbuild_dir ? : ".");
/*
@@ -512,6 +517,8 @@ int llvm__compile_bpf(const char *path, void **p_obj_buf,
free(command_out);
free(kbuild_dir);
free(kbuild_include_opts);
+ free(perf_bpf_include_opts);
+ free(perf_include_dir);
if (!p_obj_buf)
free(obj_buf);
@@ -526,6 +533,8 @@ errout:
free(kbuild_dir);
free(kbuild_include_opts);
free(obj_buf);
+ free(perf_bpf_include_opts);
+ free(perf_include_dir);
if (p_obj_buf)
*p_obj_buf = NULL;
if (p_obj_buf_sz)
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 32d50492505d..e7b4a8b513f2 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -24,6 +24,7 @@
#include "sane_ctype.h"
#include <symbol/kallsyms.h>
+#include <linux/mman.h>
static void __machine__remove_thread(struct machine *machine, struct thread *th, bool lock);
@@ -81,8 +82,7 @@ int machine__init(struct machine *machine, const char *root_dir, pid_t pid)
machine->kptr_restrict_warned = false;
machine->comm_exec = false;
machine->kernel_start = 0;
-
- memset(machine->vmlinux_maps, 0, sizeof(machine->vmlinux_maps));
+ machine->vmlinux_map = NULL;
machine->root_dir = strdup(root_dir);
if (machine->root_dir == NULL)
@@ -137,13 +137,11 @@ struct machine *machine__new_kallsyms(void)
struct machine *machine = machine__new_host();
/*
* FIXME:
- * 1) MAP__FUNCTION will go away when we stop loading separate maps for
- * functions and data objects.
- * 2) We should switch to machine__load_kallsyms(), i.e. not explicitely
+	 * 1) We should switch to machine__load_kallsyms(), i.e. not explicitly
* ask for not using the kcore parsing code, once this one is fixed
* to create a map per module.
*/
- if (machine && machine__load_kallsyms(machine, "/proc/kallsyms", MAP__FUNCTION) <= 0) {
+ if (machine && machine__load_kallsyms(machine, "/proc/kallsyms") <= 0) {
machine__delete(machine);
machine = NULL;
}
@@ -673,8 +671,7 @@ struct map *machine__findnew_module_map(struct machine *machine, u64 start,
if (kmod_path__parse_name(&m, filename))
return NULL;
- map = map_groups__find_by_name(&machine->kmaps, MAP__FUNCTION,
- m.name);
+ map = map_groups__find_by_name(&machine->kmaps, m.name);
if (map) {
/*
* If the map's dso is an offline module, give dso__load()
@@ -689,7 +686,7 @@ struct map *machine__findnew_module_map(struct machine *machine, u64 start,
if (dso == NULL)
goto out;
- map = map__new2(start, dso, MAP__FUNCTION);
+ map = map__new2(start, dso);
if (map == NULL)
goto out;
@@ -810,8 +807,8 @@ struct process_args {
u64 start;
};
-static void machine__get_kallsyms_filename(struct machine *machine, char *buf,
- size_t bufsz)
+void machine__get_kallsyms_filename(struct machine *machine, char *buf,
+ size_t bufsz)
{
if (machine__is_default_guest(machine))
scnprintf(buf, bufsz, "%s", symbol_conf.default_guest_kallsyms);
@@ -854,65 +851,171 @@ static int machine__get_running_kernel_start(struct machine *machine,
return 0;
}
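+
+/* Install a kernel map beyond vmlinux and the module maps, e.g. for an entry trampoline. */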
+int machine__create_extra_kernel_map(struct machine *machine,
+ struct dso *kernel,
+ struct extra_kernel_map *xm)
+{
+ struct kmap *kmap;
+ struct map *map;
+
+ map = map__new2(xm->start, kernel);
+ if (!map)
+ return -1;
+
+ map->end = xm->end;
+ map->pgoff = xm->pgoff;
+
+ kmap = map__kmap(map);
+
+ kmap->kmaps = &machine->kmaps;
+ strlcpy(kmap->name, xm->name, KMAP_NAME_LEN);
+
+ map_groups__insert(&machine->kmaps, map);
+
+ pr_debug2("Added extra kernel map %s %" PRIx64 "-%" PRIx64 "\n",
+ kmap->name, map->start, map->end);
+
+ map__put(map);
+
+ return 0;
+}
+
+static u64 find_entry_trampoline(struct dso *dso)
+{
+	/* Duplicates are removed, so look up all known aliases */
+ const char *syms[] = {
+ "_entry_trampoline",
+ "__entry_trampoline_start",
+ "entry_SYSCALL_64_trampoline",
+ };
+ struct symbol *sym = dso__first_symbol(dso);
+ unsigned int i;
+
+ for (; sym; sym = dso__next_symbol(sym)) {
+ if (sym->binding != STB_GLOBAL)
+ continue;
+ for (i = 0; i < ARRAY_SIZE(syms); i++) {
+ if (!strcmp(sym->name, syms[i]))
+ return sym->start;
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * These values can be used for kernels that do not have symbols for the entry
+ * trampolines in kallsyms.
+ */
+#define X86_64_CPU_ENTRY_AREA_PER_CPU 0xfffffe0000000000ULL
+#define X86_64_CPU_ENTRY_AREA_SIZE 0x2c000
+#define X86_64_ENTRY_TRAMPOLINE 0x6000
+
+/* Map x86_64 PTI entry trampolines */
+int machine__map_x86_64_entry_trampolines(struct machine *machine,
+ struct dso *kernel)
+{
+ struct map_groups *kmaps = &machine->kmaps;
+ struct maps *maps = &kmaps->maps;
+ int nr_cpus_avail, cpu;
+ bool found = false;
+ struct map *map;
+ u64 pgoff;
+
+ /*
+ * In the vmlinux case, pgoff is a virtual address which must now be
+ * mapped to a vmlinux offset.
+ */
+ for (map = maps__first(maps); map; map = map__next(map)) {
+ struct kmap *kmap = __map__kmap(map);
+ struct map *dest_map;
+
+ if (!kmap || !is_entry_trampoline(kmap->name))
+ continue;
+
+		dest_map = map_groups__find(kmaps, map->pgoff);
+		if (dest_map && dest_map != map)
+			map->pgoff = dest_map->map_ip(dest_map, map->pgoff);
+ found = true;
+ }
+ if (found || machine->trampolines_mapped)
+ return 0;
+
+ pgoff = find_entry_trampoline(kernel);
+ if (!pgoff)
+ return 0;
+
+ nr_cpus_avail = machine__nr_cpus_avail(machine);
+
+ /* Add a 1 page map for each CPU's entry trampoline */
+ for (cpu = 0; cpu < nr_cpus_avail; cpu++) {
+ u64 va = X86_64_CPU_ENTRY_AREA_PER_CPU +
+ cpu * X86_64_CPU_ENTRY_AREA_SIZE +
+ X86_64_ENTRY_TRAMPOLINE;
+ struct extra_kernel_map xm = {
+ .start = va,
+ .end = va + page_size,
+ .pgoff = pgoff,
+ };
+
+ strlcpy(xm.name, ENTRY_TRAMPOLINE_NAME, KMAP_NAME_LEN);
+
+ if (machine__create_extra_kernel_map(machine, kernel, &xm) < 0)
+ return -1;
+ }
+
+ machine->trampolines_mapped = nr_cpus_avail;
+
+ return 0;
+}
+
+int __weak machine__create_extra_kernel_maps(struct machine *machine __maybe_unused,
+ struct dso *kernel __maybe_unused)
+{
+ return 0;
+}
+
static int
__machine__create_kernel_maps(struct machine *machine, struct dso *kernel)
{
- int type;
+ struct kmap *kmap;
+ struct map *map;
/* In case of renewal the kernel map, destroy previous one */
machine__destroy_kernel_maps(machine);
- for (type = 0; type < MAP__NR_TYPES; ++type) {
- struct kmap *kmap;
- struct map *map;
-
- machine->vmlinux_maps[type] = map__new2(0, kernel, type);
- if (machine->vmlinux_maps[type] == NULL)
- return -1;
+ machine->vmlinux_map = map__new2(0, kernel);
+ if (machine->vmlinux_map == NULL)
+ return -1;
- machine->vmlinux_maps[type]->map_ip =
- machine->vmlinux_maps[type]->unmap_ip =
- identity__map_ip;
- map = __machine__kernel_map(machine, type);
- kmap = map__kmap(map);
- if (!kmap)
- return -1;
+ machine->vmlinux_map->map_ip = machine->vmlinux_map->unmap_ip = identity__map_ip;
+ map = machine__kernel_map(machine);
+ kmap = map__kmap(map);
+ if (!kmap)
+ return -1;
- kmap->kmaps = &machine->kmaps;
- map_groups__insert(&machine->kmaps, map);
- }
+ kmap->kmaps = &machine->kmaps;
+ map_groups__insert(&machine->kmaps, map);
return 0;
}
void machine__destroy_kernel_maps(struct machine *machine)
{
- int type;
-
- for (type = 0; type < MAP__NR_TYPES; ++type) {
- struct kmap *kmap;
- struct map *map = __machine__kernel_map(machine, type);
-
- if (map == NULL)
- continue;
+ struct kmap *kmap;
+ struct map *map = machine__kernel_map(machine);
- kmap = map__kmap(map);
- map_groups__remove(&machine->kmaps, map);
- if (kmap && kmap->ref_reloc_sym) {
- /*
- * ref_reloc_sym is shared among all maps, so free just
- * on one of them.
- */
- if (type == MAP__FUNCTION) {
- zfree((char **)&kmap->ref_reloc_sym->name);
- zfree(&kmap->ref_reloc_sym);
- } else
- kmap->ref_reloc_sym = NULL;
- }
+ if (map == NULL)
+ return;
- map__put(machine->vmlinux_maps[type]);
- machine->vmlinux_maps[type] = NULL;
+ kmap = map__kmap(map);
+ map_groups__remove(&machine->kmaps, map);
+ if (kmap && kmap->ref_reloc_sym) {
+ zfree((char **)&kmap->ref_reloc_sym->name);
+ zfree(&kmap->ref_reloc_sym);
}
+
+ map__zput(machine->vmlinux_map);
}
int machines__create_guest_kernel_maps(struct machines *machines)
@@ -989,32 +1092,31 @@ int machines__create_kernel_maps(struct machines *machines, pid_t pid)
return machine__create_kernel_maps(machine);
}
-int machine__load_kallsyms(struct machine *machine, const char *filename,
- enum map_type type)
+int machine__load_kallsyms(struct machine *machine, const char *filename)
{
struct map *map = machine__kernel_map(machine);
int ret = __dso__load_kallsyms(map->dso, filename, map, true);
if (ret > 0) {
- dso__set_loaded(map->dso, type);
+ dso__set_loaded(map->dso);
/*
* Since /proc/kallsyms will have multiple sessions for the
* kernel, with modules between them, fixup the end of all
* sections.
*/
- __map_groups__fixup_end(&machine->kmaps, type);
+ map_groups__fixup_end(&machine->kmaps);
}
return ret;
}
-int machine__load_vmlinux_path(struct machine *machine, enum map_type type)
+int machine__load_vmlinux_path(struct machine *machine)
{
struct map *map = machine__kernel_map(machine);
int ret = dso__load_vmlinux_path(map->dso, map);
if (ret > 0)
- dso__set_loaded(map->dso, type);
+ dso__set_loaded(map->dso);
return ret;
}
@@ -1055,10 +1157,9 @@ static bool is_kmod_dso(struct dso *dso)
static int map_groups__set_module_path(struct map_groups *mg, const char *path,
struct kmod_path *m)
{
- struct map *map;
char *long_name;
+ struct map *map = map_groups__find_by_name(mg, m->name);
- map = map_groups__find_by_name(mg, MAP__FUNCTION, m->name);
if (map == NULL)
return 0;
@@ -1207,19 +1308,14 @@ static int machine__create_modules(struct machine *machine)
static void machine__set_kernel_mmap(struct machine *machine,
u64 start, u64 end)
{
- int i;
-
- for (i = 0; i < MAP__NR_TYPES; i++) {
- machine->vmlinux_maps[i]->start = start;
- machine->vmlinux_maps[i]->end = end;
-
- /*
- * Be a bit paranoid here, some perf.data file came with
- * a zero sized synthesized MMAP event for the kernel.
- */
- if (start == 0 && end == 0)
- machine->vmlinux_maps[i]->end = ~0ULL;
- }
+ machine->vmlinux_map->start = start;
+ machine->vmlinux_map->end = end;
+ /*
+	 * Be a bit paranoid here, some perf.data files came with
+	 * a zero-sized synthesized MMAP event for the kernel.
+ */
+ if (start == 0 && end == 0)
+ machine->vmlinux_map->end = ~0ULL;
}
int machine__create_kernel_maps(struct machine *machine)
@@ -1234,9 +1330,8 @@ int machine__create_kernel_maps(struct machine *machine)
return -1;
ret = __machine__create_kernel_maps(machine, kernel);
- dso__put(kernel);
if (ret < 0)
- return -1;
+ goto out_put;
if (symbol_conf.use_modules && machine__create_modules(machine) < 0) {
if (machine__is_host(machine))
@@ -1249,9 +1344,10 @@ int machine__create_kernel_maps(struct machine *machine)
if (!machine__get_running_kernel_start(machine, &name, &addr)) {
if (name &&
- maps__set_kallsyms_ref_reloc_sym(machine->vmlinux_maps, name, addr)) {
+ map__set_kallsyms_ref_reloc_sym(machine->vmlinux_map, name, addr)) {
machine__destroy_kernel_maps(machine);
- return -1;
+ ret = -1;
+ goto out_put;
}
/* we have a real start address now, so re-order the kmaps */
@@ -1267,12 +1363,16 @@ int machine__create_kernel_maps(struct machine *machine)
map__put(map);
}
+ if (machine__create_extra_kernel_maps(machine, kernel))
+ pr_debug("Problems creating extra kernel maps, continuing anyway...\n");
+
/* update end address of the kernel map using adjacent module address */
map = map__next(machine__kernel_map(machine));
if (map)
machine__set_kernel_mmap(machine, addr, map->start);
-
- return 0;
+out_put:
+ dso__put(kernel);
+ return ret;
}
static bool machine__uses_kcore(struct machine *machine)
@@ -1287,6 +1387,32 @@ static bool machine__uses_kcore(struct machine *machine)
return false;
}
+static bool perf_event__is_extra_kernel_mmap(struct machine *machine,
+ union perf_event *event)
+{
+ return machine__is(machine, "x86_64") &&
+ is_entry_trampoline(event->mmap.filename);
+}
+
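+/* Turn an MMAP event for an extra kernel area (e.g. an entry trampoline) into a kernel map. */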
+static int machine__process_extra_kernel_map(struct machine *machine,
+ union perf_event *event)
+{
+ struct map *kernel_map = machine__kernel_map(machine);
+ struct dso *kernel = kernel_map ? kernel_map->dso : NULL;
+ struct extra_kernel_map xm = {
+ .start = event->mmap.start,
+ .end = event->mmap.start + event->mmap.len,
+ .pgoff = event->mmap.pgoff,
+ };
+
+ if (kernel == NULL)
+ return -1;
+
+ strlcpy(xm.name, event->mmap.filename, KMAP_NAME_LEN);
+
+ return machine__create_extra_kernel_map(machine, kernel, &xm);
+}
+
static int machine__process_kernel_mmap_event(struct machine *machine,
union perf_event *event)
{
@@ -1379,9 +1505,9 @@ static int machine__process_kernel_mmap_event(struct machine *machine,
* time /proc/sys/kernel/kptr_restrict was non zero.
*/
if (event->mmap.pgoff != 0) {
- maps__set_kallsyms_ref_reloc_sym(machine->vmlinux_maps,
- symbol_name,
- event->mmap.pgoff);
+ map__set_kallsyms_ref_reloc_sym(machine->vmlinux_map,
+ symbol_name,
+ event->mmap.pgoff);
}
if (machine__is_default_guest(machine)) {
@@ -1390,6 +1516,8 @@ static int machine__process_kernel_mmap_event(struct machine *machine,
*/
dso__load(kernel, machine__kernel_map(machine));
}
+ } else if (perf_event__is_extra_kernel_mmap(machine, event)) {
+ return machine__process_extra_kernel_map(machine, event);
}
return 0;
out_problem:
@@ -1402,7 +1530,6 @@ int machine__process_mmap2_event(struct machine *machine,
{
struct thread *thread;
struct map *map;
- enum map_type type;
int ret = 0;
if (dump_trace)
@@ -1421,11 +1548,6 @@ int machine__process_mmap2_event(struct machine *machine,
if (thread == NULL)
goto out_problem;
- if (event->header.misc & PERF_RECORD_MISC_MMAP_DATA)
- type = MAP__VARIABLE;
- else
- type = MAP__FUNCTION;
-
map = map__new(machine, event->mmap2.start,
event->mmap2.len, event->mmap2.pgoff,
event->mmap2.maj,
@@ -1433,7 +1555,7 @@ int machine__process_mmap2_event(struct machine *machine,
event->mmap2.ino_generation,
event->mmap2.prot,
event->mmap2.flags,
- event->mmap2.filename, type, thread);
+ event->mmap2.filename, thread);
if (map == NULL)
goto out_problem_map;
@@ -1460,7 +1582,7 @@ int machine__process_mmap_event(struct machine *machine, union perf_event *event
{
struct thread *thread;
struct map *map;
- enum map_type type;
+ u32 prot = 0;
int ret = 0;
if (dump_trace)
@@ -1479,16 +1601,14 @@ int machine__process_mmap_event(struct machine *machine, union perf_event *event
if (thread == NULL)
goto out_problem;
- if (event->header.misc & PERF_RECORD_MISC_MMAP_DATA)
- type = MAP__VARIABLE;
- else
- type = MAP__FUNCTION;
+ if (!(event->header.misc & PERF_RECORD_MISC_MMAP_DATA))
+ prot = PROT_EXEC;
map = map__new(machine, event->mmap.start,
event->mmap.len, event->mmap.pgoff,
- 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, prot, 0,
event->mmap.filename,
- type, thread);
+ thread);
if (map == NULL)
goto out_problem_map;
@@ -1664,7 +1784,7 @@ static void ip__resolve_ams(struct thread *thread,
* Thus, we have to try consecutively until we find a match
* or else, the symbol is unknown
*/
- thread__find_cpumode_addr_location(thread, MAP__FUNCTION, ip, &al);
+ thread__find_cpumode_addr_location(thread, ip, &al);
ams->addr = ip;
ams->al_addr = al.addr;
@@ -1681,15 +1801,7 @@ static void ip__resolve_data(struct thread *thread,
memset(&al, 0, sizeof(al));
- thread__find_addr_location(thread, m, MAP__VARIABLE, addr, &al);
- if (al.map == NULL) {
- /*
- * some shared data regions have execute bit set which puts
- * their mapping in the MAP__FUNCTION type array.
- * Check there as a fallback option before dropping the sample.
- */
- thread__find_addr_location(thread, m, MAP__FUNCTION, addr, &al);
- }
+ thread__find_symbol(thread, m, addr, &al);
ams->addr = addr;
ams->al_addr = al.addr;
@@ -1758,8 +1870,7 @@ static int add_callchain_ip(struct thread *thread,
al.filtered = 0;
al.sym = NULL;
if (!cpumode) {
- thread__find_cpumode_addr_location(thread, MAP__FUNCTION,
- ip, &al);
+ thread__find_cpumode_addr_location(thread, ip, &al);
} else {
if (ip >= PERF_CONTEXT_MAX) {
switch (ip) {
@@ -1784,8 +1895,7 @@ static int add_callchain_ip(struct thread *thread,
}
return 0;
}
- thread__find_addr_location(thread, *cpumode, MAP__FUNCTION,
- ip, &al);
+ thread__find_symbol(thread, *cpumode, ip, &al);
}
if (al.sym != NULL) {
@@ -1810,7 +1920,7 @@ static int add_callchain_ip(struct thread *thread,
}
srcline = callchain_srcline(al.map, al.sym, al.addr);
- return callchain_cursor_append(cursor, al.addr, al.map, al.sym,
+ return callchain_cursor_append(cursor, ip, al.map, al.sym,
branch, flags, nr_loop_iter,
iter_cycles, branch_from, srcline);
}
@@ -2342,6 +2452,20 @@ int machine__set_current_tid(struct machine *machine, int cpu, pid_t pid,
return 0;
}
+/*
+ * Compares the raw arch string. N.B. see perf_env__arch() instead if a
+ * normalized arch is needed.
+ */
+bool machine__is(struct machine *machine, const char *arch)
+{
+ return machine && !strcmp(perf_env__raw_arch(machine->env), arch);
+}
+
+int machine__nr_cpus_avail(struct machine *machine)
+{
+ return machine ? perf_env__nr_cpus_avail(machine->env) : 0;
+}
+
int machine__get_kernel_start(struct machine *machine)
{
struct map *map = machine__kernel_map(machine);
@@ -2358,7 +2482,12 @@ int machine__get_kernel_start(struct machine *machine)
machine->kernel_start = 1ULL << 63;
if (map) {
err = map__load(map);
- if (!err)
+ /*
+ * On x86_64, PTI entry trampolines are less than the
+ * start of kernel text, but still above 2^63. So leave
+ * kernel_start = 1ULL << 63 for x86_64.
+ */
+ if (!err && !machine__is(machine, "x86_64"))
machine->kernel_start = map->start;
}
return err;
@@ -2373,7 +2502,7 @@ char *machine__resolve_kernel_addr(void *vmachine, unsigned long long *addrp, ch
{
struct machine *machine = vmachine;
struct map *map;
- struct symbol *sym = map_groups__find_symbol(&machine->kmaps, MAP__FUNCTION, *addrp, &map);
+ struct symbol *sym = machine__find_kernel_symbol(machine, *addrp, &map);
if (sym == NULL)
return NULL;
diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h
index 66cc200ef86f..1de7660d93e9 100644
--- a/tools/perf/util/machine.h
+++ b/tools/perf/util/machine.h
@@ -49,13 +49,14 @@ struct machine {
struct perf_env *env;
struct dsos dsos;
struct map_groups kmaps;
- struct map *vmlinux_maps[MAP__NR_TYPES];
+ struct map *vmlinux_map;
u64 kernel_start;
pid_t *current_tid;
union { /* Tool specific area */
void *priv;
u64 db_id;
};
+ bool trampolines_mapped;
};
static inline struct threads *machine__threads(struct machine *machine, pid_t tid)
@@ -64,16 +65,22 @@ static inline struct threads *machine__threads(struct machine *machine, pid_t ti
return &machine->threads[(unsigned int)tid % THREADS__TABLE_SIZE];
}
+/*
+ * The main kernel (vmlinux) map
+ */
static inline
-struct map *__machine__kernel_map(struct machine *machine, enum map_type type)
+struct map *machine__kernel_map(struct machine *machine)
{
- return machine->vmlinux_maps[type];
+ return machine->vmlinux_map;
}
+/*
+ * kernel (the one returned by machine__kernel_map()) plus kernel module maps
+ */
static inline
-struct map *machine__kernel_map(struct machine *machine)
+struct maps *machine__kernel_maps(struct machine *machine)
{
- return __machine__kernel_map(machine, MAP__FUNCTION);
+ return &machine->kmaps.maps;
}
int machine__get_kernel_start(struct machine *machine);
@@ -182,6 +189,9 @@ static inline bool machine__is_host(struct machine *machine)
return machine ? machine->pid == HOST_KERNEL_ID : false;
}
+bool machine__is(struct machine *machine, const char *arch);
+int machine__nr_cpus_avail(struct machine *machine);
+
struct thread *__machine__findnew_thread(struct machine *machine, pid_t pid, pid_t tid);
struct thread *machine__findnew_thread(struct machine *machine, pid_t pid, pid_t tid);
@@ -190,44 +200,27 @@ struct dso *machine__findnew_dso(struct machine *machine, const char *filename);
size_t machine__fprintf(struct machine *machine, FILE *fp);
static inline
-struct symbol *machine__find_kernel_symbol(struct machine *machine,
- enum map_type type, u64 addr,
+struct symbol *machine__find_kernel_symbol(struct machine *machine, u64 addr,
struct map **mapp)
{
- return map_groups__find_symbol(&machine->kmaps, type, addr, mapp);
+ return map_groups__find_symbol(&machine->kmaps, addr, mapp);
}
static inline
struct symbol *machine__find_kernel_symbol_by_name(struct machine *machine,
- enum map_type type, const char *name,
+ const char *name,
struct map **mapp)
{
- return map_groups__find_symbol_by_name(&machine->kmaps, type, name, mapp);
-}
-
-static inline
-struct symbol *machine__find_kernel_function(struct machine *machine, u64 addr,
- struct map **mapp)
-{
- return machine__find_kernel_symbol(machine, MAP__FUNCTION, addr,
- mapp);
-}
-
-static inline
-struct symbol *machine__find_kernel_function_by_name(struct machine *machine,
- const char *name,
- struct map **mapp)
-{
- return map_groups__find_function_by_name(&machine->kmaps, name, mapp);
+ return map_groups__find_symbol_by_name(&machine->kmaps, name, mapp);
}
struct map *machine__findnew_module_map(struct machine *machine, u64 start,
const char *filename);
int arch__fix_module_text_start(u64 *start, const char *name);
-int machine__load_kallsyms(struct machine *machine, const char *filename,
- enum map_type type);
-int machine__load_vmlinux_path(struct machine *machine, enum map_type type);
+int machine__load_kallsyms(struct machine *machine, const char *filename);
+
+int machine__load_vmlinux_path(struct machine *machine);
size_t machine__fprintf_dsos_buildid(struct machine *machine, FILE *fp,
bool (skip)(struct dso *dso, int parm), int parm);
@@ -276,4 +269,25 @@ int machine__set_current_tid(struct machine *machine, int cpu, pid_t pid,
*/
char *machine__resolve_kernel_addr(void *vmachine, unsigned long long *addrp, char **modp);
+void machine__get_kallsyms_filename(struct machine *machine, char *buf,
+ size_t bufsz);
+
+int machine__create_extra_kernel_maps(struct machine *machine,
+ struct dso *kernel);
+
+/* Kernel-space maps for symbols that are outside the main kernel map and module maps */
+struct extra_kernel_map {
+ u64 start;
+ u64 end;
+ u64 pgoff;
+ char name[KMAP_NAME_LEN];
+};
+
+int machine__create_extra_kernel_map(struct machine *machine,
+ struct dso *kernel,
+ struct extra_kernel_map *xm);
+
+int machine__map_x86_64_entry_trampolines(struct machine *machine,
+ struct dso *kernel);
+
#endif /* __PERF_MACHINE_H */
diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
index 8fe57031e1a8..6ae97eda370b 100644
--- a/tools/perf/util/map.c
+++ b/tools/perf/util/map.c
@@ -22,11 +22,6 @@
static void __maps__insert(struct maps *maps, struct map *map);
-const char *map_type__name[MAP__NR_TYPES] = {
- [MAP__FUNCTION] = "Functions",
- [MAP__VARIABLE] = "Variables",
-};
-
static inline int is_anon_memory(const char *filename, u32 flags)
{
return flags & MAP_HUGETLB ||
@@ -129,10 +124,8 @@ static inline bool replace_android_lib(const char *filename, char *newfilename)
return false;
}
-void map__init(struct map *map, enum map_type type,
- u64 start, u64 end, u64 pgoff, struct dso *dso)
+void map__init(struct map *map, u64 start, u64 end, u64 pgoff, struct dso *dso)
{
- map->type = type;
map->start = start;
map->end = end;
map->pgoff = pgoff;
@@ -149,7 +142,7 @@ void map__init(struct map *map, enum map_type type,
struct map *map__new(struct machine *machine, u64 start, u64 len,
u64 pgoff, u32 d_maj, u32 d_min, u64 ino,
u64 ino_gen, u32 prot, u32 flags, char *filename,
- enum map_type type, struct thread *thread)
+ struct thread *thread)
{
struct map *map = malloc(sizeof(*map));
struct nsinfo *nsi = NULL;
@@ -173,7 +166,7 @@ struct map *map__new(struct machine *machine, u64 start, u64 len,
map->flags = flags;
nsi = nsinfo__get(thread->nsinfo);
- if ((anon || no_dso) && nsi && type == MAP__FUNCTION) {
+ if ((anon || no_dso) && nsi && (prot & PROT_EXEC)) {
snprintf(newfilename, sizeof(newfilename),
"/tmp/perf-%d.map", nsi->pid);
filename = newfilename;
@@ -203,7 +196,7 @@ struct map *map__new(struct machine *machine, u64 start, u64 len,
if (dso == NULL)
goto out_delete;
- map__init(map, type, start, start + len, pgoff, dso);
+ map__init(map, start, start + len, pgoff, dso);
if (anon || no_dso) {
map->map_ip = map->unmap_ip = identity__map_ip;
@@ -213,8 +206,8 @@ struct map *map__new(struct machine *machine, u64 start, u64 len,
* functions still return NULL, and we avoid the
* unnecessary map__load warning.
*/
- if (type != MAP__FUNCTION)
- dso__set_loaded(dso, map->type);
+ if (!(prot & PROT_EXEC))
+ dso__set_loaded(dso);
}
dso->nsinfo = nsi;
dso__put(dso);
@@ -231,7 +224,7 @@ out_delete:
* they are loaded) and for vmlinux, where only after we load all the
* symbols we'll know where it starts and ends.
*/
-struct map *map__new2(u64 start, struct dso *dso, enum map_type type)
+struct map *map__new2(u64 start, struct dso *dso)
{
struct map *map = calloc(1, (sizeof(*map) +
(dso->kernel ? sizeof(struct kmap) : 0)));
@@ -239,7 +232,7 @@ struct map *map__new2(u64 start, struct dso *dso, enum map_type type)
/*
* ->end will be filled after we load all the symbols
*/
- map__init(map, type, start, 0, 0, dso);
+ map__init(map, start, 0, 0, dso);
}
return map;
@@ -256,7 +249,19 @@ struct map *map__new2(u64 start, struct dso *dso, enum map_type type)
*/
bool __map__is_kernel(const struct map *map)
{
- return __machine__kernel_map(map->groups->machine, map->type) == map;
+ return machine__kernel_map(map->groups->machine) == map;
+}
+
+bool __map__is_extra_kernel_map(const struct map *map)
+{
+ struct kmap *kmap = __map__kmap((struct map *)map);
+
+ return kmap && kmap->name[0];
+}
+
+bool map__has_symbols(const struct map *map)
+{
+ return dso__has_symbols(map->dso);
}
static void map__exit(struct map *map)
@@ -279,7 +284,7 @@ void map__put(struct map *map)
void map__fixup_start(struct map *map)
{
- struct rb_root *symbols = &map->dso->symbols[map->type];
+ struct rb_root *symbols = &map->dso->symbols;
struct rb_node *nd = rb_first(symbols);
if (nd != NULL) {
struct symbol *sym = rb_entry(nd, struct symbol, rb_node);
@@ -289,7 +294,7 @@ void map__fixup_start(struct map *map)
void map__fixup_end(struct map *map)
{
- struct rb_root *symbols = &map->dso->symbols[map->type];
+ struct rb_root *symbols = &map->dso->symbols;
struct rb_node *nd = rb_last(symbols);
if (nd != NULL) {
struct symbol *sym = rb_entry(nd, struct symbol, rb_node);
@@ -304,7 +309,7 @@ int map__load(struct map *map)
const char *name = map->dso->long_name;
int nr;
- if (dso__loaded(map->dso, map->type))
+ if (dso__loaded(map->dso))
return 0;
nr = dso__load(map->dso, map);
@@ -348,7 +353,7 @@ struct symbol *map__find_symbol(struct map *map, u64 addr)
if (map__load(map) < 0)
return NULL;
- return dso__find_symbol(map->dso, map->type, addr);
+ return dso__find_symbol(map->dso, addr);
}
struct symbol *map__find_symbol_by_name(struct map *map, const char *name)
@@ -356,10 +361,10 @@ struct symbol *map__find_symbol_by_name(struct map *map, const char *name)
if (map__load(map) < 0)
return NULL;
- if (!dso__sorted_by_name(map->dso, map->type))
- dso__sort_by_name(map->dso, map->type);
+ if (!dso__sorted_by_name(map->dso))
+ dso__sort_by_name(map->dso);
- return dso__find_symbol_by_name(map->dso, map->type, name);
+ return dso__find_symbol_by_name(map->dso, name);
}
struct map *map__clone(struct map *from)
@@ -494,10 +499,7 @@ static void maps__init(struct maps *maps)
void map_groups__init(struct map_groups *mg, struct machine *machine)
{
- int i;
- for (i = 0; i < MAP__NR_TYPES; ++i) {
- maps__init(&mg->maps[i]);
- }
+ maps__init(&mg->maps);
mg->machine = machine;
refcount_set(&mg->refcnt, 1);
}
@@ -525,22 +527,12 @@ static void maps__exit(struct maps *maps)
void map_groups__exit(struct map_groups *mg)
{
- int i;
-
- for (i = 0; i < MAP__NR_TYPES; ++i)
- maps__exit(&mg->maps[i]);
+ maps__exit(&mg->maps);
}
bool map_groups__empty(struct map_groups *mg)
{
- int i;
-
- for (i = 0; i < MAP__NR_TYPES; ++i) {
- if (maps__first(&mg->maps[i]))
- return false;
- }
-
- return true;
+ return !maps__first(&mg->maps);
}
struct map_groups *map_groups__new(struct machine *machine)
@@ -566,10 +558,9 @@ void map_groups__put(struct map_groups *mg)
}
struct symbol *map_groups__find_symbol(struct map_groups *mg,
- enum map_type type, u64 addr,
- struct map **mapp)
+ u64 addr, struct map **mapp)
{
- struct map *map = map_groups__find(mg, type, addr);
+ struct map *map = map_groups__find(mg, addr);
/* Ensure map is loaded before using map->map_ip */
if (map != NULL && map__load(map) >= 0) {
@@ -608,13 +599,10 @@ out:
}
struct symbol *map_groups__find_symbol_by_name(struct map_groups *mg,
- enum map_type type,
const char *name,
struct map **mapp)
{
- struct symbol *sym = maps__find_symbol_by_name(&mg->maps[type], name, mapp);
-
- return sym;
+ return maps__find_symbol_by_name(&mg->maps, name, mapp);
}
int map_groups__find_ams(struct addr_map_symbol *ams)
@@ -622,8 +610,7 @@ int map_groups__find_ams(struct addr_map_symbol *ams)
if (ams->addr < ams->map->start || ams->addr >= ams->map->end) {
if (ams->map->groups == NULL)
return -1;
- ams->map = map_groups__find(ams->map->groups, ams->map->type,
- ams->addr);
+ ams->map = map_groups__find(ams->map->groups, ams->addr);
if (ams->map == NULL)
return -1;
}
@@ -646,7 +633,7 @@ static size_t maps__fprintf(struct maps *maps, FILE *fp)
printed += fprintf(fp, "Map:");
printed += map__fprintf(pos, fp);
if (verbose > 2) {
- printed += dso__fprintf(pos->dso, pos->type, fp);
+ printed += dso__fprintf(pos->dso, fp);
printed += fprintf(fp, "--\n");
}
}
@@ -656,24 +643,14 @@ static size_t maps__fprintf(struct maps *maps, FILE *fp)
return printed;
}
-size_t __map_groups__fprintf_maps(struct map_groups *mg, enum map_type type,
- FILE *fp)
-{
- size_t printed = fprintf(fp, "%s:\n", map_type__name[type]);
- return printed += maps__fprintf(&mg->maps[type], fp);
-}
-
size_t map_groups__fprintf(struct map_groups *mg, FILE *fp)
{
- size_t printed = 0, i;
- for (i = 0; i < MAP__NR_TYPES; ++i)
- printed += __map_groups__fprintf_maps(mg, i, fp);
- return printed;
+ return maps__fprintf(&mg->maps, fp);
}
static void __map_groups__insert(struct map_groups *mg, struct map *map)
{
- __maps__insert(&mg->maps[map->type], map);
+ __maps__insert(&mg->maps, map);
map->groups = mg;
}
@@ -758,19 +735,18 @@ out:
int map_groups__fixup_overlappings(struct map_groups *mg, struct map *map,
FILE *fp)
{
- return maps__fixup_overlappings(&mg->maps[map->type], map, fp);
+ return maps__fixup_overlappings(&mg->maps, map, fp);
}
/*
 * XXX This should not really _copy_ the maps, but refcount them.
*/
-int map_groups__clone(struct thread *thread,
- struct map_groups *parent, enum map_type type)
+int map_groups__clone(struct thread *thread, struct map_groups *parent)
{
struct map_groups *mg = thread->mg;
int err = -ENOMEM;
struct map *map;
- struct maps *maps = &parent->maps[type];
+ struct maps *maps = &parent->maps;
down_read(&maps->lock);
@@ -877,15 +853,22 @@ struct map *map__next(struct map *map)
return NULL;
}
-struct kmap *map__kmap(struct map *map)
+struct kmap *__map__kmap(struct map *map)
{
- if (!map->dso || !map->dso->kernel) {
- pr_err("Internal error: map__kmap with a non-kernel map\n");
+ if (!map->dso || !map->dso->kernel)
return NULL;
- }
return (struct kmap *)(map + 1);
}
+struct kmap *map__kmap(struct map *map)
+{
+ struct kmap *kmap = __map__kmap(map);
+
+ if (!kmap)
+ pr_err("Internal error: map__kmap with a non-kernel map\n");
+ return kmap;
+}
+
struct map_groups *map__kmaps(struct map *map)
{
struct kmap *kmap = map__kmap(map);
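The __map__kmap()/map__kmap() split introduced above is a checked/unchecked accessor pair: the double-underscore variant returns NULL silently so predicates such as __map__is_extra_kernel_map() can probe any map, while the plain variant keeps the loud diagnostic for callers that require a kernel map. A standalone sketch of the same pattern, with the types reduced to a toy struct:

#include <stdio.h>

struct obj {
	int is_kernel;
	void *priv;
};

/* Quiet probe: returns NULL for non-kernel objects, no diagnostics. */
static void *__obj_priv(struct obj *o)
{
	return o->is_kernel ? o->priv : NULL;
}

/* Loud accessor: same lookup, but complains when the probe fails. */
static void *obj_priv(struct obj *o)
{
	void *p = __obj_priv(o);

	if (!p)
		fprintf(stderr, "Internal error: obj_priv on a non-kernel obj\n");
	return p;
}

int main(void)
{
	struct obj user = { .is_kernel = 0 };
	struct obj kern = { .is_kernel = 1, .priv = &kern };

	/* Predicates use the quiet variant... */
	printf("user has priv: %d\n", __obj_priv(&user) != NULL);
	/* ...callers that require it use the loud one. */
	printf("kern has priv: %d\n", obj_priv(&kern) != NULL);
	return 0;
}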
diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h
index 0e9bbe01b0ab..97e2a063bd65 100644
--- a/tools/perf/util/map.h
+++ b/tools/perf/util/map.h
@@ -8,19 +8,11 @@
#include <linux/rbtree.h>
#include <pthread.h>
#include <stdio.h>
+#include <string.h>
#include <stdbool.h>
#include <linux/types.h>
#include "rwsem.h"
-enum map_type {
- MAP__FUNCTION = 0,
- MAP__VARIABLE,
-};
-
-#define MAP__NR_TYPES (MAP__VARIABLE + 1)
-
-extern const char *map_type__name[MAP__NR_TYPES];
-
struct dso;
struct ip_callchain;
struct ref_reloc_sym;
@@ -35,7 +27,6 @@ struct map {
};
u64 start;
u64 end;
- u8 /* enum map_type */ type;
bool erange_warned;
u32 priv;
u32 prot;
@@ -56,9 +47,12 @@ struct map {
refcount_t refcnt;
};
+#define KMAP_NAME_LEN 256
+
struct kmap {
struct ref_reloc_sym *ref_reloc_sym;
struct map_groups *kmaps;
+ char name[KMAP_NAME_LEN];
};
struct maps {
@@ -67,7 +61,7 @@ struct maps {
};
struct map_groups {
- struct maps maps[MAP__NR_TYPES];
+ struct maps maps;
struct machine *machine;
refcount_t refcnt;
};
@@ -85,6 +79,7 @@ static inline struct map_groups *map_groups__get(struct map_groups *mg)
void map_groups__put(struct map_groups *mg);
+struct kmap *__map__kmap(struct map *map);
struct kmap *map__kmap(struct map *map);
struct map_groups *map__kmaps(struct map *map);
@@ -125,7 +120,7 @@ struct thread;
* Note: caller must ensure map->dso is not NULL (map is loaded).
*/
#define map__for_each_symbol(map, pos, n) \
- dso__for_each_symbol(map->dso, pos, n, map->type)
+ dso__for_each_symbol(map->dso, pos, n)
/* map__for_each_symbol_with_name - iterate over the symbols in the given map
* that have the given name
@@ -144,13 +139,13 @@ struct thread;
#define map__for_each_symbol_by_name(map, sym_name, pos) \
__map__for_each_symbol_by_name(map, sym_name, (pos))
-void map__init(struct map *map, enum map_type type,
+void map__init(struct map *map,
u64 start, u64 end, u64 pgoff, struct dso *dso);
struct map *map__new(struct machine *machine, u64 start, u64 len,
u64 pgoff, u32 d_maj, u32 d_min, u64 ino,
u64 ino_gen, u32 prot, u32 flags,
- char *filename, enum map_type type, struct thread *thread);
-struct map *map__new2(u64 start, struct dso *dso, enum map_type type);
+ char *filename, struct thread *thread);
+struct map *map__new2(u64 start, struct dso *dso);
void map__delete(struct map *map);
struct map *map__clone(struct map *map);
@@ -185,8 +180,6 @@ void map__fixup_end(struct map *map);
void map__reloc_vmlinux(struct map *map);
-size_t __map_groups__fprintf_maps(struct map_groups *mg, enum map_type type,
- FILE *fp);
void maps__insert(struct maps *maps, struct map *map);
void maps__remove(struct maps *maps, struct map *map);
struct map *maps__find(struct maps *maps, u64 addr);
@@ -197,34 +190,29 @@ struct symbol *maps__find_symbol_by_name(struct maps *maps, const char *name,
void map_groups__init(struct map_groups *mg, struct machine *machine);
void map_groups__exit(struct map_groups *mg);
int map_groups__clone(struct thread *thread,
- struct map_groups *parent, enum map_type type);
+ struct map_groups *parent);
size_t map_groups__fprintf(struct map_groups *mg, FILE *fp);
-int maps__set_kallsyms_ref_reloc_sym(struct map **maps, const char *symbol_name,
- u64 addr);
+int map__set_kallsyms_ref_reloc_sym(struct map *map, const char *symbol_name,
+ u64 addr);
static inline void map_groups__insert(struct map_groups *mg, struct map *map)
{
- maps__insert(&mg->maps[map->type], map);
+ maps__insert(&mg->maps, map);
map->groups = mg;
}
static inline void map_groups__remove(struct map_groups *mg, struct map *map)
{
- maps__remove(&mg->maps[map->type], map);
+ maps__remove(&mg->maps, map);
}
-static inline struct map *map_groups__find(struct map_groups *mg,
- enum map_type type, u64 addr)
+static inline struct map *map_groups__find(struct map_groups *mg, u64 addr)
{
- return maps__find(&mg->maps[type], addr);
+ return maps__find(&mg->maps, addr);
}
-static inline struct map *map_groups__first(struct map_groups *mg,
- enum map_type type)
-{
- return maps__first(&mg->maps[type]);
-}
+struct map *map_groups__first(struct map_groups *mg);
static inline struct map *map_groups__next(struct map *map)
{
@@ -232,11 +220,9 @@ static inline struct map *map_groups__next(struct map *map)
}
struct symbol *map_groups__find_symbol(struct map_groups *mg,
- enum map_type type, u64 addr,
- struct map **mapp);
+ u64 addr, struct map **mapp);
struct symbol *map_groups__find_symbol_by_name(struct map_groups *mg,
- enum map_type type,
const char *name,
struct map **mapp);
@@ -244,24 +230,26 @@ struct addr_map_symbol;
int map_groups__find_ams(struct addr_map_symbol *ams);
-static inline
-struct symbol *map_groups__find_function_by_name(struct map_groups *mg,
- const char *name, struct map **mapp)
-{
- return map_groups__find_symbol_by_name(mg, MAP__FUNCTION, name, mapp);
-}
-
int map_groups__fixup_overlappings(struct map_groups *mg, struct map *map,
FILE *fp);
-struct map *map_groups__find_by_name(struct map_groups *mg,
- enum map_type type, const char *name);
+struct map *map_groups__find_by_name(struct map_groups *mg, const char *name);
bool __map__is_kernel(const struct map *map);
+bool __map__is_extra_kernel_map(const struct map *map);
static inline bool __map__is_kmodule(const struct map *map)
{
- return !__map__is_kernel(map);
+ return !__map__is_kernel(map) && !__map__is_extra_kernel_map(map);
+}
+
+bool map__has_symbols(const struct map *map);
+
+#define ENTRY_TRAMPOLINE_NAME "__entry_SYSCALL_64_trampoline"
+
+static inline bool is_entry_trampoline(const char *name)
+{
+ return !strcmp(name, ENTRY_TRAMPOLINE_NAME);
}
#endif /* __PERF_MAP_H */
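With enum map_type gone, executability is derived from the mmap protection bits, as in the map__new() hunk earlier where prot & PROT_EXEC replaces the old type == MAP__FUNCTION test. A small standalone sketch of that classification (classify() is a made-up name for illustration):

#include <stdio.h>
#include <sys/mman.h>

/* Executable mappings are what MAP__FUNCTION used to denote;
 * everything else corresponds to the old MAP__VARIABLE side. */
static const char *classify(unsigned int prot)
{
	return (prot & PROT_EXEC) ? "functions" : "data";
}

int main(void)
{
	printf("r-xp -> %s\n", classify(PROT_READ | PROT_EXEC));
	printf("rw-p -> %s\n", classify(PROT_READ | PROT_WRITE));
	return 0;
}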
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 2fb0272146d8..15eec49e71a1 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -156,13 +156,12 @@ struct event_symbol event_symbols_sw[PERF_COUNT_SW_MAX] = {
(strcmp(sys_dirent->d_name, ".")) && \
(strcmp(sys_dirent->d_name, "..")))
-static int tp_event_has_id(struct dirent *sys_dir, struct dirent *evt_dir)
+static int tp_event_has_id(const char *dir_path, struct dirent *evt_dir)
{
char evt_path[MAXPATHLEN];
int fd;
- snprintf(evt_path, MAXPATHLEN, "%s/%s/%s/id", tracing_events_path,
- sys_dir->d_name, evt_dir->d_name);
+ snprintf(evt_path, MAXPATHLEN, "%s/%s/id", dir_path, evt_dir->d_name);
fd = open(evt_path, O_RDONLY);
if (fd < 0)
return -EINVAL;
@@ -171,12 +170,12 @@ static int tp_event_has_id(struct dirent *sys_dir, struct dirent *evt_dir)
return 0;
}
-#define for_each_event(sys_dirent, evt_dir, evt_dirent) \
+#define for_each_event(dir_path, evt_dir, evt_dirent) \
while ((evt_dirent = readdir(evt_dir)) != NULL) \
if (evt_dirent->d_type == DT_DIR && \
(strcmp(evt_dirent->d_name, ".")) && \
(strcmp(evt_dirent->d_name, "..")) && \
- (!tp_event_has_id(sys_dirent, evt_dirent)))
+ (!tp_event_has_id(dir_path, evt_dirent)))
#define MAX_EVENT_LENGTH 512
@@ -190,21 +189,21 @@ struct tracepoint_path *tracepoint_id_to_path(u64 config)
int fd;
u64 id;
char evt_path[MAXPATHLEN];
- char dir_path[MAXPATHLEN];
+ char *dir_path;
- sys_dir = opendir(tracing_events_path);
+ sys_dir = tracing_events__opendir();
if (!sys_dir)
return NULL;
for_each_subsystem(sys_dir, sys_dirent) {
-
- snprintf(dir_path, MAXPATHLEN, "%s/%s", tracing_events_path,
- sys_dirent->d_name);
+ dir_path = get_events_file(sys_dirent->d_name);
+ if (!dir_path)
+ continue;
evt_dir = opendir(dir_path);
if (!evt_dir)
- continue;
+ goto next;
- for_each_event(sys_dirent, evt_dir, evt_dirent) {
+ for_each_event(dir_path, evt_dir, evt_dirent) {
scnprintf(evt_path, MAXPATHLEN, "%s/%s/id", dir_path,
evt_dirent->d_name);
@@ -218,6 +217,7 @@ struct tracepoint_path *tracepoint_id_to_path(u64 config)
close(fd);
id = atoll(id_buf);
if (id == config) {
+ put_events_file(dir_path);
closedir(evt_dir);
closedir(sys_dir);
path = zalloc(sizeof(*path));
@@ -242,6 +242,8 @@ struct tracepoint_path *tracepoint_id_to_path(u64 config)
}
}
closedir(evt_dir);
+next:
+ put_events_file(dir_path);
}
closedir(sys_dir);
@@ -512,14 +514,19 @@ static int add_tracepoint_multi_event(struct list_head *list, int *idx,
struct parse_events_error *err,
struct list_head *head_config)
{
- char evt_path[MAXPATHLEN];
+ char *evt_path;
struct dirent *evt_ent;
DIR *evt_dir;
int ret = 0, found = 0;
- snprintf(evt_path, MAXPATHLEN, "%s/%s", tracing_events_path, sys_name);
+ evt_path = get_events_file(sys_name);
+ if (!evt_path) {
+ tracepoint_error(err, errno, sys_name, evt_name);
+ return -1;
+ }
evt_dir = opendir(evt_path);
if (!evt_dir) {
+ put_events_file(evt_path);
tracepoint_error(err, errno, sys_name, evt_name);
return -1;
}
@@ -545,6 +552,7 @@ static int add_tracepoint_multi_event(struct list_head *list, int *idx,
ret = -1;
}
+ put_events_file(evt_path);
closedir(evt_dir);
return ret;
}
@@ -570,7 +578,7 @@ static int add_tracepoint_multi_sys(struct list_head *list, int *idx,
DIR *events_dir;
int ret = 0;
- events_dir = opendir(tracing_events_path);
+ events_dir = tracing_events__opendir();
if (!events_dir) {
tracepoint_error(err, errno, sys_name, evt_name);
return -1;
@@ -1219,13 +1227,16 @@ int parse_events_add_numeric(struct parse_events_state *parse_state,
int parse_events_add_pmu(struct parse_events_state *parse_state,
struct list_head *list, char *name,
- struct list_head *head_config, bool auto_merge_stats)
+ struct list_head *head_config,
+ bool auto_merge_stats,
+ bool use_alias)
{
struct perf_event_attr attr;
struct perf_pmu_info info;
struct perf_pmu *pmu;
struct perf_evsel *evsel;
struct parse_events_error *err = parse_state->error;
+ bool use_uncore_alias;
LIST_HEAD(config_terms);
pmu = perf_pmu__find(name);
@@ -1244,11 +1255,14 @@ int parse_events_add_pmu(struct parse_events_state *parse_state,
memset(&attr, 0, sizeof(attr));
}
+ use_uncore_alias = (pmu->is_uncore && use_alias);
+
if (!head_config) {
attr.type = pmu->type;
evsel = __add_event(list, &parse_state->idx, &attr, NULL, pmu, NULL, auto_merge_stats);
if (evsel) {
evsel->pmu_name = name;
+ evsel->use_uncore_alias = use_uncore_alias;
return 0;
} else {
return -ENOMEM;
@@ -1282,6 +1296,7 @@ int parse_events_add_pmu(struct parse_events_state *parse_state,
evsel->metric_expr = info.metric_expr;
evsel->metric_name = info.metric_name;
evsel->pmu_name = name;
+ evsel->use_uncore_alias = use_uncore_alias;
}
return evsel ? 0 : -ENOMEM;
@@ -1317,7 +1332,8 @@ int parse_events_multi_pmu_add(struct parse_events_state *parse_state,
list_add_tail(&term->list, head);
if (!parse_events_add_pmu(parse_state, list,
- pmu->name, head, true)) {
+ pmu->name, head,
+ true, true)) {
pr_debug("%s -> %s/%s/\n", str,
pmu->name, alias->str);
ok++;
@@ -1339,7 +1355,120 @@ int parse_events__modifier_group(struct list_head *list,
return parse_events__modifier_event(list, event_mod, true);
}
-void parse_events__set_leader(char *name, struct list_head *list)
+/*
+ * Check if the two uncore PMUs are from the same uncore block.
+ * The format of the uncore PMU name is uncore_#blockname_#pmuidx.
+ */
+static bool is_same_uncore_block(const char *pmu_name_a, const char *pmu_name_b)
+{
+ char *end_a, *end_b;
+
+ end_a = strrchr(pmu_name_a, '_');
+ end_b = strrchr(pmu_name_b, '_');
+
+ if (!end_a || !end_b)
+ return false;
+
+ if ((end_a - pmu_name_a) != (end_b - pmu_name_b))
+ return false;
+
+ return (strncmp(pmu_name_a, pmu_name_b, end_a - pmu_name_a) == 0);
+}
+
+static int
+parse_events__set_leader_for_uncore_aliase(char *name, struct list_head *list,
+ struct parse_events_state *parse_state)
+{
+ struct perf_evsel *evsel, *leader;
+ uintptr_t *leaders;
+ bool is_leader = true;
+ int i, nr_pmu = 0, total_members, ret = 0;
+
+ leader = list_first_entry(list, struct perf_evsel, node);
+ evsel = list_last_entry(list, struct perf_evsel, node);
+ total_members = evsel->idx - leader->idx + 1;
+
+ leaders = calloc(total_members, sizeof(uintptr_t));
+ if (WARN_ON(!leaders))
+ return 0;
+
+ /*
+	 * Go through the whole group and do a sanity check: all members
+	 * must use an alias and be from the same uncore block.
+	 * Also, store the leader events in an array.
+ */
+ __evlist__for_each_entry(list, evsel) {
+
+		/* Only split the uncore group whose members use an alias */
+ if (!evsel->use_uncore_alias)
+ goto out;
+
+ /* The events must be from the same uncore block */
+ if (!is_same_uncore_block(leader->pmu_name, evsel->pmu_name))
+ goto out;
+
+ if (!is_leader)
+ continue;
+ /*
+ * If the event's PMU name starts to repeat, it must be a new
+ * event. That can be used to distinguish the leader from
+		 * other members, even if they have the same event name.
+ */
+ if ((leader != evsel) && (leader->pmu_name == evsel->pmu_name)) {
+ is_leader = false;
+ continue;
+ }
+		/* The name is always the alias name */
+ WARN_ON(strcmp(leader->name, evsel->name));
+
+ /* Store the leader event for each PMU */
+ leaders[nr_pmu++] = (uintptr_t) evsel;
+ }
+
+ /* only one event alias */
+ if (nr_pmu == total_members) {
+ parse_state->nr_groups--;
+ goto handled;
+ }
+
+ /*
+ * An uncore event alias is a joint name which means the same event
+ * runs on all PMUs of a block.
+ * Perf doesn't support mixed events from different PMUs in the same
+ * group. The big group has to be split into multiple small groups
+ * which only include the events from the same PMU.
+ *
+ * Here the uncore event aliases must be from the same uncore block.
+	 * The number of PMUs must be the same for each alias. The number of
+	 * new small groups equals the number of PMUs.
+	 * Set the leader event for the corresponding members in each group.
+ */
+ i = 0;
+ __evlist__for_each_entry(list, evsel) {
+ if (i >= nr_pmu)
+ i = 0;
+ evsel->leader = (struct perf_evsel *) leaders[i++];
+ }
+
+	/* The number of members and the group name are the same for each group */
+ for (i = 0; i < nr_pmu; i++) {
+ evsel = (struct perf_evsel *) leaders[i];
+ evsel->nr_members = total_members / nr_pmu;
+ evsel->group_name = name ? strdup(name) : NULL;
+ }
+
+ /* Take the new small groups into account */
+ parse_state->nr_groups += nr_pmu - 1;
+
+handled:
+ ret = 1;
+out:
+ free(leaders);
+ return ret;
+}
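To make the splitting above concrete, here is a reduced standalone model (the PMU names, counts and the cut-down evsel struct are all invented): a group of two aliased events, each expanded across two uncore PMUs of the same block, arrives as a flat list of four evsels, and the round-robin leader assignment turns it into two small groups of two.

#include <stdio.h>

struct evsel {
	const char *name;
	const char *pmu;
	int leader; /* index into the member array */
};

int main(void)
{
	/* A group {eventA,eventB} expanded across two hypothetical
	 * uncore PMUs of the same block, in parser order. */
	struct evsel ev[] = {
		{ "eventA", "uncore_cha_0" },
		{ "eventA", "uncore_cha_1" },
		{ "eventB", "uncore_cha_0" },
		{ "eventB", "uncore_cha_1" },
	};
	int total = 4, nr_pmu = 2, i;

	/* Round-robin leader assignment, as in the function above:
	 * the first event's instances become the per-PMU leaders. */
	for (i = 0; i < total; i++)
		ev[i].leader = i % nr_pmu;

	for (i = 0; i < total; i++)
		printf("%s/%s -> group led by %s/%s (%d members)\n",
		       ev[i].name, ev[i].pmu,
		       ev[ev[i].leader].name, ev[ev[i].leader].pmu,
		       total / nr_pmu);
	return 0;
}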
+
+void parse_events__set_leader(char *name, struct list_head *list,
+ struct parse_events_state *parse_state)
{
struct perf_evsel *leader;
@@ -1348,6 +1477,9 @@ void parse_events__set_leader(char *name, struct list_head *list)
return;
}
+ if (parse_events__set_leader_for_uncore_aliase(name, list, parse_state))
+ return;
+
__perf_evlist__set_leader(list);
leader = list_entry(list->next, struct perf_evsel, node);
leader->group_name = name ? strdup(name) : NULL;
@@ -1715,7 +1847,7 @@ int parse_events(struct perf_evlist *evlist, const char *str,
struct perf_evsel *last;
if (list_empty(&parse_state.list)) {
- WARN_ONCE(true, "WARNING: event parser found nothing");
+ WARN_ONCE(true, "WARNING: event parser found nothing\n");
return -1;
}
@@ -1968,13 +2100,13 @@ void print_tracepoint_events(const char *subsys_glob, const char *event_glob,
DIR *sys_dir, *evt_dir;
struct dirent *sys_dirent, *evt_dirent;
char evt_path[MAXPATHLEN];
- char dir_path[MAXPATHLEN];
+ char *dir_path;
char **evt_list = NULL;
unsigned int evt_i = 0, evt_num = 0;
bool evt_num_known = false;
restart:
- sys_dir = opendir(tracing_events_path);
+ sys_dir = tracing_events__opendir();
if (!sys_dir)
return;
@@ -1989,13 +2121,14 @@ restart:
!strglobmatch(sys_dirent->d_name, subsys_glob))
continue;
- snprintf(dir_path, MAXPATHLEN, "%s/%s", tracing_events_path,
- sys_dirent->d_name);
+ dir_path = get_events_file(sys_dirent->d_name);
+ if (!dir_path)
+ continue;
evt_dir = opendir(dir_path);
if (!evt_dir)
- continue;
+ goto next;
- for_each_event(sys_dirent, evt_dir, evt_dirent) {
+ for_each_event(dir_path, evt_dir, evt_dirent) {
if (event_glob != NULL &&
!strglobmatch(evt_dirent->d_name, event_glob))
continue;
@@ -2009,11 +2142,15 @@ restart:
sys_dirent->d_name, evt_dirent->d_name);
evt_list[evt_i] = strdup(evt_path);
- if (evt_list[evt_i] == NULL)
+ if (evt_list[evt_i] == NULL) {
+ put_events_file(dir_path);
goto out_close_evt_dir;
+ }
evt_i++;
}
closedir(evt_dir);
+next:
+ put_events_file(dir_path);
}
closedir(sys_dir);
@@ -2061,21 +2198,21 @@ int is_valid_tracepoint(const char *event_string)
DIR *sys_dir, *evt_dir;
struct dirent *sys_dirent, *evt_dirent;
char evt_path[MAXPATHLEN];
- char dir_path[MAXPATHLEN];
+ char *dir_path;
- sys_dir = opendir(tracing_events_path);
+ sys_dir = tracing_events__opendir();
if (!sys_dir)
return 0;
for_each_subsystem(sys_dir, sys_dirent) {
-
- snprintf(dir_path, MAXPATHLEN, "%s/%s", tracing_events_path,
- sys_dirent->d_name);
+ dir_path = get_events_file(sys_dirent->d_name);
+ if (!dir_path)
+ continue;
evt_dir = opendir(dir_path);
if (!evt_dir)
- continue;
+ goto next;
- for_each_event(sys_dirent, evt_dir, evt_dirent) {
+ for_each_event(dir_path, evt_dir, evt_dirent) {
snprintf(evt_path, MAXPATHLEN, "%s:%s",
sys_dirent->d_name, evt_dirent->d_name);
if (!strcmp(evt_path, event_string)) {
@@ -2085,6 +2222,8 @@ int is_valid_tracepoint(const char *event_string)
}
}
closedir(evt_dir);
+next:
+ put_events_file(dir_path);
}
closedir(sys_dir);
return 0;
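The tracefs walks in this file all follow the same shape after the change: obtain the per-subsystem directory path with get_events_file(), and release it with put_events_file() on every exit path, including the goto next ones. A standalone sketch of that acquire/release discipline; events_dir_path() is a made-up stand-in with a hard-coded tracefs root, whereas the real helpers resolve the mount point at runtime:

#define _GNU_SOURCE
#include <dirent.h>
#include <stdio.h>
#include <stdlib.h>

/* Stand-in for get_events_file(): returns a heap path the caller owns. */
static char *events_dir_path(const char *sys)
{
	char *path = NULL;

	if (asprintf(&path, "/sys/kernel/tracing/events/%s", sys) < 0)
		return NULL;
	return path;
}

int main(void)
{
	const char *subsystems[] = { "sched", "irq" };
	unsigned int i;

	for (i = 0; i < 2; i++) {
		char *dir_path = events_dir_path(subsystems[i]);
		DIR *dir;

		if (!dir_path)
			continue;
		dir = opendir(dir_path);
		if (!dir)
			goto next;      /* we still own dir_path */
		closedir(dir);
next:
		free(dir_path);         /* put_events_file() equivalent */
	}
	return 0;
}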
diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
index 5015cfd58277..4473dac27aee 100644
--- a/tools/perf/util/parse-events.h
+++ b/tools/perf/util/parse-events.h
@@ -167,7 +167,9 @@ int parse_events_add_breakpoint(struct list_head *list, int *idx,
void *ptr, char *type, u64 len);
int parse_events_add_pmu(struct parse_events_state *parse_state,
struct list_head *list, char *name,
- struct list_head *head_config, bool auto_merge_stats);
+ struct list_head *head_config,
+ bool auto_merge_stats,
+ bool use_alias);
int parse_events_multi_pmu_add(struct parse_events_state *parse_state,
char *str,
@@ -178,7 +180,8 @@ int parse_events_copy_term_list(struct list_head *old,
enum perf_pmu_event_symbol_type
perf_pmu__parse_check(const char *name);
-void parse_events__set_leader(char *name, struct list_head *list);
+void parse_events__set_leader(char *name, struct list_head *list,
+ struct parse_events_state *parse_state);
void parse_events_update_lists(struct list_head *list_event,
struct list_head *list_all);
void parse_events_evlist_error(struct parse_events_state *parse_state,
diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y
index d14464c42714..e37608a87dba 100644
--- a/tools/perf/util/parse-events.y
+++ b/tools/perf/util/parse-events.y
@@ -161,7 +161,7 @@ PE_NAME '{' events '}'
struct list_head *list = $3;
inc_group_count(list, _parse_state);
- parse_events__set_leader($1, list);
+ parse_events__set_leader($1, list, _parse_state);
$$ = list;
}
|
@@ -170,7 +170,7 @@ PE_NAME '{' events '}'
struct list_head *list = $2;
inc_group_count(list, _parse_state);
- parse_events__set_leader(NULL, list);
+ parse_events__set_leader(NULL, list, _parse_state);
$$ = list;
}
@@ -224,15 +224,15 @@ event_def: event_pmu |
event_bpf_file
event_pmu:
-PE_NAME '/' event_config '/'
+PE_NAME opt_event_config
{
struct list_head *list, *orig_terms, *terms;
- if (parse_events_copy_term_list($3, &orig_terms))
+ if (parse_events_copy_term_list($2, &orig_terms))
YYABORT;
ALLOC_LIST(list);
- if (parse_events_add_pmu(_parse_state, list, $1, $3, false)) {
+ if (parse_events_add_pmu(_parse_state, list, $1, $2, false, false)) {
struct perf_pmu *pmu = NULL;
int ok = 0;
char *pattern;
@@ -251,7 +251,7 @@ PE_NAME '/' event_config '/'
free(pattern);
YYABORT;
}
- if (!parse_events_add_pmu(_parse_state, list, pmu->name, terms, true))
+ if (!parse_events_add_pmu(_parse_state, list, pmu->name, terms, true, false))
ok++;
parse_events_terms__delete(terms);
}
@@ -262,7 +262,7 @@ PE_NAME '/' event_config '/'
if (!ok)
YYABORT;
}
- parse_events_terms__delete($3);
+ parse_events_terms__delete($2);
parse_events_terms__delete(orig_terms);
$$ = list;
}
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index e1dbc9821617..3094f11e7d81 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -111,17 +111,6 @@ void exit_probe_symbol_maps(void)
symbol__exit();
}
-static struct symbol *__find_kernel_function_by_name(const char *name,
- struct map **mapp)
-{
- return machine__find_kernel_function_by_name(host_machine, name, mapp);
-}
-
-static struct symbol *__find_kernel_function(u64 addr, struct map **mapp)
-{
- return machine__find_kernel_function(host_machine, addr, mapp);
-}
-
static struct ref_reloc_sym *kernel_get_ref_reloc_sym(void)
{
/* kmap->ref_reloc_sym should be set if host_machine is initialized */
@@ -149,7 +138,7 @@ static int kernel_get_symbol_address_by_name(const char *name, u64 *addr,
if (reloc_sym && strcmp(name, reloc_sym->name) == 0)
*addr = (reloc) ? reloc_sym->addr : reloc_sym->unrelocated_addr;
else {
- sym = __find_kernel_function_by_name(name, &map);
+ sym = machine__find_kernel_symbol_by_name(host_machine, name, &map);
if (!sym)
return -ENOENT;
*addr = map->unmap_ip(map, sym->start) -
@@ -161,8 +150,7 @@ static int kernel_get_symbol_address_by_name(const char *name, u64 *addr,
static struct map *kernel_get_module_map(const char *module)
{
- struct map_groups *grp = &host_machine->kmaps;
- struct maps *maps = &grp->maps[MAP__FUNCTION];
+ struct maps *maps = machine__kernel_maps(host_machine);
struct map *pos;
/* A file path -- this is an offline module */
@@ -341,7 +329,7 @@ static int kernel_get_module_dso(const char *module, struct dso **pdso)
char module_name[128];
snprintf(module_name, sizeof(module_name), "[%s]", module);
- map = map_groups__find_by_name(&host_machine->kmaps, MAP__FUNCTION, module_name);
+ map = map_groups__find_by_name(&host_machine->kmaps, module_name);
if (map) {
dso = map->dso;
goto found;
@@ -2098,7 +2086,7 @@ static int find_perf_probe_point_from_map(struct probe_trace_point *tp,
}
if (addr) {
addr += tp->offset;
- sym = __find_kernel_function(addr, &map);
+ sym = machine__find_kernel_symbol(host_machine, addr, &map);
}
}
@@ -3504,19 +3492,18 @@ int show_available_funcs(const char *target, struct nsinfo *nsi,
(target) ? : "kernel");
goto end;
}
- if (!dso__sorted_by_name(map->dso, map->type))
- dso__sort_by_name(map->dso, map->type);
+ if (!dso__sorted_by_name(map->dso))
+ dso__sort_by_name(map->dso);
/* Show all (filtered) symbols */
setup_pager();
- for (nd = rb_first(&map->dso->symbol_names[map->type]); nd; nd = rb_next(nd)) {
+ for (nd = rb_first(&map->dso->symbol_names); nd; nd = rb_next(nd)) {
struct symbol_name_rb_node *pos = rb_entry(nd, struct symbol_name_rb_node, rb_node);
if (strfilter__compare(_filter, pos->sym.name))
printf("%s\n", pos->sym.name);
- }
-
+ }
end:
map__put(map);
exit_probe_symbol_maps();
diff --git a/tools/perf/util/probe-file.c b/tools/perf/util/probe-file.c
index 4ae1123c6794..b76088fadf3d 100644
--- a/tools/perf/util/probe-file.c
+++ b/tools/perf/util/probe-file.c
@@ -84,8 +84,7 @@ int open_trace_file(const char *trace_file, bool readwrite)
char buf[PATH_MAX];
int ret;
- ret = e_snprintf(buf, PATH_MAX, "%s/%s",
- tracing_path, trace_file);
+ ret = e_snprintf(buf, PATH_MAX, "%s/%s", tracing_path_mount(), trace_file);
if (ret >= 0) {
pr_debug("Opening %s write=%d\n", buf, readwrite);
if (readwrite && !probe_event_dry_run)
diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c
index 10dd5fce082b..7f8afacd08ee 100644
--- a/tools/perf/util/scripting-engines/trace-event-python.c
+++ b/tools/perf/util/scripting-engines/trace-event-python.c
@@ -531,6 +531,8 @@ static PyObject *get_perf_sample_dict(struct perf_sample *sample,
PyLong_FromUnsignedLongLong(sample->period));
pydict_set_item_string_decref(dict_sample, "phys_addr",
PyLong_FromUnsignedLongLong(sample->phys_addr));
+ pydict_set_item_string_decref(dict_sample, "addr",
+ PyLong_FromUnsignedLongLong(sample->addr));
set_sample_read_in_dict(dict_sample, sample, evsel);
pydict_set_item_string_decref(dict, "sample", dict_sample);
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index f4a7a437ee87..b998bb475589 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -1973,12 +1973,11 @@ bool perf_session__has_traces(struct perf_session *session, const char *msg)
return false;
}
-int maps__set_kallsyms_ref_reloc_sym(struct map **maps,
- const char *symbol_name, u64 addr)
+int map__set_kallsyms_ref_reloc_sym(struct map *map, const char *symbol_name, u64 addr)
{
char *bracket;
- int i;
struct ref_reloc_sym *ref;
+ struct kmap *kmap;
ref = zalloc(sizeof(struct ref_reloc_sym));
if (ref == NULL)
@@ -1996,13 +1995,9 @@ int maps__set_kallsyms_ref_reloc_sym(struct map **maps,
ref->addr = addr;
- for (i = 0; i < MAP__NR_TYPES; ++i) {
- struct kmap *kmap = map__kmap(maps[i]);
-
- if (!kmap)
- continue;
+ kmap = map__kmap(map);
+ if (kmap)
kmap->ref_reloc_sym = ref;
- }
return 0;
}
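The hunk above replaces the per-type loop with a single kmap lookup: with one maps tree per map_groups there is exactly one kernel map to attach the ref_reloc_sym to. A minimal sketch of the resulting shape, with the structs reduced to the fields used here:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct ref_reloc_sym {
	char *name;
	unsigned long long addr;
};

struct kmap {
	struct ref_reloc_sym *ref_reloc_sym;
};

static int set_ref_reloc_sym(struct kmap *kmap, const char *name,
			     unsigned long long addr)
{
	struct ref_reloc_sym *ref = calloc(1, sizeof(*ref));

	if (!ref)
		return -1;
	ref->name = strdup(name);
	if (!ref->name) {
		free(ref);
		return -1;
	}
	ref->addr = addr;

	/* One kernel map now, so a single attach instead of a loop. */
	if (kmap)
		kmap->ref_reloc_sym = ref;
	return 0;
}

int main(void)
{
	struct kmap kmap = { 0 };

	if (set_ref_reloc_sym(&kmap, "_stext", 0xffffffff81000000ULL))
		return 1;
	printf("%s -> %#llx\n", kmap.ref_reloc_sym->name,
	       kmap.ref_reloc_sym->addr);
	return 0;
}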
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 26a68dfd8a4f..4058ade352a5 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -2,7 +2,7 @@
#include <errno.h>
#include <inttypes.h>
#include <regex.h>
-#include <sys/mman.h>
+#include <linux/mman.h>
#include "sort.h"
#include "hist.h"
#include "comm.h"
@@ -282,7 +282,7 @@ static int _hist_entry__sym_snprintf(struct map *map, struct symbol *sym,
ret += repsep_snprintf(bf + ret, size - ret, "[%c] ", level);
if (sym && map) {
- if (map->type == MAP__VARIABLE) {
+ if (sym->type == STT_OBJECT) {
ret += repsep_snprintf(bf + ret, size - ret, "%s", sym->name);
ret += repsep_snprintf(bf + ret, size - ret, "+0x%llx",
ip - map->unmap_ip(map, sym->start));
@@ -1211,7 +1211,7 @@ static int hist_entry__dcacheline_snprintf(struct hist_entry *he, char *bf,
/* print [s] for shared data mmaps */
if ((he->cpumode != PERF_RECORD_MISC_KERNEL) &&
- map && (map->type == MAP__VARIABLE) &&
+ map && !(map->prot & PROT_EXEC) &&
(map->flags & MAP_SHARED) &&
(map->maj || map->min || map->ino ||
map->ino_generation))
@@ -2582,7 +2582,7 @@ int sort_dimension__add(struct perf_hpp_list *list, const char *tok,
if (sort__mode != SORT_MODE__MEMORY)
return -EINVAL;
- if (sd->entry == &sort_mem_dcacheline && cacheline_size == 0)
+ if (sd->entry == &sort_mem_dcacheline && cacheline_size() == 0)
return -EINVAL;
if (sd->entry == &sort_mem_daddr_sym)
@@ -2628,7 +2628,7 @@ static int setup_sort_list(struct perf_hpp_list *list, char *str,
if (*tok) {
ret = sort_dimension__add(list, tok, evlist, level);
if (ret == -EINVAL) {
- if (!cacheline_size && !strncasecmp(tok, "dcacheline", strlen(tok)))
+ if (!cacheline_size() && !strncasecmp(tok, "dcacheline", strlen(tok)))
pr_err("The \"dcacheline\" --sort key needs to know the cacheline size and it couldn't be determined on this system");
else
pr_err("Invalid --sort key: `%s'", tok);
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index 035b62e2c60b..9e6896293bbd 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -186,13 +186,13 @@ static inline float hist_entry__get_percent_limit(struct hist_entry *he)
static inline u64 cl_address(u64 address)
{
/* return the cacheline of the address */
- return (address & ~(cacheline_size - 1));
+ return (address & ~(cacheline_size() - 1));
}
static inline u64 cl_offset(u64 address)
{
	/* return the offset within the cacheline */
- return (address & (cacheline_size - 1));
+ return (address & (cacheline_size() - 1));
}
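cacheline_size is now a function call; the mask arithmetic itself is unchanged. A standalone demo of the two helpers, using sysconf() on glibc as a stand-in for perf's cacheline_size():

#include <stdio.h>
#include <unistd.h>

static unsigned long long cacheline_size(void)
{
	long sz = sysconf(_SC_LEVEL1_DCACHE_LINESIZE);

	return sz > 0 ? (unsigned long long)sz : 64; /* fall back to 64 */
}

static unsigned long long cl_address(unsigned long long address)
{
	return address & ~(cacheline_size() - 1);
}

static unsigned long long cl_offset(unsigned long long address)
{
	return address & (cacheline_size() - 1);
}

int main(void)
{
	unsigned long long addr = 0x1234;

	printf("addr %#llx -> line %#llx, offset %#llx\n",
	       addr, cl_address(addr), cl_offset(addr));
	return 0;
}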
enum sort_mode {
diff --git a/tools/perf/util/srcline.c b/tools/perf/util/srcline.c
index 3c21fd059b64..09d6746e6ec8 100644
--- a/tools/perf/util/srcline.c
+++ b/tools/perf/util/srcline.c
@@ -103,6 +103,7 @@ static struct symbol *new_inline_sym(struct dso *dso,
inline_sym = symbol__new(base_sym ? base_sym->start : 0,
base_sym ? base_sym->end : 0,
base_sym ? base_sym->binding : 0,
+ base_sym ? base_sym->type : 0,
funcname);
if (inline_sym)
inline_sym->inlined = 1;
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
index 8f56ba4fd258..36efb986f7fc 100644
--- a/tools/perf/util/stat.h
+++ b/tools/perf/util/stat.h
@@ -7,8 +7,7 @@
#include "xyarray.h"
#include "rblist.h"
-struct stats
-{
+struct stats {
double n, mean, M2;
u64 max, min;
};
diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
index 2de770511e70..29770ea61768 100644
--- a/tools/perf/util/symbol-elf.c
+++ b/tools/perf/util/symbol-elf.c
@@ -114,16 +114,9 @@ static inline int elf_sym__is_label(const GElf_Sym *sym)
sym->st_shndx != SHN_ABS;
}
-static bool elf_sym__is_a(GElf_Sym *sym, enum map_type type)
+static bool elf_sym__filter(GElf_Sym *sym)
{
- switch (type) {
- case MAP__FUNCTION:
- return elf_sym__is_function(sym);
- case MAP__VARIABLE:
- return elf_sym__is_object(sym);
- default:
- return false;
- }
+ return elf_sym__is_function(sym) || elf_sym__is_object(sym);
}
static inline const char *elf_sym__name(const GElf_Sym *sym,
@@ -150,17 +143,10 @@ static inline bool elf_sec__is_data(const GElf_Shdr *shdr,
return strstr(elf_sec__name(shdr, secstrs), "data") != NULL;
}
-static bool elf_sec__is_a(GElf_Shdr *shdr, Elf_Data *secstrs,
- enum map_type type)
+static bool elf_sec__filter(GElf_Shdr *shdr, Elf_Data *secstrs)
{
- switch (type) {
- case MAP__FUNCTION:
- return elf_sec__is_text(shdr, secstrs);
- case MAP__VARIABLE:
- return elf_sec__is_data(shdr, secstrs);
- default:
- return false;
- }
+ return elf_sec__is_text(shdr, secstrs) ||
+ elf_sec__is_data(shdr, secstrs);
}
static size_t elf_addr_to_index(Elf *elf, GElf_Addr addr)
@@ -256,7 +242,7 @@ static char *demangle_sym(struct dso *dso, int kmodule, const char *elf_name)
* And always look at the original dso, not at debuginfo packages, that
* have the PLT data stripped out (shdr_rel_plt.sh_type == SHT_NOBITS).
*/
-int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss, struct map *map)
+int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss)
{
uint32_t nr_rel_entries, idx;
GElf_Sym sym;
@@ -364,12 +350,12 @@ int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss, struct map *
free(demangled);
f = symbol__new(plt_offset, plt_entry_size,
- STB_GLOBAL, sympltname);
+ STB_GLOBAL, STT_FUNC, sympltname);
if (!f)
goto out_elf_end;
plt_offset += plt_entry_size;
- symbols__insert(&dso->symbols[map->type], f);
+ symbols__insert(&dso->symbols, f);
++nr;
}
} else if (shdr_rel_plt.sh_type == SHT_REL) {
@@ -390,12 +376,12 @@ int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss, struct map *
free(demangled);
f = symbol__new(plt_offset, plt_entry_size,
- STB_GLOBAL, sympltname);
+ STB_GLOBAL, STT_FUNC, sympltname);
if (!f)
goto out_elf_end;
plt_offset += plt_entry_size;
- symbols__insert(&dso->symbols[map->type], f);
+ symbols__insert(&dso->symbols, f);
++nr;
}
}
@@ -811,6 +797,110 @@ static u64 ref_reloc(struct kmap *kmap)
void __weak arch__sym_update(struct symbol *s __maybe_unused,
GElf_Sym *sym __maybe_unused) { }
+static int dso__process_kernel_symbol(struct dso *dso, struct map *map,
+ GElf_Sym *sym, GElf_Shdr *shdr,
+ struct map_groups *kmaps, struct kmap *kmap,
+ struct dso **curr_dsop, struct map **curr_mapp,
+ const char *section_name,
+ bool adjust_kernel_syms, bool kmodule, bool *remap_kernel)
+{
+ struct dso *curr_dso = *curr_dsop;
+ struct map *curr_map;
+ char dso_name[PATH_MAX];
+
+ /* Adjust symbol to map to file offset */
+ if (adjust_kernel_syms)
+ sym->st_value -= shdr->sh_addr - shdr->sh_offset;
+
+ if (strcmp(section_name, (curr_dso->short_name + dso->short_name_len)) == 0)
+ return 0;
+
+ if (strcmp(section_name, ".text") == 0) {
+ /*
+ * The initial kernel mapping is based on
+ * kallsyms and identity maps. Overwrite it to
+ * map to the kernel dso.
+ */
+ if (*remap_kernel && dso->kernel) {
+ *remap_kernel = false;
+ map->start = shdr->sh_addr + ref_reloc(kmap);
+ map->end = map->start + shdr->sh_size;
+ map->pgoff = shdr->sh_offset;
+ map->map_ip = map__map_ip;
+ map->unmap_ip = map__unmap_ip;
+ /* Ensure maps are correctly ordered */
+ if (kmaps) {
+ map__get(map);
+ map_groups__remove(kmaps, map);
+ map_groups__insert(kmaps, map);
+ map__put(map);
+ }
+ }
+
+ /*
+ * The initial module mapping is based on
+ * /proc/modules mapped to offset zero.
+ * Overwrite it to map to the module dso.
+ */
+ if (*remap_kernel && kmodule) {
+ *remap_kernel = false;
+ map->pgoff = shdr->sh_offset;
+ }
+
+ *curr_mapp = map;
+ *curr_dsop = dso;
+ return 0;
+ }
+
+ if (!kmap)
+ return 0;
+
+ snprintf(dso_name, sizeof(dso_name), "%s%s", dso->short_name, section_name);
+
+ curr_map = map_groups__find_by_name(kmaps, dso_name);
+ if (curr_map == NULL) {
+ u64 start = sym->st_value;
+
+ if (kmodule)
+ start += map->start + shdr->sh_offset;
+
+ curr_dso = dso__new(dso_name);
+ if (curr_dso == NULL)
+ return -1;
+ curr_dso->kernel = dso->kernel;
+ curr_dso->long_name = dso->long_name;
+ curr_dso->long_name_len = dso->long_name_len;
+ curr_map = map__new2(start, curr_dso);
+ dso__put(curr_dso);
+ if (curr_map == NULL)
+ return -1;
+
+ if (adjust_kernel_syms) {
+ curr_map->start = shdr->sh_addr + ref_reloc(kmap);
+ curr_map->end = curr_map->start + shdr->sh_size;
+ curr_map->pgoff = shdr->sh_offset;
+ } else {
+ curr_map->map_ip = curr_map->unmap_ip = identity__map_ip;
+ }
+ curr_dso->symtab_type = dso->symtab_type;
+ map_groups__insert(kmaps, curr_map);
+ /*
+	 * Add it before we drop the reference to curr_map, i.e. while
+	 * we are still sure to have a reference to this DSO via
+ * *curr_map->dso.
+ */
+ dsos__add(&map->groups->machine->dsos, curr_dso);
+ /* kmaps already got it */
+ map__put(curr_map);
+ dso__set_loaded(curr_dso);
+ *curr_mapp = curr_map;
+ *curr_dsop = curr_dso;
+ } else
+ *curr_dsop = curr_map->dso;
+
+ return 0;
+}
+
int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss,
struct symsrc *runtime_ss, int kmodule)
{
@@ -844,7 +934,7 @@ int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss,
* have the wrong values for the dso maps, so remove them.
*/
if (kmodule && syms_ss->symtab)
- symbols__delete(&dso->symbols[map->type]);
+ symbols__delete(&dso->symbols);
if (!syms_ss->symtab) {
/*
@@ -921,10 +1011,10 @@ int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss,
dso->adjust_symbols = runtime_ss->adjust_symbols || ref_reloc(kmap);
/*
- * Initial kernel and module mappings do not map to the dso. For
- * function mappings, flag the fixups.
+ * Initial kernel and module mappings do not map to the dso.
+ * Flag the fixups.
*/
- if (map->type == MAP__FUNCTION && (dso->kernel || kmodule)) {
+ if (dso->kernel || kmodule) {
remap_kernel = true;
adjust_kernel_syms = dso->adjust_symbols;
}
@@ -936,7 +1026,7 @@ int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss,
const char *section_name;
bool used_opd = false;
- if (!is_label && !elf_sym__is_a(&sym, map->type))
+ if (!is_label && !elf_sym__filter(&sym))
continue;
/* Reject ARM ELF "mapping symbols": these aren't unique and
@@ -974,7 +1064,7 @@ int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss,
gelf_getshdr(sec, &shdr);
- if (is_label && !elf_sec__is_a(&shdr, secstrs, map->type))
+ if (is_label && !elf_sec__filter(&shdr, secstrs))
continue;
section_name = elf_sec__name(&shdr, secstrs);
@@ -982,134 +1072,37 @@ int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss,
/* On ARM, symbols for thumb functions have 1 added to
* the symbol address as a flag - remove it */
if ((ehdr.e_machine == EM_ARM) &&
- (map->type == MAP__FUNCTION) &&
+ (GELF_ST_TYPE(sym.st_info) == STT_FUNC) &&
(sym.st_value & 1))
--sym.st_value;
if (dso->kernel || kmodule) {
- char dso_name[PATH_MAX];
-
- /* Adjust symbol to map to file offset */
- if (adjust_kernel_syms)
- sym.st_value -= shdr.sh_addr - shdr.sh_offset;
-
- if (strcmp(section_name,
- (curr_dso->short_name +
- dso->short_name_len)) == 0)
- goto new_symbol;
-
- if (strcmp(section_name, ".text") == 0) {
- /*
- * The initial kernel mapping is based on
- * kallsyms and identity maps. Overwrite it to
- * map to the kernel dso.
- */
- if (remap_kernel && dso->kernel) {
- remap_kernel = false;
- map->start = shdr.sh_addr +
- ref_reloc(kmap);
- map->end = map->start + shdr.sh_size;
- map->pgoff = shdr.sh_offset;
- map->map_ip = map__map_ip;
- map->unmap_ip = map__unmap_ip;
- /* Ensure maps are correctly ordered */
- if (kmaps) {
- map__get(map);
- map_groups__remove(kmaps, map);
- map_groups__insert(kmaps, map);
- map__put(map);
- }
- }
-
- /*
- * The initial module mapping is based on
- * /proc/modules mapped to offset zero.
- * Overwrite it to map to the module dso.
- */
- if (remap_kernel && kmodule) {
- remap_kernel = false;
- map->pgoff = shdr.sh_offset;
- }
-
- curr_map = map;
- curr_dso = dso;
- goto new_symbol;
- }
-
- if (!kmap)
- goto new_symbol;
-
- snprintf(dso_name, sizeof(dso_name),
- "%s%s", dso->short_name, section_name);
-
- curr_map = map_groups__find_by_name(kmaps, map->type, dso_name);
- if (curr_map == NULL) {
- u64 start = sym.st_value;
-
- if (kmodule)
- start += map->start + shdr.sh_offset;
-
- curr_dso = dso__new(dso_name);
- if (curr_dso == NULL)
- goto out_elf_end;
- curr_dso->kernel = dso->kernel;
- curr_dso->long_name = dso->long_name;
- curr_dso->long_name_len = dso->long_name_len;
- curr_map = map__new2(start, curr_dso,
- map->type);
- dso__put(curr_dso);
- if (curr_map == NULL) {
- goto out_elf_end;
- }
- if (adjust_kernel_syms) {
- curr_map->start = shdr.sh_addr +
- ref_reloc(kmap);
- curr_map->end = curr_map->start +
- shdr.sh_size;
- curr_map->pgoff = shdr.sh_offset;
- } else {
- curr_map->map_ip = identity__map_ip;
- curr_map->unmap_ip = identity__map_ip;
- }
- curr_dso->symtab_type = dso->symtab_type;
- map_groups__insert(kmaps, curr_map);
- /*
- * Add it before we drop the referece to curr_map,
- * i.e. while we still are sure to have a reference
- * to this DSO via curr_map->dso.
- */
- dsos__add(&map->groups->machine->dsos, curr_dso);
- /* kmaps already got it */
- map__put(curr_map);
- dso__set_loaded(curr_dso, map->type);
- } else
- curr_dso = curr_map->dso;
-
- goto new_symbol;
- }
-
- if ((used_opd && runtime_ss->adjust_symbols)
- || (!used_opd && syms_ss->adjust_symbols)) {
+ if (dso__process_kernel_symbol(dso, map, &sym, &shdr, kmaps, kmap, &curr_dso, &curr_map,
+ section_name, adjust_kernel_syms, kmodule, &remap_kernel))
+ goto out_elf_end;
+ } else if ((used_opd && runtime_ss->adjust_symbols) ||
+ (!used_opd && syms_ss->adjust_symbols)) {
pr_debug4("%s: adjusting symbol: st_value: %#" PRIx64 " "
"sh_addr: %#" PRIx64 " sh_offset: %#" PRIx64 "\n", __func__,
(u64)sym.st_value, (u64)shdr.sh_addr,
(u64)shdr.sh_offset);
sym.st_value -= shdr.sh_addr - shdr.sh_offset;
}
-new_symbol:
+
demangled = demangle_sym(dso, kmodule, elf_name);
if (demangled != NULL)
elf_name = demangled;
f = symbol__new(sym.st_value, sym.st_size,
- GELF_ST_BIND(sym.st_info), elf_name);
+ GELF_ST_BIND(sym.st_info),
+ GELF_ST_TYPE(sym.st_info), elf_name);
free(demangled);
if (!f)
goto out_elf_end;
arch__sym_update(f, &sym);
- __symbols__insert(&curr_dso->symbols[curr_map->type], f, dso->kernel);
+ __symbols__insert(&curr_dso->symbols, f, dso->kernel);
nr++;
}
@@ -1117,14 +1110,14 @@ new_symbol:
* For misannotated, zeroed, ASM function sizes.
*/
if (nr > 0) {
- symbols__fixup_end(&dso->symbols[map->type]);
- symbols__fixup_duplicate(&dso->symbols[map->type]);
+ symbols__fixup_end(&dso->symbols);
+ symbols__fixup_duplicate(&dso->symbols);
if (kmap) {
/*
* We need to fixup this here too because we create new
* maps here, for things like vsyscall sections.
*/
- __map_groups__fixup_end(kmaps, map->type);
+ map_groups__fixup_end(kmaps);
}
}
err = nr;
@@ -1393,8 +1386,16 @@ static off_t kcore__write(struct kcore *kcore)
struct phdr_data {
off_t offset;
+ off_t rel;
u64 addr;
u64 len;
+ struct list_head node;
+ struct phdr_data *remaps;
+};
+
+struct sym_data {
+ u64 addr;
+ struct list_head node;
};
struct kcore_copy_info {
@@ -1404,16 +1405,78 @@ struct kcore_copy_info {
u64 last_symbol;
u64 first_module;
u64 last_module_symbol;
- struct phdr_data kernel_map;
- struct phdr_data modules_map;
+ size_t phnum;
+ struct list_head phdrs;
+ struct list_head syms;
};
+#define kcore_copy__for_each_phdr(k, p) \
+ list_for_each_entry((p), &(k)->phdrs, node)
+
+static struct phdr_data *phdr_data__new(u64 addr, u64 len, off_t offset)
+{
+ struct phdr_data *p = zalloc(sizeof(*p));
+
+ if (p) {
+ p->addr = addr;
+ p->len = len;
+ p->offset = offset;
+ }
+
+ return p;
+}
+
+static struct phdr_data *kcore_copy_info__addnew(struct kcore_copy_info *kci,
+ u64 addr, u64 len,
+ off_t offset)
+{
+ struct phdr_data *p = phdr_data__new(addr, len, offset);
+
+ if (p)
+ list_add_tail(&p->node, &kci->phdrs);
+
+ return p;
+}
+
+static void kcore_copy__free_phdrs(struct kcore_copy_info *kci)
+{
+ struct phdr_data *p, *tmp;
+
+ list_for_each_entry_safe(p, tmp, &kci->phdrs, node) {
+ list_del(&p->node);
+ free(p);
+ }
+}
+
+static struct sym_data *kcore_copy__new_sym(struct kcore_copy_info *kci,
+ u64 addr)
+{
+ struct sym_data *s = zalloc(sizeof(*s));
+
+ if (s) {
+ s->addr = addr;
+ list_add_tail(&s->node, &kci->syms);
+ }
+
+ return s;
+}
+
+static void kcore_copy__free_syms(struct kcore_copy_info *kci)
+{
+ struct sym_data *s, *tmp;
+
+ list_for_each_entry_safe(s, tmp, &kci->syms, node) {
+ list_del(&s->node);
+ free(s);
+ }
+}
+
static int kcore_copy__process_kallsyms(void *arg, const char *name, char type,
u64 start)
{
struct kcore_copy_info *kci = arg;
- if (!symbol_type__is_a(type, MAP__FUNCTION))
+ if (!kallsyms__is_function(type))
return 0;
if (strchr(name, '[')) {
@@ -1438,6 +1501,9 @@ static int kcore_copy__process_kallsyms(void *arg, const char *name, char type,
return 0;
}
+ if (is_entry_trampoline(name) && !kcore_copy__new_sym(kci, start))
+ return -1;
+
return 0;
}
@@ -1487,27 +1553,39 @@ static int kcore_copy__parse_modules(struct kcore_copy_info *kci,
return 0;
}
-static void kcore_copy__map(struct phdr_data *p, u64 start, u64 end, u64 pgoff,
- u64 s, u64 e)
+static int kcore_copy__map(struct kcore_copy_info *kci, u64 start, u64 end,
+ u64 pgoff, u64 s, u64 e)
{
- if (p->addr || s < start || s >= end)
- return;
+ u64 len, offset;
+
+ if (s < start || s >= end)
+ return 0;
- p->addr = s;
- p->offset = (s - start) + pgoff;
- p->len = e < end ? e - s : end - s;
+ offset = (s - start) + pgoff;
+ len = e < end ? e - s : end - s;
+
+ return kcore_copy_info__addnew(kci, s, len, offset) ? 0 : -1;
}
static int kcore_copy__read_map(u64 start, u64 len, u64 pgoff, void *data)
{
struct kcore_copy_info *kci = data;
u64 end = start + len;
+ struct sym_data *sdat;
- kcore_copy__map(&kci->kernel_map, start, end, pgoff, kci->stext,
- kci->etext);
+ if (kcore_copy__map(kci, start, end, pgoff, kci->stext, kci->etext))
+ return -1;
- kcore_copy__map(&kci->modules_map, start, end, pgoff, kci->first_module,
- kci->last_module_symbol);
+ if (kcore_copy__map(kci, start, end, pgoff, kci->first_module,
+ kci->last_module_symbol))
+ return -1;
+
+ list_for_each_entry(sdat, &kci->syms, node) {
+ u64 s = round_down(sdat->addr, page_size);
+
+ if (kcore_copy__map(kci, start, end, pgoff, s, s + len))
+ return -1;
+ }
return 0;
}
@@ -1520,6 +1598,64 @@ static int kcore_copy__read_maps(struct kcore_copy_info *kci, Elf *elf)
return 0;
}
+static void kcore_copy__find_remaps(struct kcore_copy_info *kci)
+{
+ struct phdr_data *p, *k = NULL;
+ u64 kend;
+
+ if (!kci->stext)
+ return;
+
+ /* Find phdr that corresponds to the kernel map (contains stext) */
+ kcore_copy__for_each_phdr(kci, p) {
+ u64 pend = p->addr + p->len - 1;
+
+ if (p->addr <= kci->stext && pend >= kci->stext) {
+ k = p;
+ break;
+ }
+ }
+
+ if (!k)
+ return;
+
+ kend = k->offset + k->len;
+
+ /* Find phdrs that remap the kernel */
+ kcore_copy__for_each_phdr(kci, p) {
+ u64 pend = p->offset + p->len;
+
+ if (p == k)
+ continue;
+
+ if (p->offset >= k->offset && pend <= kend)
+ p->remaps = k;
+ }
+}
+
+static void kcore_copy__layout(struct kcore_copy_info *kci)
+{
+ struct phdr_data *p;
+ off_t rel = 0;
+
+ kcore_copy__find_remaps(kci);
+
+ kcore_copy__for_each_phdr(kci, p) {
+ if (!p->remaps) {
+ p->rel = rel;
+ rel += p->len;
+ }
+ kci->phnum += 1;
+ }
+
+ kcore_copy__for_each_phdr(kci, p) {
+ struct phdr_data *k = p->remaps;
+
+ if (k)
+ p->rel = p->offset - k->offset + k->rel;
+ }
+}
+
static int kcore_copy__calc_maps(struct kcore_copy_info *kci, const char *dir,
Elf *elf)
{
@@ -1555,7 +1691,12 @@ static int kcore_copy__calc_maps(struct kcore_copy_info *kci, const char *dir,
if (kci->first_module && !kci->last_module_symbol)
return -1;
- return kcore_copy__read_maps(kci, elf);
+ if (kcore_copy__read_maps(kci, elf))
+ return -1;
+
+ kcore_copy__layout(kci);
+
+ return 0;
}
static int kcore_copy__copy_file(const char *from_dir, const char *to_dir,
@@ -1678,12 +1819,15 @@ int kcore_copy(const char *from_dir, const char *to_dir)
{
struct kcore kcore;
struct kcore extract;
- size_t count = 2;
int idx = 0, err = -1;
- off_t offset = page_size, sz, modules_offset = 0;
+ off_t offset, sz;
struct kcore_copy_info kci = { .stext = 0, };
char kcore_filename[PATH_MAX];
char extract_filename[PATH_MAX];
+ struct phdr_data *p;
+
+ INIT_LIST_HEAD(&kci.phdrs);
+ INIT_LIST_HEAD(&kci.syms);
if (kcore_copy__copy_file(from_dir, to_dir, "kallsyms"))
return -1;
@@ -1703,20 +1847,17 @@ int kcore_copy(const char *from_dir, const char *to_dir)
if (kcore__init(&extract, extract_filename, kcore.elfclass, false))
goto out_kcore_close;
- if (!kci.modules_map.addr)
- count -= 1;
-
- if (kcore__copy_hdr(&kcore, &extract, count))
+ if (kcore__copy_hdr(&kcore, &extract, kci.phnum))
goto out_extract_close;
- if (kcore__add_phdr(&extract, idx++, offset, kci.kernel_map.addr,
- kci.kernel_map.len))
- goto out_extract_close;
+ offset = gelf_fsize(extract.elf, ELF_T_EHDR, 1, EV_CURRENT) +
+ gelf_fsize(extract.elf, ELF_T_PHDR, kci.phnum, EV_CURRENT);
+ offset = round_up(offset, page_size);
+
+ kcore_copy__for_each_phdr(&kci, p) {
+ off_t offs = p->rel + offset;
- if (kci.modules_map.addr) {
- modules_offset = offset + kci.kernel_map.len;
- if (kcore__add_phdr(&extract, idx, modules_offset,
- kci.modules_map.addr, kci.modules_map.len))
+ if (kcore__add_phdr(&extract, idx++, offs, p->addr, p->len))
goto out_extract_close;
}
@@ -1724,14 +1865,14 @@ int kcore_copy(const char *from_dir, const char *to_dir)
if (sz < 0 || sz > offset)
goto out_extract_close;
- if (copy_bytes(kcore.fd, kci.kernel_map.offset, extract.fd, offset,
- kci.kernel_map.len))
- goto out_extract_close;
+ kcore_copy__for_each_phdr(&kci, p) {
+ off_t offs = p->rel + offset;
- if (modules_offset && copy_bytes(kcore.fd, kci.modules_map.offset,
- extract.fd, modules_offset,
- kci.modules_map.len))
- goto out_extract_close;
+ if (p->remaps)
+ continue;
+ if (copy_bytes(kcore.fd, p->offset, extract.fd, offs, p->len))
+ goto out_extract_close;
+ }
if (kcore_copy__compare_file(from_dir, to_dir, "modules"))
goto out_extract_close;
@@ -1754,6 +1895,9 @@ out_unlink_kallsyms:
if (err)
kcore_copy__unlink(to_dir, "kallsyms");
+ kcore_copy__free_phdrs(&kci);
+ kcore_copy__free_syms(&kci);
+
return err;
}
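The new kcore layout pass above assigns each byte-carrying phdr a running relative offset and lets remapped phdrs point back into the segment that contains their file range. A standalone model of that computation over a fixed array, with all addresses and lengths invented:

#include <stdio.h>

struct phdr_data {
	unsigned long long addr;	/* virtual address in kcore */
	unsigned long long offset;	/* file offset in kcore */
	unsigned long long len;
	long long rel;			/* offset in the extracted file */
	int remaps;			/* index of covering phdr, or -1 */
};

int main(void)
{
	/* Invented example: [1] is an alias of part of [0] (same file
	 * bytes at a different virtual address), like a trampoline remap. */
	struct phdr_data p[] = {
		{ 0xffff0000, 0x1000, 0x4000, 0, -1 },	/* kernel text */
		{ 0xfffe0000, 0x2000, 0x1000, 0, -1 },	/* remaps into it */
		{ 0xffffa000, 0x8000, 0x2000, 0, -1 },	/* modules */
	};
	int n = 3, i, j;
	long long rel = 0;

	/* Find phdrs whose file range lies inside another's. */
	for (i = 0; i < n; i++)
		for (j = 0; j < n; j++)
			if (i != j && p[i].offset >= p[j].offset &&
			    p[i].offset + p[i].len <= p[j].offset + p[j].len)
				p[i].remaps = j;

	/* First pass: lay out the segments that carry bytes. */
	for (i = 0; i < n; i++)
		if (p[i].remaps < 0) {
			p[i].rel = rel;
			rel += p[i].len;
		}

	/* Second pass: remapped segments reuse their carrier's bytes. */
	for (i = 0; i < n; i++)
		if (p[i].remaps >= 0) {
			struct phdr_data *k = &p[p[i].remaps];

			p[i].rel = p[i].offset - k->offset + k->rel;
		}

	for (i = 0; i < n; i++)
		printf("phdr %d: addr %#llx len %#llx -> rel %#llx%s\n",
		       i, p[i].addr, p[i].len, (unsigned long long)p[i].rel,
		       p[i].remaps >= 0 ? " (remap)" : "");
	return 0;
}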
diff --git a/tools/perf/util/symbol-minimal.c b/tools/perf/util/symbol-minimal.c
index ff48d0d49584..7119df77dc0b 100644
--- a/tools/perf/util/symbol-minimal.c
+++ b/tools/perf/util/symbol-minimal.c
@@ -288,8 +288,7 @@ void symsrc__destroy(struct symsrc *ss)
}
int dso__synthesize_plt_symbols(struct dso *dso __maybe_unused,
- struct symsrc *ss __maybe_unused,
- struct map *map __maybe_unused)
+ struct symsrc *ss __maybe_unused)
{
return 0;
}
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 1466814ebada..8c84437f2a10 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -5,6 +5,7 @@
#include <stdio.h>
#include <string.h>
#include <linux/kernel.h>
+#include <linux/mman.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/param.h>
@@ -70,18 +71,10 @@ static enum dso_binary_type binary_type_symtab[] = {
#define DSO_BINARY_TYPE__SYMTAB_CNT ARRAY_SIZE(binary_type_symtab)
-bool symbol_type__is_a(char symbol_type, enum map_type map_type)
+static bool symbol_type__filter(char symbol_type)
{
symbol_type = toupper(symbol_type);
-
- switch (map_type) {
- case MAP__FUNCTION:
- return symbol_type == 'T' || symbol_type == 'W';
- case MAP__VARIABLE:
- return symbol_type == 'D';
- default:
- return false;
- }
+ return symbol_type == 'T' || symbol_type == 'W' || symbol_type == 'D';
}
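symbol_type__filter() above merges the old per-map-type checks into a single predicate over the kallsyms type letter. A standalone version with a few sample letters:

#include <ctype.h>
#include <stdio.h>

/* Text (T/W) and data (D) symbols now pass one combined filter. */
static int symbol_type_filter(char symbol_type)
{
	symbol_type = toupper(symbol_type);
	return symbol_type == 'T' || symbol_type == 'W' || symbol_type == 'D';
}

int main(void)
{
	const char types[] = { 't', 'W', 'd', 'b', 'r' };
	unsigned int i;

	for (i = 0; i < sizeof(types); i++)
		printf("'%c' -> %s\n", types[i],
		       symbol_type_filter(types[i]) ? "keep" : "drop");
	return 0;
}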
static int prefix_underscores_count(const char *str)
@@ -228,9 +221,9 @@ void symbols__fixup_end(struct rb_root *symbols)
curr->end = roundup(curr->start, 4096) + 4096;
}
-void __map_groups__fixup_end(struct map_groups *mg, enum map_type type)
+void map_groups__fixup_end(struct map_groups *mg)
{
- struct maps *maps = &mg->maps[type];
+ struct maps *maps = &mg->maps;
struct map *next, *curr;
down_write(&maps->lock);
@@ -256,7 +249,7 @@ out_unlock:
up_write(&maps->lock);
}
-struct symbol *symbol__new(u64 start, u64 len, u8 binding, const char *name)
+struct symbol *symbol__new(u64 start, u64 len, u8 binding, u8 type, const char *name)
{
size_t namelen = strlen(name) + 1;
struct symbol *sym = calloc(1, (symbol_conf.priv_size +
@@ -274,6 +267,7 @@ struct symbol *symbol__new(u64 start, u64 len, u8 binding, const char *name)
sym->start = start;
sym->end = len ? start + len : start;
+ sym->type = type;
sym->binding = binding;
sym->namelen = namelen - 1;
@@ -484,45 +478,40 @@ static struct symbol *symbols__find_by_name(struct rb_root *symbols,
void dso__reset_find_symbol_cache(struct dso *dso)
{
- enum map_type type;
-
- for (type = MAP__FUNCTION; type <= MAP__VARIABLE; ++type) {
- dso->last_find_result[type].addr = 0;
- dso->last_find_result[type].symbol = NULL;
- }
+ dso->last_find_result.addr = 0;
+ dso->last_find_result.symbol = NULL;
}
-void dso__insert_symbol(struct dso *dso, enum map_type type, struct symbol *sym)
+void dso__insert_symbol(struct dso *dso, struct symbol *sym)
{
- __symbols__insert(&dso->symbols[type], sym, dso->kernel);
+ __symbols__insert(&dso->symbols, sym, dso->kernel);
/* update the symbol cache if necessary */
- if (dso->last_find_result[type].addr >= sym->start &&
- (dso->last_find_result[type].addr < sym->end ||
+ if (dso->last_find_result.addr >= sym->start &&
+ (dso->last_find_result.addr < sym->end ||
sym->start == sym->end)) {
- dso->last_find_result[type].symbol = sym;
+ dso->last_find_result.symbol = sym;
}
}
-struct symbol *dso__find_symbol(struct dso *dso,
- enum map_type type, u64 addr)
+struct symbol *dso__find_symbol(struct dso *dso, u64 addr)
{
- if (dso->last_find_result[type].addr != addr || dso->last_find_result[type].symbol == NULL) {
- dso->last_find_result[type].addr = addr;
- dso->last_find_result[type].symbol = symbols__find(&dso->symbols[type], addr);
+ if (dso->last_find_result.addr != addr || dso->last_find_result.symbol == NULL) {
+ dso->last_find_result.addr = addr;
+ dso->last_find_result.symbol = symbols__find(&dso->symbols, addr);
}
- return dso->last_find_result[type].symbol;
+ return dso->last_find_result.symbol;
}
-struct symbol *dso__first_symbol(struct dso *dso, enum map_type type)
+struct symbol *dso__first_symbol(struct dso *dso)
{
- return symbols__first(&dso->symbols[type]);
+ return symbols__first(&dso->symbols);
}
-struct symbol *dso__last_symbol(struct dso *dso, enum map_type type)
+struct symbol *dso__last_symbol(struct dso *dso)
{
- return symbols__last(&dso->symbols[type]);
+ return symbols__last(&dso->symbols);
}
struct symbol *dso__next_symbol(struct symbol *sym)
@@ -539,24 +528,22 @@ struct symbol *symbol__next_by_name(struct symbol *sym)
}
/*
- * Teturns first symbol that matched with @name.
+ * Returns first symbol that matched with @name.
*/
-struct symbol *dso__find_symbol_by_name(struct dso *dso, enum map_type type,
- const char *name)
+struct symbol *dso__find_symbol_by_name(struct dso *dso, const char *name)
{
- struct symbol *s = symbols__find_by_name(&dso->symbol_names[type], name,
+ struct symbol *s = symbols__find_by_name(&dso->symbol_names, name,
SYMBOL_TAG_INCLUDE__NONE);
if (!s)
- s = symbols__find_by_name(&dso->symbol_names[type], name,
+ s = symbols__find_by_name(&dso->symbol_names, name,
SYMBOL_TAG_INCLUDE__DEFAULT_ONLY);
return s;
}
-void dso__sort_by_name(struct dso *dso, enum map_type type)
+void dso__sort_by_name(struct dso *dso)
{
- dso__set_sorted_by_name(dso, type);
- return symbols__sort_by_name(&dso->symbol_names[type],
- &dso->symbols[type]);
+ dso__set_sorted_by_name(dso);
+ return symbols__sort_by_name(&dso->symbol_names, &dso->symbols);
}
int modules__parse(const char *filename, void *arg,
@@ -621,11 +608,6 @@ out:
return err;
}
-struct process_kallsyms_args {
- struct map *map;
- struct dso *dso;
-};
-
/*
* These are symbols in the kernel image, so make sure that
* sym is from a kernel DSO.
@@ -661,10 +643,10 @@ static int map__process_kallsym_symbol(void *arg, const char *name,
char type, u64 start)
{
struct symbol *sym;
- struct process_kallsyms_args *a = arg;
- struct rb_root *root = &a->dso->symbols[a->map->type];
+ struct dso *dso = arg;
+ struct rb_root *root = &dso->symbols;
- if (!symbol_type__is_a(type, a->map->type))
+ if (!symbol_type__filter(type))
return 0;
/*
@@ -672,7 +654,7 @@ static int map__process_kallsym_symbol(void *arg, const char *name,
* symbols, setting length to 0, and rely on
* symbols__fixup_end() to fix it up.
*/
- sym = symbol__new(start, 0, kallsyms2elf_binding(type), name);
+ sym = symbol__new(start, 0, kallsyms2elf_binding(type), kallsyms2elf_type(type), name);
if (sym == NULL)
return -ENOMEM;
/*
@@ -689,21 +671,18 @@ static int map__process_kallsym_symbol(void *arg, const char *name,
* so that we can in the next step set the symbol ->end address and then
* call kernel_maps__split_kallsyms.
*/
-static int dso__load_all_kallsyms(struct dso *dso, const char *filename,
- struct map *map)
+static int dso__load_all_kallsyms(struct dso *dso, const char *filename)
{
- struct process_kallsyms_args args = { .map = map, .dso = dso, };
- return kallsyms__parse(filename, &args, map__process_kallsym_symbol);
+ return kallsyms__parse(filename, dso, map__process_kallsym_symbol);
}
-static int dso__split_kallsyms_for_kcore(struct dso *dso, struct map *map)
+static int map_groups__split_kallsyms_for_kcore(struct map_groups *kmaps, struct dso *dso)
{
- struct map_groups *kmaps = map__kmaps(map);
struct map *curr_map;
struct symbol *pos;
int count = 0;
- struct rb_root old_root = dso->symbols[map->type];
- struct rb_root *root = &dso->symbols[map->type];
+ struct rb_root old_root = dso->symbols;
+ struct rb_root *root = &dso->symbols;
struct rb_node *next = rb_first(root);
if (!kmaps)
@@ -723,7 +702,7 @@ static int dso__split_kallsyms_for_kcore(struct dso *dso, struct map *map)
if (module)
*module = '\0';
- curr_map = map_groups__find(kmaps, map->type, pos->start);
+ curr_map = map_groups__find(kmaps, pos->start);
if (!curr_map) {
symbol__delete(pos);
@@ -733,7 +712,7 @@ static int dso__split_kallsyms_for_kcore(struct dso *dso, struct map *map)
pos->start -= curr_map->start - curr_map->pgoff;
if (pos->end)
pos->end -= curr_map->start - curr_map->pgoff;
- symbols__insert(&curr_map->dso->symbols[curr_map->type], pos);
+ symbols__insert(&curr_map->dso->symbols, pos);
++count;
}
@@ -748,22 +727,25 @@ static int dso__split_kallsyms_for_kcore(struct dso *dso, struct map *map)
* kernel range is broken in several maps, named [kernel].N, as we don't have
* the original ELF section names vmlinux have.
*/
-static int dso__split_kallsyms(struct dso *dso, struct map *map, u64 delta)
+static int map_groups__split_kallsyms(struct map_groups *kmaps, struct dso *dso, u64 delta,
+ struct map *initial_map)
{
- struct map_groups *kmaps = map__kmaps(map);
struct machine *machine;
- struct map *curr_map = map;
+ struct map *curr_map = initial_map;
struct symbol *pos;
int count = 0, moved = 0;
- struct rb_root *root = &dso->symbols[map->type];
+ struct rb_root *root = &dso->symbols;
struct rb_node *next = rb_first(root);
int kernel_range = 0;
+ bool x86_64;
if (!kmaps)
return -1;
machine = kmaps->machine;
+ x86_64 = machine__is(machine, "x86_64");
+
while (next) {
char *module;
@@ -778,7 +760,7 @@ static int dso__split_kallsyms(struct dso *dso, struct map *map, u64 delta)
*module++ = '\0';
if (strcmp(curr_map->dso->short_name, module)) {
- if (curr_map != map &&
+ if (curr_map != initial_map &&
dso->kernel == DSO_TYPE_GUEST_KERNEL &&
machine__is_default_guest(machine)) {
/*
@@ -788,18 +770,16 @@ static int dso__split_kallsyms(struct dso *dso, struct map *map, u64 delta)
* symbols are in its kmap. Mark it as
* loaded.
*/
- dso__set_loaded(curr_map->dso,
- curr_map->type);
+ dso__set_loaded(curr_map->dso);
}
- curr_map = map_groups__find_by_name(kmaps,
- map->type, module);
+ curr_map = map_groups__find_by_name(kmaps, module);
if (curr_map == NULL) {
pr_debug("%s/proc/{kallsyms,modules} "
"inconsistency while looking "
"for \"%s\" module!\n",
machine->root_dir, module);
- curr_map = map;
+ curr_map = initial_map;
goto discard_symbol;
}
@@ -809,11 +789,21 @@ static int dso__split_kallsyms(struct dso *dso, struct map *map, u64 delta)
}
/*
* So that we look just like we get from .ko files,
- * i.e. not prelinked, relative to map->start.
+ * i.e. not prelinked, relative to initial_map->start.
*/
pos->start = curr_map->map_ip(curr_map, pos->start);
pos->end = curr_map->map_ip(curr_map, pos->end);
- } else if (curr_map != map) {
+ } else if (x86_64 && is_entry_trampoline(pos->name)) {
+ /*
+ * These symbols are not needed anymore since the
+ * trampoline maps refer to the text section and it's
+ * symbols instead. Avoid having to deal with
+ * relocations, and the assumption that the first symbol
+ * is the start of kernel text, by simply removing the
+ * symbols at this point.
+ */
+ goto discard_symbol;
+ } else if (curr_map != initial_map) {
char dso_name[PATH_MAX];
struct dso *ndso;
@@ -824,7 +814,7 @@ static int dso__split_kallsyms(struct dso *dso, struct map *map, u64 delta)
}
if (count == 0) {
- curr_map = map;
+ curr_map = initial_map;
goto add_symbol;
}
@@ -843,7 +833,7 @@ static int dso__split_kallsyms(struct dso *dso, struct map *map, u64 delta)
ndso->kernel = dso->kernel;
- curr_map = map__new2(pos->start, ndso, map->type);
+ curr_map = map__new2(pos->start, ndso);
if (curr_map == NULL) {
dso__put(ndso);
return -1;
@@ -858,9 +848,9 @@ static int dso__split_kallsyms(struct dso *dso, struct map *map, u64 delta)
pos->end -= delta;
}
add_symbol:
- if (curr_map != map) {
+ if (curr_map != initial_map) {
rb_erase(&pos->rb_node, root);
- symbols__insert(&curr_map->dso->symbols[curr_map->type], pos);
+ symbols__insert(&curr_map->dso->symbols, pos);
++moved;
} else
++count;
@@ -871,10 +861,10 @@ discard_symbol:
symbol__delete(pos);
}
- if (curr_map != map &&
+ if (curr_map != initial_map &&
dso->kernel == DSO_TYPE_GUEST_KERNEL &&
machine__is_default_guest(kmaps->machine)) {
- dso__set_loaded(curr_map->dso, curr_map->type);
+ dso__set_loaded(curr_map->dso);
}
return count + moved;
@@ -1035,7 +1025,12 @@ out_delete_from:
return ret;
}
-static int do_validate_kcore_modules(const char *filename, struct map *map,
+struct map *map_groups__first(struct map_groups *mg)
+{
+ return maps__first(&mg->maps);
+}
+
+static int do_validate_kcore_modules(const char *filename,
struct map_groups *kmaps)
{
struct rb_root modules = RB_ROOT;
@@ -1046,13 +1041,12 @@ static int do_validate_kcore_modules(const char *filename, struct map *map,
if (err)
return err;
- old_map = map_groups__first(kmaps, map->type);
+ old_map = map_groups__first(kmaps);
while (old_map) {
struct map *next = map_groups__next(old_map);
struct module_info *mi;
- if (old_map == map || old_map->start == map->start) {
- /* The kernel map */
+ if (!__map__is_kmodule(old_map)) {
old_map = next;
continue;
}
@@ -1109,7 +1103,7 @@ static int validate_kcore_modules(const char *kallsyms_filename,
kallsyms_filename))
return -EINVAL;
- if (do_validate_kcore_modules(modules_filename, map, kmaps))
+ if (do_validate_kcore_modules(modules_filename, kmaps))
return -EINVAL;
return 0;
@@ -1138,7 +1132,6 @@ static int validate_kcore_addresses(const char *kallsyms_filename,
struct kcore_mapfn_data {
struct dso *dso;
- enum map_type type;
struct list_head maps;
};
@@ -1147,7 +1140,7 @@ static int kcore_mapfn(u64 start, u64 len, u64 pgoff, void *data)
struct kcore_mapfn_data *md = data;
struct map *map;
- map = map__new2(start, md->dso, md->type);
+ map = map__new2(start, md->dso);
if (map == NULL)
return -ENOMEM;
@@ -1163,13 +1156,13 @@ static int dso__load_kcore(struct dso *dso, struct map *map,
const char *kallsyms_filename)
{
struct map_groups *kmaps = map__kmaps(map);
- struct machine *machine;
struct kcore_mapfn_data md;
struct map *old_map, *new_map, *replacement_map = NULL;
+ struct machine *machine;
bool is_64_bit;
int err, fd;
char kcore_filename[PATH_MAX];
- struct symbol *sym;
+ u64 stext;
if (!kmaps)
return -EINVAL;
@@ -1177,7 +1170,7 @@ static int dso__load_kcore(struct dso *dso, struct map *map,
machine = kmaps->machine;
/* This function requires that the map is the kernel map */
- if (map != machine->vmlinux_maps[map->type])
+ if (!__map__is_kernel(map))
return -EINVAL;
if (!filename_from_kallsyms_filename(kcore_filename, "kcore",
@@ -1189,7 +1182,6 @@ static int dso__load_kcore(struct dso *dso, struct map *map,
return -EINVAL;
md.dso = dso;
- md.type = map->type;
INIT_LIST_HEAD(&md.maps);
fd = open(kcore_filename, O_RDONLY);
@@ -1200,7 +1192,7 @@ static int dso__load_kcore(struct dso *dso, struct map *map,
}
/* Read new maps into temporary lists */
- err = file__read_maps(fd, md.type == MAP__FUNCTION, kcore_mapfn, &md,
+ err = file__read_maps(fd, map->prot & PROT_EXEC, kcore_mapfn, &md,
&is_64_bit);
if (err)
goto out_err;
@@ -1212,7 +1204,7 @@ static int dso__load_kcore(struct dso *dso, struct map *map,
}
/* Remove old maps */
- old_map = map_groups__first(kmaps, map->type);
+ old_map = map_groups__first(kmaps);
while (old_map) {
struct map *next = map_groups__next(old_map);
@@ -1220,14 +1212,15 @@ static int dso__load_kcore(struct dso *dso, struct map *map,
map_groups__remove(kmaps, old_map);
old_map = next;
}
+ machine->trampolines_mapped = false;
- /* Find the kernel map using the first symbol */
- sym = dso__first_symbol(dso, map->type);
- list_for_each_entry(new_map, &md.maps, node) {
- if (sym && sym->start >= new_map->start &&
- sym->start < new_map->end) {
- replacement_map = new_map;
- break;
+ /* Find the kernel map using the '_stext' symbol */
+ if (!kallsyms__get_function_start(kallsyms_filename, "_stext", &stext)) {
+ list_for_each_entry(new_map, &md.maps, node) {
+ if (stext >= new_map->start && stext < new_map->end) {
+ replacement_map = new_map;
+ break;
+ }
}
}
@@ -1256,6 +1249,19 @@ static int dso__load_kcore(struct dso *dso, struct map *map,
map__put(new_map);
}
+ if (machine__is(machine, "x86_64")) {
+ u64 addr;
+
+ /*
+ * If one of the corresponding symbols is there, assume the
+ * entry trampoline maps are too.
+ */
+ if (!kallsyms__get_function_start(kallsyms_filename,
+ ENTRY_TRAMPOLINE_NAME,
+ &addr))
+ machine->trampolines_mapped = true;
+ }
+
/*
* Set the data type and long name so that kcore can be read via
* dso__data_read_addr().
@@ -1268,7 +1274,7 @@ static int dso__load_kcore(struct dso *dso, struct map *map,
close(fd);
- if (map->type == MAP__FUNCTION)
+ if (map->prot & PROT_EXEC)
pr_debug("Using %s for kernel object code\n", kcore_filename);
else
pr_debug("Using %s for kernel data\n", kcore_filename);
@@ -1289,14 +1295,10 @@ out_err:
* If the kernel is relocated at boot time, kallsyms won't match. Compute the
* delta based on the relocation reference symbol.
*/
-static int kallsyms__delta(struct map *map, const char *filename, u64 *delta)
+static int kallsyms__delta(struct kmap *kmap, const char *filename, u64 *delta)
{
- struct kmap *kmap = map__kmap(map);
u64 addr;
- if (!kmap)
- return -1;
-
if (!kmap->ref_reloc_sym || !kmap->ref_reloc_sym->name)
return 0;
@@ -1310,19 +1312,23 @@ static int kallsyms__delta(struct map *map, const char *filename, u64 *delta)
int __dso__load_kallsyms(struct dso *dso, const char *filename,
struct map *map, bool no_kcore)
{
+ struct kmap *kmap = map__kmap(map);
u64 delta = 0;
if (symbol__restricted_filename(filename, "/proc/kallsyms"))
return -1;
- if (dso__load_all_kallsyms(dso, filename, map) < 0)
+ if (!kmap || !kmap->kmaps)
return -1;
- if (kallsyms__delta(map, filename, &delta))
+ if (dso__load_all_kallsyms(dso, filename) < 0)
return -1;
- symbols__fixup_end(&dso->symbols[map->type]);
- symbols__fixup_duplicate(&dso->symbols[map->type]);
+ if (kallsyms__delta(kmap, filename, &delta))
+ return -1;
+
+ symbols__fixup_end(&dso->symbols);
+ symbols__fixup_duplicate(&dso->symbols);
if (dso->kernel == DSO_TYPE_GUEST_KERNEL)
dso->symtab_type = DSO_BINARY_TYPE__GUEST_KALLSYMS;
@@ -1330,9 +1336,9 @@ int __dso__load_kallsyms(struct dso *dso, const char *filename,
dso->symtab_type = DSO_BINARY_TYPE__KALLSYMS;
if (!no_kcore && !dso__load_kcore(dso, map, filename))
- return dso__split_kallsyms_for_kcore(dso, map);
+ return map_groups__split_kallsyms_for_kcore(kmap->kmaps, dso);
else
- return dso__split_kallsyms(dso, map, delta);
+ return map_groups__split_kallsyms(kmap->kmaps, dso, delta, map);
}
int dso__load_kallsyms(struct dso *dso, const char *filename,
@@ -1341,8 +1347,7 @@ int dso__load_kallsyms(struct dso *dso, const char *filename,
return __dso__load_kallsyms(dso, filename, map, false);
}
-static int dso__load_perf_map(const char *map_path, struct dso *dso,
- struct map *map)
+static int dso__load_perf_map(const char *map_path, struct dso *dso)
{
char *line = NULL;
size_t n;
@@ -1379,12 +1384,12 @@ static int dso__load_perf_map(const char *map_path, struct dso *dso,
if (len + 2 >= line_len)
continue;
- sym = symbol__new(start, size, STB_GLOBAL, line + len);
+ sym = symbol__new(start, size, STB_GLOBAL, STT_FUNC, line + len);
if (sym == NULL)
goto out_delete_line;
- symbols__insert(&dso->symbols[map->type], sym);
+ symbols__insert(&dso->symbols, sym);
nr_syms++;
}
@@ -1509,25 +1514,27 @@ int dso__load(struct dso *dso, struct map *map)
pthread_mutex_lock(&dso->lock);
/* check again under the dso->lock */
- if (dso__loaded(dso, map->type)) {
+ if (dso__loaded(dso)) {
ret = 1;
goto out;
}
+ if (map->groups && map->groups->machine)
+ machine = map->groups->machine;
+ else
+ machine = NULL;
+
if (dso->kernel) {
if (dso->kernel == DSO_TYPE_KERNEL)
ret = dso__load_kernel_sym(dso, map);
else if (dso->kernel == DSO_TYPE_GUEST_KERNEL)
ret = dso__load_guest_kernel_sym(dso, map);
+ if (machine__is(machine, "x86_64"))
+ machine__map_x86_64_entry_trampolines(machine, dso);
goto out;
}
- if (map->groups && map->groups->machine)
- machine = map->groups->machine;
- else
- machine = NULL;
-
dso->adjust_symbols = 0;
if (perfmap) {
@@ -1542,7 +1549,7 @@ int dso__load(struct dso *dso, struct map *map)
goto out;
}
- ret = dso__load_perf_map(map_path, dso, map);
+ ret = dso__load_perf_map(map_path, dso);
dso->symtab_type = ret > 0 ? DSO_BINARY_TYPE__JAVA_JIT :
DSO_BINARY_TYPE__NOT_FOUND;
goto out;
@@ -1651,7 +1658,7 @@ int dso__load(struct dso *dso, struct map *map)
if (ret > 0) {
int nr_plt;
- nr_plt = dso__synthesize_plt_symbols(dso, runtime_ss, map);
+ nr_plt = dso__synthesize_plt_symbols(dso, runtime_ss);
if (nr_plt > 0)
ret += nr_plt;
}
@@ -1663,17 +1670,16 @@ out_free:
if (ret < 0 && strstr(dso->name, " (deleted)") != NULL)
ret = 0;
out:
- dso__set_loaded(dso, map->type);
+ dso__set_loaded(dso);
pthread_mutex_unlock(&dso->lock);
nsinfo__mountns_exit(&nsc);
return ret;
}
-struct map *map_groups__find_by_name(struct map_groups *mg,
- enum map_type type, const char *name)
+struct map *map_groups__find_by_name(struct map_groups *mg, const char *name)
{
- struct maps *maps = &mg->maps[type];
+ struct maps *maps = &mg->maps;
struct map *map;
down_read(&maps->lock);
@@ -1720,7 +1726,7 @@ int dso__load_vmlinux(struct dso *dso, struct map *map,
else
dso->binary_type = DSO_BINARY_TYPE__VMLINUX;
dso__set_long_name(dso, vmlinux, vmlinux_allocated);
- dso__set_loaded(dso, map->type);
+ dso__set_loaded(dso);
pr_debug("Using %s for symbols\n", symfs_vmlinux);
}
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index 70c16741f50a..1a16438eb3ce 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -57,7 +57,8 @@ struct symbol {
u64 start;
u64 end;
u16 namelen;
- u8 binding;
+ u8 type:4;
+ u8 binding:4;
u8 idle:1;
u8 ignore:1;
u8 inlined:1;
@@ -259,17 +260,16 @@ int __dso__load_kallsyms(struct dso *dso, const char *filename, struct map *map,
bool no_kcore);
int dso__load_kallsyms(struct dso *dso, const char *filename, struct map *map);
-void dso__insert_symbol(struct dso *dso, enum map_type type,
+void dso__insert_symbol(struct dso *dso,
struct symbol *sym);
-struct symbol *dso__find_symbol(struct dso *dso, enum map_type type,
- u64 addr);
-struct symbol *dso__find_symbol_by_name(struct dso *dso, enum map_type type,
- const char *name);
+struct symbol *dso__find_symbol(struct dso *dso, u64 addr);
+struct symbol *dso__find_symbol_by_name(struct dso *dso, const char *name);
+
struct symbol *symbol__next_by_name(struct symbol *sym);
-struct symbol *dso__first_symbol(struct dso *dso, enum map_type type);
-struct symbol *dso__last_symbol(struct dso *dso, enum map_type type);
+struct symbol *dso__first_symbol(struct dso *dso);
+struct symbol *dso__last_symbol(struct dso *dso);
struct symbol *dso__next_symbol(struct symbol *sym);
enum dso_type dso__type_fd(int fd);
@@ -288,7 +288,7 @@ void symbol__exit(void);
void symbol__elf_init(void);
int symbol__annotation_init(void);
-struct symbol *symbol__new(u64 start, u64 len, u8 binding, const char *name);
+struct symbol *symbol__new(u64 start, u64 len, u8 binding, u8 type, const char *name);
size_t __symbol__fprintf_symname_offs(const struct symbol *sym,
const struct addr_location *al,
bool unknown_as_addr,
@@ -300,7 +300,6 @@ size_t __symbol__fprintf_symname(const struct symbol *sym,
bool unknown_as_addr, FILE *fp);
size_t symbol__fprintf_symname(const struct symbol *sym, FILE *fp);
size_t symbol__fprintf(struct symbol *sym, FILE *fp);
-bool symbol_type__is_a(char symbol_type, enum map_type map_type);
bool symbol__restricted_filename(const char *filename,
const char *restricted_filename);
int symbol__config_symfs(const struct option *opt __maybe_unused,
@@ -308,8 +307,7 @@ int symbol__config_symfs(const struct option *opt __maybe_unused,
int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss,
struct symsrc *runtime_ss, int kmodule);
-int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss,
- struct map *map);
+int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss);
char *dso__demangle_sym(struct dso *dso, int kmodule, const char *elf_name);
@@ -317,7 +315,7 @@ void __symbols__insert(struct rb_root *symbols, struct symbol *sym, bool kernel)
void symbols__insert(struct rb_root *symbols, struct symbol *sym);
void symbols__fixup_duplicate(struct rb_root *symbols);
void symbols__fixup_end(struct rb_root *symbols);
-void __map_groups__fixup_end(struct map_groups *mg, enum map_type type);
+void map_groups__fixup_end(struct map_groups *mg);
typedef int (*mapfn_t)(u64 start, u64 len, u64 pgoff, void *data);
int file__read_maps(int fd, bool exe, mapfn_t mapfn, void *data,
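
[Editor's sketch] The symbol.h changes above are the crux of the series: the per-map_type symbol trees and their enum map_type arguments are gone, so callers pass just the dso. A compile-only C sketch of the reworked lookup API, using a hypothetical mirror of the declarations (the real struct symbol layout differs; this builds with -c only):

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Assumed mirrors of the perf-internal API, signatures as in symbol.h above. */
    struct symbol { uint64_t start, end; char name[]; };
    struct dso;
    struct symbol *dso__find_symbol(struct dso *dso, uint64_t addr);
    struct symbol *dso__find_symbol_by_name(struct dso *dso, const char *name);

    static void dump_symbol_at(struct dso *dso, uint64_t addr)
    {
            struct symbol *sym = dso__find_symbol(dso, addr);  /* no map_type */

            if (!sym)
                    sym = dso__find_symbol_by_name(dso, "_stext");
            if (sym)
                    printf("%s: [%#" PRIx64 "-%#" PRIx64 "]\n",
                           sym->name, sym->start, sym->end);
    }

Anything that previously chose between MAP__FUNCTION and MAP__VARIABLE now simply queries the one tree.
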
diff --git a/tools/perf/util/symbol_fprintf.c b/tools/perf/util/symbol_fprintf.c
index 6dd2cb88ccbe..ed0205cc7942 100644
--- a/tools/perf/util/symbol_fprintf.c
+++ b/tools/perf/util/symbol_fprintf.c
@@ -58,13 +58,13 @@ size_t symbol__fprintf_symname(const struct symbol *sym, FILE *fp)
}
size_t dso__fprintf_symbols_by_name(struct dso *dso,
- enum map_type type, FILE *fp)
+ FILE *fp)
{
size_t ret = 0;
struct rb_node *nd;
struct symbol_name_rb_node *pos;
- for (nd = rb_first(&dso->symbol_names[type]); nd; nd = rb_next(nd)) {
+ for (nd = rb_first(&dso->symbol_names); nd; nd = rb_next(nd)) {
pos = rb_entry(nd, struct symbol_name_rb_node, rb_node);
fprintf(fp, "%s\n", pos->sym.name);
}
diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c
index 68b65b10579b..2048d393ece6 100644
--- a/tools/perf/util/thread.c
+++ b/tools/perf/util/thread.c
@@ -302,23 +302,20 @@ int thread__insert_map(struct thread *thread, struct map *map)
static int __thread__prepare_access(struct thread *thread)
{
bool initialized = false;
- int i, err = 0;
-
- for (i = 0; i < MAP__NR_TYPES; ++i) {
- struct maps *maps = &thread->mg->maps[i];
- struct map *map;
+ int err = 0;
+ struct maps *maps = &thread->mg->maps;
+ struct map *map;
- down_read(&maps->lock);
+ down_read(&maps->lock);
- for (map = maps__first(maps); map; map = map__next(map)) {
- err = unwind__prepare_access(thread, map, &initialized);
- if (err || initialized)
- break;
- }
-
- up_read(&maps->lock);
+ for (map = maps__first(maps); map; map = map__next(map)) {
+ err = unwind__prepare_access(thread, map, &initialized);
+ if (err || initialized)
+ break;
}
+ up_read(&maps->lock);
+
return err;
}
@@ -335,8 +332,6 @@ static int thread__prepare_access(struct thread *thread)
static int thread__clone_map_groups(struct thread *thread,
struct thread *parent)
{
- int i;
-
/* This is new thread, we share map groups for process. */
if (thread->pid_ == parent->pid_)
return thread__prepare_access(thread);
@@ -348,9 +343,8 @@ static int thread__clone_map_groups(struct thread *thread,
}
/* But this one is new process, copy maps. */
- for (i = 0; i < MAP__NR_TYPES; ++i)
- if (map_groups__clone(thread, parent->mg, i) < 0)
- return -ENOMEM;
+ if (map_groups__clone(thread, parent->mg) < 0)
+ return -ENOMEM;
return 0;
}
@@ -371,8 +365,7 @@ int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp)
return thread__clone_map_groups(thread, parent);
}
-void thread__find_cpumode_addr_location(struct thread *thread,
- enum map_type type, u64 addr,
+void thread__find_cpumode_addr_location(struct thread *thread, u64 addr,
struct addr_location *al)
{
size_t i;
@@ -384,7 +377,7 @@ void thread__find_cpumode_addr_location(struct thread *thread,
};
for (i = 0; i < ARRAY_SIZE(cpumodes); i++) {
- thread__find_addr_location(thread, cpumodes[i], type, addr, al);
+ thread__find_symbol(thread, cpumodes[i], addr, al);
if (al->map)
break;
}
diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h
index 14d44c3235b8..07606aa6998d 100644
--- a/tools/perf/util/thread.h
+++ b/tools/perf/util/thread.h
@@ -92,16 +92,13 @@ size_t thread__fprintf(struct thread *thread, FILE *fp);
struct thread *thread__main_thread(struct machine *machine, struct thread *thread);
-void thread__find_addr_map(struct thread *thread,
- u8 cpumode, enum map_type type, u64 addr,
- struct addr_location *al);
+struct map *thread__find_map(struct thread *thread, u8 cpumode, u64 addr,
+ struct addr_location *al);
-void thread__find_addr_location(struct thread *thread,
- u8 cpumode, enum map_type type, u64 addr,
- struct addr_location *al);
+struct symbol *thread__find_symbol(struct thread *thread, u8 cpumode,
+ u64 addr, struct addr_location *al);
-void thread__find_cpumode_addr_location(struct thread *thread,
- enum map_type type, u64 addr,
+void thread__find_cpumode_addr_location(struct thread *thread, u64 addr,
struct addr_location *al);
static inline void *thread__priv(struct thread *thread)
diff --git a/tools/perf/util/trace-event-info.c b/tools/perf/util/trace-event-info.c
index d7f2113462fb..c85d0d1a65ed 100644
--- a/tools/perf/util/trace-event-info.c
+++ b/tools/perf/util/trace-event-info.c
@@ -103,11 +103,10 @@ out:
static int record_header_files(void)
{
- char *path;
+ char *path = get_events_file("header_page");
struct stat st;
int err = -EIO;
- path = get_tracing_file("events/header_page");
if (!path) {
pr_debug("can't get tracing/events/header_page");
return -ENOMEM;
@@ -128,9 +127,9 @@ static int record_header_files(void)
goto out;
}
- put_tracing_file(path);
+ put_events_file(path);
- path = get_tracing_file("events/header_event");
+ path = get_events_file("header_event");
if (!path) {
pr_debug("can't get tracing/events/header_event");
err = -ENOMEM;
@@ -154,7 +153,7 @@ static int record_header_files(void)
err = 0;
out:
- put_tracing_file(path);
+ put_events_file(path);
return err;
}
@@ -243,7 +242,7 @@ static int record_ftrace_files(struct tracepoint_path *tps)
char *path;
int ret;
- path = get_tracing_file("events/ftrace");
+ path = get_events_file("ftrace");
if (!path) {
pr_debug("can't get tracing/events/ftrace");
return -ENOMEM;
diff --git a/tools/perf/util/trace-event.c b/tools/perf/util/trace-event.c
index 16a776371d03..1aa368603268 100644
--- a/tools/perf/util/trace-event.c
+++ b/tools/perf/util/trace-event.c
@@ -75,6 +75,7 @@ void trace_event__cleanup(struct trace_event *t)
static struct event_format*
tp_format(const char *sys, const char *name)
{
+ char *tp_dir = get_events_file(sys);
struct pevent *pevent = tevent.pevent;
struct event_format *event = NULL;
char path[PATH_MAX];
@@ -82,8 +83,11 @@ tp_format(const char *sys, const char *name)
char *data;
int err;
- scnprintf(path, PATH_MAX, "%s/%s/%s/format",
- tracing_events_path, sys, name);
+ if (!tp_dir)
+ return ERR_PTR(-errno);
+
+ scnprintf(path, PATH_MAX, "%s/%s/format", tp_dir, name);
+ put_events_file(tp_dir);
err = filename__read_str(path, &data, &size);
if (err)
diff --git a/tools/perf/util/unwind-libdw.c b/tools/perf/util/unwind-libdw.c
index 7bdd239c795c..538db4e5d1e6 100644
--- a/tools/perf/util/unwind-libdw.c
+++ b/tools/perf/util/unwind-libdw.c
@@ -28,10 +28,11 @@ static int __report_module(struct addr_location *al, u64 ip,
{
Dwfl_Module *mod;
struct dso *dso = NULL;
-
- thread__find_addr_location(ui->thread,
- PERF_RECORD_MISC_USER,
- MAP__FUNCTION, ip, al);
+ /*
+ * Some callers will use al->sym, so we can't just use the
+ * cheaper thread__find_map() here.
+ */
+ thread__find_symbol(ui->thread, PERF_RECORD_MISC_USER, ip, al);
if (al->map)
dso = al->map->dso;
@@ -103,19 +104,7 @@ static int access_dso_mem(struct unwind_info *ui, Dwarf_Addr addr,
struct addr_location al;
ssize_t size;
- thread__find_addr_map(ui->thread, PERF_RECORD_MISC_USER,
- MAP__FUNCTION, addr, &al);
- if (!al.map) {
- /*
- * We've seen cases (softice) where DWARF unwinder went
- * through non executable mmaps, which we need to lookup
- * in MAP__VARIABLE tree.
- */
- thread__find_addr_map(ui->thread, PERF_RECORD_MISC_USER,
- MAP__VARIABLE, addr, &al);
- }
-
- if (!al.map) {
+ if (!thread__find_map(ui->thread, PERF_RECORD_MISC_USER, addr, &al)) {
pr_debug("unwind: no map for %lx\n", (unsigned long)addr);
return -1;
}
diff --git a/tools/perf/util/unwind-libunwind-local.c b/tools/perf/util/unwind-libunwind-local.c
index af873044d33a..6a11bc7e6b27 100644
--- a/tools/perf/util/unwind-libunwind-local.c
+++ b/tools/perf/util/unwind-libunwind-local.c
@@ -366,19 +366,7 @@ static int read_unwind_spec_debug_frame(struct dso *dso,
static struct map *find_map(unw_word_t ip, struct unwind_info *ui)
{
struct addr_location al;
-
- thread__find_addr_map(ui->thread, PERF_RECORD_MISC_USER,
- MAP__FUNCTION, ip, &al);
- if (!al.map) {
- /*
- * We've seen cases (softice) where DWARF unwinder went
- * through non executable mmaps, which we need to lookup
- * in MAP__VARIABLE tree.
- */
- thread__find_addr_map(ui->thread, PERF_RECORD_MISC_USER,
- MAP__VARIABLE, ip, &al);
- }
- return al.map;
+ return thread__find_map(ui->thread, PERF_RECORD_MISC_USER, ip, &al);
}
static int
@@ -586,12 +574,9 @@ static int entry(u64 ip, struct thread *thread,
struct unwind_entry e;
struct addr_location al;
- thread__find_addr_location(thread, PERF_RECORD_MISC_USER,
- MAP__FUNCTION, ip, &al);
-
+ e.sym = thread__find_symbol(thread, PERF_RECORD_MISC_USER, ip, &al);
e.ip = al.addr;
e.map = al.map;
- e.sym = al.sym;
pr_debug("unwind: %s:ip = 0x%" PRIx64 " (0x%" PRIx64 ")\n",
al.sym ? al.sym->name : "''",
diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c
index 1019bbc5dbd8..eac5b858a371 100644
--- a/tools/perf/util/util.c
+++ b/tools/perf/util/util.c
@@ -38,11 +38,43 @@ void perf_set_multithreaded(void)
}
unsigned int page_size;
-int cacheline_size;
+
+#ifdef _SC_LEVEL1_DCACHE_LINESIZE
+#define cache_line_size(cacheline_sizep) *cacheline_sizep = sysconf(_SC_LEVEL1_DCACHE_LINESIZE)
+#else
+static void cache_line_size(int *cacheline_sizep)
+{
+ if (sysfs__read_int("devices/system/cpu/cpu0/cache/index0/coherency_line_size", cacheline_sizep))
+ pr_debug("cannot determine cache line size");
+}
+#endif
+
+int cacheline_size(void)
+{
+ static int size;
+
+ if (!size)
+ cache_line_size(&size);
+
+ return size;
+}
int sysctl_perf_event_max_stack = PERF_MAX_STACK_DEPTH;
int sysctl_perf_event_max_contexts_per_stack = PERF_MAX_CONTEXTS_PER_STACK;
+int sysctl__max_stack(void)
+{
+ int value;
+
+ if (sysctl__read_int("kernel/perf_event_max_stack", &value) == 0)
+ sysctl_perf_event_max_stack = value;
+
+ if (sysctl__read_int("kernel/perf_event_max_contexts_per_stack", &value) == 0)
+ sysctl_perf_event_max_contexts_per_stack = value;
+
+ return sysctl_perf_event_max_stack;
+}
+
bool test_attr__enabled;
bool perf_host = true;
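
[Editor's sketch] util.c now computes the cache line size lazily on first call, via sysconf(_SC_LEVEL1_DCACHE_LINESIZE) where available and a sysfs fallback otherwise. The same memoization pattern in a free-standing form (plain C, Linux-only; not the perf code itself):

    #include <stdio.h>
    #include <unistd.h>

    /* Query the L1 data-cache line size once; later calls reuse the value. */
    static int cacheline_size(void)
    {
            static long size;

            if (!size)
                    size = sysconf(_SC_LEVEL1_DCACHE_LINESIZE);
            return (int)size;
    }

    int main(void)
    {
            printf("cache line: %d bytes\n", cacheline_size());
            return 0;
    }
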
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h
index c9626c206208..dc58254a2b69 100644
--- a/tools/perf/util/util.h
+++ b/tools/perf/util/util.h
@@ -43,7 +43,9 @@ size_t hex_width(u64 v);
int hex2u64(const char *ptr, u64 *val);
extern unsigned int page_size;
-extern int cacheline_size;
+int __pure cacheline_size(void);
+
+int sysctl__max_stack(void);
int fetch_kernel_version(unsigned int *puint,
char *str, size_t str_sz);
diff --git a/tools/perf/util/vdso.c b/tools/perf/util/vdso.c
index 0acb1ec0e2f0..741af209b19d 100644
--- a/tools/perf/util/vdso.c
+++ b/tools/perf/util/vdso.c
@@ -139,12 +139,10 @@ static enum dso_type machine__thread_dso_type(struct machine *machine,
struct thread *thread)
{
enum dso_type dso_type = DSO__TYPE_UNKNOWN;
- struct map *map;
- struct dso *dso;
+ struct map *map = map_groups__first(thread->mg);
- map = map_groups__first(thread->mg, MAP__FUNCTION);
for (; map ; map = map_groups__next(map)) {
- dso = map->dso;
+ struct dso *dso = map->dso;
if (!dso || dso->long_name[0] != '/')
continue;
dso_type = dso__type(dso, machine);
diff --git a/tools/power/acpi/Makefile.config b/tools/power/acpi/Makefile.config
index 2cccbba64418..f304be71c278 100644
--- a/tools/power/acpi/Makefile.config
+++ b/tools/power/acpi/Makefile.config
@@ -56,6 +56,7 @@ INSTALL_SCRIPT = ${INSTALL_PROGRAM}
# to compile vs uClibc, that can be done here as well.
CROSS = #/usr/i386-linux-uclibc/usr/bin/i386-uclibc-
CROSS_COMPILE ?= $(CROSS)
+LD = $(CC)
HOSTCC = gcc
# check if compiler option is supported
diff --git a/tools/testing/radix-tree/Makefile b/tools/testing/radix-tree/Makefile
index fa7ee369b3c9..db66f8a0d4be 100644
--- a/tools/testing/radix-tree/Makefile
+++ b/tools/testing/radix-tree/Makefile
@@ -17,7 +17,7 @@ ifeq ($(BUILD), 32)
LDFLAGS += -m32
endif
-targets: mapshift $(TARGETS)
+targets: generated/map-shift.h $(TARGETS)
main: $(OFILES)
@@ -42,9 +42,7 @@ radix-tree.c: ../../../lib/radix-tree.c
idr.c: ../../../lib/idr.c
sed -e 's/^static //' -e 's/__always_inline //' -e 's/inline //' < $< > $@
-.PHONY: mapshift
-
-mapshift:
+generated/map-shift.h:
@if ! grep -qws $(SHIFT) generated/map-shift.h; then \
echo "#define RADIX_TREE_MAP_SHIFT $(SHIFT)" > \
generated/map-shift.h; \
diff --git a/tools/testing/radix-tree/idr-test.c b/tools/testing/radix-tree/idr-test.c
index 6c645eb77d42..ee820fcc29b0 100644
--- a/tools/testing/radix-tree/idr-test.c
+++ b/tools/testing/radix-tree/idr-test.c
@@ -252,6 +252,13 @@ void idr_checks(void)
idr_remove(&idr, 3);
idr_remove(&idr, 0);
+ assert(idr_alloc(&idr, DUMMY_PTR, 0, 0, GFP_KERNEL) == 0);
+ idr_remove(&idr, 1);
+ for (i = 1; i < RADIX_TREE_MAP_SIZE; i++)
+ assert(idr_alloc(&idr, DUMMY_PTR, 0, 0, GFP_KERNEL) == i);
+ idr_remove(&idr, 1 << 30);
+ idr_destroy(&idr);
+
for (i = INT_MAX - 3UL; i < INT_MAX + 1UL; i++) {
struct item *item = item_create(i, 0);
assert(idr_alloc(&idr, item, i, i + 10, GFP_KERNEL) == i);
diff --git a/tools/testing/radix-tree/multiorder.c b/tools/testing/radix-tree/multiorder.c
index 59245b3d587c..7bf405638b0b 100644
--- a/tools/testing/radix-tree/multiorder.c
+++ b/tools/testing/radix-tree/multiorder.c
@@ -16,6 +16,7 @@
#include <linux/radix-tree.h>
#include <linux/slab.h>
#include <linux/errno.h>
+#include <pthread.h>
#include "test.h"
@@ -624,6 +625,67 @@ static void multiorder_account(void)
item_kill_tree(&tree);
}
+bool stop_iteration = false;
+
+static void *creator_func(void *ptr)
+{
+ /* 'order' is set up to ensure we have sibling entries */
+ unsigned int order = RADIX_TREE_MAP_SHIFT - 1;
+ struct radix_tree_root *tree = ptr;
+ int i;
+
+ for (i = 0; i < 10000; i++) {
+ item_insert_order(tree, 0, order);
+ item_delete_rcu(tree, 0);
+ }
+
+ stop_iteration = true;
+ return NULL;
+}
+
+static void *iterator_func(void *ptr)
+{
+ struct radix_tree_root *tree = ptr;
+ struct radix_tree_iter iter;
+ struct item *item;
+ void **slot;
+
+ while (!stop_iteration) {
+ rcu_read_lock();
+ radix_tree_for_each_slot(slot, tree, &iter, 0) {
+ item = radix_tree_deref_slot(slot);
+
+ if (!item)
+ continue;
+ if (radix_tree_deref_retry(item)) {
+ slot = radix_tree_iter_retry(&iter);
+ continue;
+ }
+
+ item_sanity(item, iter.index);
+ }
+ rcu_read_unlock();
+ }
+ return NULL;
+}
+
+static void multiorder_iteration_race(void)
+{
+ const int num_threads = sysconf(_SC_NPROCESSORS_ONLN);
+ pthread_t worker_thread[num_threads];
+ RADIX_TREE(tree, GFP_KERNEL);
+ int i;
+
+ pthread_create(&worker_thread[0], NULL, &creator_func, &tree);
+ for (i = 1; i < num_threads; i++)
+ pthread_create(&worker_thread[i], NULL, &iterator_func, &tree);
+
+ for (i = 0; i < num_threads; i++)
+ pthread_join(worker_thread[i], NULL);
+
+ item_kill_tree(&tree);
+}
+
void multiorder_checks(void)
{
int i;
@@ -644,6 +706,7 @@ void multiorder_checks(void)
multiorder_join();
multiorder_split();
multiorder_account();
+ multiorder_iteration_race();
radix_tree_cpu_dead(0);
}
diff --git a/tools/testing/radix-tree/test.c b/tools/testing/radix-tree/test.c
index 5978ab1f403d..def6015570b2 100644
--- a/tools/testing/radix-tree/test.c
+++ b/tools/testing/radix-tree/test.c
@@ -75,6 +75,25 @@ int item_delete(struct radix_tree_root *root, unsigned long index)
return 0;
}
+static void item_free_rcu(struct rcu_head *head)
+{
+ struct item *item = container_of(head, struct item, rcu_head);
+
+ free(item);
+}
+
+int item_delete_rcu(struct radix_tree_root *root, unsigned long index)
+{
+ struct item *item = radix_tree_delete(root, index);
+
+ if (item) {
+ item_sanity(item, index);
+ call_rcu(&item->rcu_head, item_free_rcu);
+ return 1;
+ }
+ return 0;
+}
+
void item_check_present(struct radix_tree_root *root, unsigned long index)
{
struct item *item;
diff --git a/tools/testing/radix-tree/test.h b/tools/testing/radix-tree/test.h
index d9c031dbeb1a..31f1d9b6f506 100644
--- a/tools/testing/radix-tree/test.h
+++ b/tools/testing/radix-tree/test.h
@@ -5,6 +5,7 @@
#include <linux/rcupdate.h>
struct item {
+ struct rcu_head rcu_head;
unsigned long index;
unsigned int order;
};
@@ -12,9 +13,11 @@ struct item {
struct item *item_create(unsigned long index, unsigned int order);
int __item_insert(struct radix_tree_root *root, struct item *item);
int item_insert(struct radix_tree_root *root, unsigned long index);
+void item_sanity(struct item *item, unsigned long index);
int item_insert_order(struct radix_tree_root *root, unsigned long index,
unsigned order);
int item_delete(struct radix_tree_root *root, unsigned long index);
+int item_delete_rcu(struct radix_tree_root *root, unsigned long index);
struct item *item_lookup(struct radix_tree_root *root, unsigned long index);
void item_check_present(struct radix_tree_root *root, unsigned long index);
diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config
index 983dd25d49f4..1eefe211a4a8 100644
--- a/tools/testing/selftests/bpf/config
+++ b/tools/testing/selftests/bpf/config
@@ -5,3 +5,5 @@ CONFIG_BPF_EVENTS=y
CONFIG_TEST_BPF=m
CONFIG_CGROUP_BPF=y
CONFIG_NETDEVSIM=m
+CONFIG_NET_CLS_ACT=y
+CONFIG_NET_SCH_INGRESS=y
diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index faadbe233966..4123d0ab90ba 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -1108,7 +1108,7 @@ static void test_stacktrace_build_id(void)
assert(system("dd if=/dev/urandom of=/dev/zero count=4 2> /dev/null")
== 0);
- assert(system("./urandom_read if=/dev/urandom of=/dev/zero count=4 2> /dev/null") == 0);
+ assert(system("./urandom_read") == 0);
/* disable stack trace collection */
key = 0;
val = 1;
@@ -1158,7 +1158,7 @@ static void test_stacktrace_build_id(void)
} while (bpf_map_get_next_key(stackmap_fd, &previous_key, &key) == 0);
CHECK(build_id_matches < 1, "build id match",
- "Didn't find expected build ID from the map");
+ "Didn't find expected build ID from the map\n");
disable_pmu:
ioctl(pmu_fd, PERF_EVENT_IOC_DISABLE);
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 3e7718b1a9ae..fd7de7eb329e 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -11713,6 +11713,11 @@ static void get_unpriv_disabled()
FILE *fd;
fd = fopen("/proc/sys/"UNPRIV_SYSCTL, "r");
+ if (!fd) {
+ perror("fopen /proc/sys/"UNPRIV_SYSCTL);
+ unpriv_disabled = true;
+ return;
+ }
if (fgets(buf, 2, fd) == buf && atoi(buf))
unpriv_disabled = true;
fclose(fd);
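
[Editor's sketch] The hunk above stops get_unpriv_disabled() from dereferencing a NULL FILE * when the sysctl file is absent, defaulting to "unprivileged disabled". The same defensive pattern, self-contained (the path and helper name are illustrative, not the selftest's):

    #include <stdbool.h>
    #include <stdio.h>
    #include <stdlib.h>

    /* Read a 0/1 flag file; fall back to "disabled" when unreadable. */
    static bool read_flag(const char *path)
    {
            char buf[2];
            bool set = true;    /* conservative default, as in the fix */
            FILE *fd = fopen(path, "r");

            if (!fd) {
                    perror(path);
                    return set;
            }
            set = fgets(buf, sizeof(buf), fd) == buf && atoi(buf);
            fclose(fd);
            return set;
    }

    int main(void)
    {
            printf("unpriv disabled: %d\n",
                   read_flag("/proc/sys/kernel/unprivileged_bpf_disabled"));
            return 0;
    }
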
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index 2ddcc96ae456..d9d00319b07c 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -15,7 +15,7 @@ LIBKVM += $(LIBKVM_$(UNAME_M))
INSTALL_HDR_PATH = $(top_srcdir)/usr
LINUX_HDR_PATH = $(INSTALL_HDR_PATH)/include/
-CFLAGS += -O2 -g -std=gnu99 -I$(LINUX_HDR_PATH) -Iinclude -I$(<D)
+CFLAGS += -O2 -g -std=gnu99 -I$(LINUX_HDR_PATH) -Iinclude -I$(<D) -I..
# After inclusion, $(OUTPUT) is defined and
# $(TEST_GEN_PROGS) starts with $(OUTPUT)/
diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h
index 7ab98e41324f..ac53730b30aa 100644
--- a/tools/testing/selftests/kvm/include/test_util.h
+++ b/tools/testing/selftests/kvm/include/test_util.h
@@ -19,6 +19,7 @@
#include <errno.h>
#include <unistd.h>
#include <fcntl.h>
+#include "kselftest.h"
ssize_t test_write(int fd, const void *buf, size_t count);
ssize_t test_read(int fd, void *buf, size_t count);
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index 2cedfda181d4..37e2a787d2fc 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -50,8 +50,8 @@ int kvm_check_cap(long cap)
int kvm_fd;
kvm_fd = open(KVM_DEV_PATH, O_RDONLY);
- TEST_ASSERT(kvm_fd >= 0, "open %s failed, rc: %i errno: %i",
- KVM_DEV_PATH, kvm_fd, errno);
+ if (kvm_fd < 0)
+ exit(KSFT_SKIP);
ret = ioctl(kvm_fd, KVM_CHECK_EXTENSION, cap);
TEST_ASSERT(ret != -1, "KVM_CHECK_EXTENSION IOCTL failed,\n"
@@ -91,8 +91,8 @@ struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm)
vm->mode = mode;
kvm_fd = open(KVM_DEV_PATH, perm);
- TEST_ASSERT(kvm_fd >= 0, "open %s failed, rc: %i errno: %i",
- KVM_DEV_PATH, kvm_fd, errno);
+ if (kvm_fd < 0)
+ exit(KSFT_SKIP);
/* Create VM. */
vm->fd = ioctl(kvm_fd, KVM_CREATE_VM, NULL);
@@ -418,8 +418,8 @@ struct kvm_cpuid2 *kvm_get_supported_cpuid(void)
cpuid = allocate_kvm_cpuid2();
kvm_fd = open(KVM_DEV_PATH, O_RDONLY);
- TEST_ASSERT(kvm_fd >= 0, "open %s failed, rc: %i errno: %i",
- KVM_DEV_PATH, kvm_fd, errno);
+ if (kvm_fd < 0)
+ exit(KSFT_SKIP);
ret = ioctl(kvm_fd, KVM_GET_SUPPORTED_CPUID, cpuid);
TEST_ASSERT(ret == 0, "KVM_GET_SUPPORTED_CPUID failed %d %d\n",
@@ -675,8 +675,8 @@ static int vcpu_mmap_sz(void)
int dev_fd, ret;
dev_fd = open(KVM_DEV_PATH, O_RDONLY);
- TEST_ASSERT(dev_fd >= 0, "%s open %s failed, rc: %i errno: %i",
- __func__, KVM_DEV_PATH, dev_fd, errno);
+ if (dev_fd < 0)
+ exit(KSFT_SKIP);
ret = ioctl(dev_fd, KVM_GET_VCPU_MMAP_SIZE, NULL);
TEST_ASSERT(ret >= sizeof(struct kvm_run),
diff --git a/tools/testing/selftests/kvm/sync_regs_test.c b/tools/testing/selftests/kvm/sync_regs_test.c
index 428e9473f5e2..eae1ece3c31b 100644
--- a/tools/testing/selftests/kvm/sync_regs_test.c
+++ b/tools/testing/selftests/kvm/sync_regs_test.c
@@ -85,6 +85,9 @@ static void compare_vcpu_events(struct kvm_vcpu_events *left,
{
}
+#define TEST_SYNC_FIELDS (KVM_SYNC_X86_REGS|KVM_SYNC_X86_SREGS|KVM_SYNC_X86_EVENTS)
+#define INVALID_SYNC_FIELD 0x80000000
+
int main(int argc, char *argv[])
{
struct kvm_vm *vm;
@@ -98,9 +101,14 @@ int main(int argc, char *argv[])
setbuf(stdout, NULL);
cap = kvm_check_cap(KVM_CAP_SYNC_REGS);
- TEST_ASSERT((unsigned long)cap == KVM_SYNC_X86_VALID_FIELDS,
- "KVM_CAP_SYNC_REGS (0x%x) != KVM_SYNC_X86_VALID_FIELDS (0x%lx)\n",
- cap, KVM_SYNC_X86_VALID_FIELDS);
+ if ((cap & TEST_SYNC_FIELDS) != TEST_SYNC_FIELDS) {
+ fprintf(stderr, "KVM_CAP_SYNC_REGS not supported, skipping test\n");
+ exit(KSFT_SKIP);
+ }
+ if ((cap & INVALID_SYNC_FIELD) != 0) {
+ fprintf(stderr, "The \"invalid\" field is not invalid, skipping test\n");
+ exit(KSFT_SKIP);
+ }
/* Create VM */
vm = vm_create_default(VCPU_ID, guest_code);
@@ -108,7 +116,14 @@ int main(int argc, char *argv[])
run = vcpu_state(vm, VCPU_ID);
/* Request reading invalid register set from VCPU. */
- run->kvm_valid_regs = KVM_SYNC_X86_VALID_FIELDS << 1;
+ run->kvm_valid_regs = INVALID_SYNC_FIELD;
+ rv = _vcpu_run(vm, VCPU_ID);
+ TEST_ASSERT(rv < 0 && errno == EINVAL,
+ "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d\n",
+ rv);
+ vcpu_state(vm, VCPU_ID)->kvm_valid_regs = 0;
+
+ run->kvm_valid_regs = INVALID_SYNC_FIELD | TEST_SYNC_FIELDS;
rv = _vcpu_run(vm, VCPU_ID);
TEST_ASSERT(rv < 0 && errno == EINVAL,
"Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d\n",
@@ -116,7 +131,14 @@ int main(int argc, char *argv[])
vcpu_state(vm, VCPU_ID)->kvm_valid_regs = 0;
/* Request setting invalid register set into VCPU. */
- run->kvm_dirty_regs = KVM_SYNC_X86_VALID_FIELDS << 1;
+ run->kvm_dirty_regs = INVALID_SYNC_FIELD;
+ rv = _vcpu_run(vm, VCPU_ID);
+ TEST_ASSERT(rv < 0 && errno == EINVAL,
+ "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d\n",
+ rv);
+ vcpu_state(vm, VCPU_ID)->kvm_dirty_regs = 0;
+
+ run->kvm_dirty_regs = INVALID_SYNC_FIELD | TEST_SYNC_FIELDS;
rv = _vcpu_run(vm, VCPU_ID);
TEST_ASSERT(rv < 0 && errno == EINVAL,
"Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d\n",
@@ -125,7 +147,7 @@ int main(int argc, char *argv[])
/* Request and verify all valid register sets. */
/* TODO: BUILD TIME CHECK: TEST_ASSERT(KVM_SYNC_X86_NUM_FIELDS != 3); */
- run->kvm_valid_regs = KVM_SYNC_X86_VALID_FIELDS;
+ run->kvm_valid_regs = TEST_SYNC_FIELDS;
rv = _vcpu_run(vm, VCPU_ID);
TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
"Unexpected exit reason: %u (%s),\n",
@@ -146,7 +168,7 @@ int main(int argc, char *argv[])
run->s.regs.sregs.apic_base = 1 << 11;
/* TODO run->s.regs.events.XYZ = ABC; */
- run->kvm_valid_regs = KVM_SYNC_X86_VALID_FIELDS;
+ run->kvm_valid_regs = TEST_SYNC_FIELDS;
run->kvm_dirty_regs = KVM_SYNC_X86_REGS | KVM_SYNC_X86_SREGS;
rv = _vcpu_run(vm, VCPU_ID);
TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
@@ -172,7 +194,7 @@ int main(int argc, char *argv[])
/* Clear kvm_dirty_regs bits, verify new s.regs values are
* overwritten with existing guest values.
*/
- run->kvm_valid_regs = KVM_SYNC_X86_VALID_FIELDS;
+ run->kvm_valid_regs = TEST_SYNC_FIELDS;
run->kvm_dirty_regs = 0;
run->s.regs.regs.r11 = 0xDEADBEEF;
rv = _vcpu_run(vm, VCPU_ID);
@@ -211,7 +233,7 @@ int main(int argc, char *argv[])
* with kvm_sync_regs values.
*/
run->kvm_valid_regs = 0;
- run->kvm_dirty_regs = KVM_SYNC_X86_VALID_FIELDS;
+ run->kvm_dirty_regs = TEST_SYNC_FIELDS;
run->s.regs.regs.r11 = 0xBBBB;
rv = _vcpu_run(vm, VCPU_ID);
TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
diff --git a/tools/testing/selftests/kvm/vmx_tsc_adjust_test.c b/tools/testing/selftests/kvm/vmx_tsc_adjust_test.c
index 8f7f62093add..aaa633263b2c 100644
--- a/tools/testing/selftests/kvm/vmx_tsc_adjust_test.c
+++ b/tools/testing/selftests/kvm/vmx_tsc_adjust_test.c
@@ -189,8 +189,8 @@ int main(int argc, char *argv[])
struct kvm_cpuid_entry2 *entry = kvm_get_supported_cpuid_entry(1);
if (!(entry->ecx & CPUID_VMX)) {
- printf("nested VMX not enabled, skipping test");
- return 0;
+ fprintf(stderr, "nested VMX not enabled, skipping test\n");
+ exit(KSFT_SKIP);
}
vm = vm_create_default_vmx(VCPU_ID, (void *) l1_guest_code);
diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk
index 195e9d4739a9..c1b1a4dc6a96 100644
--- a/tools/testing/selftests/lib.mk
+++ b/tools/testing/selftests/lib.mk
@@ -20,10 +20,10 @@ all: $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES)
.ONESHELL:
define RUN_TESTS
- @export KSFT_TAP_LEVEL=`echo 1`;
- @test_num=`echo 0`;
- @echo "TAP version 13";
- @for TEST in $(1); do \
+ @export KSFT_TAP_LEVEL=`echo 1`; \
+ test_num=`echo 0`; \
+ echo "TAP version 13"; \
+ for TEST in $(1); do \
BASENAME_TEST=`basename $$TEST`; \
test_num=`echo $$test_num+1 | bc`; \
echo "selftests: $$BASENAME_TEST"; \
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 8f1e13d2e547..3ff81a478dbe 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -5,7 +5,8 @@ CFLAGS = -Wall -Wl,--no-as-needed -O2 -g
CFLAGS += -I../../../../usr/include/
TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh netdevice.sh rtnetlink.sh
-TEST_PROGS += fib_tests.sh fib-onlink-tests.sh in_netns.sh pmtu.sh
+TEST_PROGS += fib_tests.sh fib-onlink-tests.sh pmtu.sh
+TEST_PROGS_EXTENDED := in_netns.sh
TEST_GEN_FILES = socket
TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy
TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa
diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config
index 6a75a3ea44ad..7ba089b33e8b 100644
--- a/tools/testing/selftests/net/config
+++ b/tools/testing/selftests/net/config
@@ -7,3 +7,8 @@ CONFIG_NET_L3_MASTER_DEV=y
CONFIG_IPV6=y
CONFIG_IPV6_MULTIPLE_TABLES=y
CONFIG_VETH=y
+CONFIG_INET_XFRM_MODE_TUNNEL=y
+CONFIG_NET_IPVTI=y
+CONFIG_INET6_XFRM_MODE_TUNNEL=y
+CONFIG_IPV6_VTI=y
+CONFIG_DUMMY=y
diff --git a/tools/testing/selftests/net/reuseport_bpf_numa.c b/tools/testing/selftests/net/reuseport_bpf_numa.c
index 365c32e84189..c9f478b40996 100644
--- a/tools/testing/selftests/net/reuseport_bpf_numa.c
+++ b/tools/testing/selftests/net/reuseport_bpf_numa.c
@@ -23,6 +23,8 @@
#include <unistd.h>
#include <numa.h>
+#include "../kselftest.h"
+
static const int PORT = 8888;
static void build_rcv_group(int *rcv_fd, size_t len, int family, int proto)
@@ -229,7 +231,7 @@ int main(void)
int *rcv_fd, nodes;
if (numa_available() < 0)
- error(1, errno, "no numa api support");
+ ksft_exit_skip("no numa api support\n");
nodes = numa_max_node() + 1;
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh b/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh
new file mode 100755
index 000000000000..98f650c9bf54
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh
@@ -0,0 +1,57 @@
+#!/bin/sh
+#
+# Invoke a text editor on all console.log files for all runs with diagnostics,
+# that is, on all such files having a console.log.diags counterpart.
+# Note that both console.log.diags and console.log are passed to the
+# editor (currently defaulting to "vi"), allowing the user to get an
+# idea of what to search for in the console.log file.
+#
+# Usage: kvm-find-errors.sh directory
+#
+# The "directory" above should end with the date/time directory, for example,
+# "tools/testing/selftests/rcutorture/res/2018.02.25-14:27:27".
+
+rundir="${1}"
+if test -z "$rundir" -o ! -d "$rundir"
+then
+ echo Usage: $0 directory
+ exit 1
+fi
+editor=${EDITOR-vi}
+
+# Find builds with errors
+files=
+for i in ${rundir}/*/Make.out
+do
+ if egrep -q "error:|warning:" < $i
+ then
+ egrep "error:|warning:" < $i > $i.diags
+ files="$files $i.diags $i"
+ fi
+done
+if test -n "$files"
+then
+ $editor $files
+else
+ echo No build errors.
+fi
+if grep -q -e "--buildonly" < ${rundir}/log
+then
+ echo Build-only run, no console logs to check.
+fi
+
+# Find console logs with errors
+files=
+for i in ${rundir}/*/console.log
+do
+ if test -r $i.diags
+ then
+ files="$files $i.diags $i"
+ fi
+done
+if test -n "$files"
+then
+ $editor $files
+else
+ echo No errors in console logs.
+fi
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh
index c2e1bb6d0cba..477ecb1293ab 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh
@@ -34,11 +34,15 @@ fi
configfile=`echo $i | sed -e 's/^.*\///'`
ngps=`grep ver: $i/console.log 2> /dev/null | tail -1 | sed -e 's/^.* ver: //' -e 's/ .*$//'`
+stopstate="`grep 'End-test grace-period state: g' $i/console.log 2> /dev/null |
+ tail -1 | sed -e 's/^\[[ 0-9.]*] //' |
+ awk '{ print \"[\" $1 \" \" $5 \" \" $6 \" \" $7 \"]\"; }' |
+ tr -d '\012\015'`"
if test -z "$ngps"
then
- echo "$configfile -------"
+ echo "$configfile ------- " $stopstate
else
- title="$configfile ------- $ngps grace periods"
+ title="$configfile ------- $ngps GPs"
dur=`sed -e 's/^.* rcutorture.shutdown_secs=//' -e 's/ .*$//' < $i/qemu-cmd 2> /dev/null`
if test -z "$dur"
then
@@ -46,9 +50,9 @@ else
else
ngpsps=`awk -v ngps=$ngps -v dur=$dur '
BEGIN { print ngps / dur }' < /dev/null`
- title="$title ($ngpsps per second)"
+ title="$title ($ngpsps/s)"
fi
- echo $title
+ echo $title $stopstate
nclosecalls=`grep --binary-files=text 'torture: Reader Batch' $i/console.log | tail -1 | awk '{for (i=NF-8;i<=NF;i++) sum+=$i; } END {print sum}'`
if test -z "$nclosecalls"
then
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh
index f7e988f369dd..c27e97824163 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh
@@ -48,10 +48,6 @@ do
cat $i/Make.oldconfig.err
fi
parse-build.sh $i/Make.out $configfile
- if test "$TORTURE_SUITE" != rcuperf
- then
- parse-torture.sh $i/console.log $configfile
- fi
parse-console.sh $i/console.log $configfile
if test -r $i/Warnings
then
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
index 5f8fbb0d7c17..c5b0f94341d9 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
@@ -267,5 +267,4 @@ then
echo Unknown PID, cannot kill qemu command
fi
-parse-torture.sh $resdir/console.log $title
parse-console.sh $resdir/console.log $title
diff --git a/tools/testing/selftests/rcutorture/bin/parse-console.sh b/tools/testing/selftests/rcutorture/bin/parse-console.sh
index 08aa7d50ae0e..17293436f551 100755
--- a/tools/testing/selftests/rcutorture/bin/parse-console.sh
+++ b/tools/testing/selftests/rcutorture/bin/parse-console.sh
@@ -24,57 +24,146 @@
#
# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+T=${TMPDIR-/tmp}/parse-console.sh.$$
file="$1"
title="$2"
+trap 'rm -f $T.seq $T.diags' 0
+
. functions.sh
+# Check for presence and readability of console output file
+if test -f "$file" -a -r "$file"
+then
+ :
+else
+ echo $title unreadable console output file: $file
+ exit 1
+fi
if grep -Pq '\x00' < $file
then
print_warning Console output contains nul bytes, old qemu still running?
fi
-egrep 'Badness|WARNING:|Warn|BUG|===========|Call Trace:|Oops:|detected stalls on CPUs/tasks:|self-detected stall on CPU|Stall ended before state dump start|\?\?\? Writer stall state|rcu_.*kthread starved for' < $file | grep -v 'ODEBUG: ' | grep -v 'Warning: unable to open an initial console' > $1.diags
-if test -s $1.diags
+cat /dev/null > $file.diags
+
+# Check for proper termination, except that rcuperf runs don't indicate this.
+if test "$TORTURE_SUITE" != rcuperf
then
- print_warning Assertion failure in $file $title
- # cat $1.diags
+ # check for abject failure
+
+ if grep -q FAILURE $file || grep -q -e '-torture.*!!!' $file
+ then
+ nerrs=`grep --binary-files=text '!!!' $file |
+ tail -1 |
+ awk '
+ {
+ for (i=NF-8;i<=NF;i++)
+ sum+=$i;
+ }
+ END { print sum }'`
+ print_bug $title FAILURE, $nerrs instances
+ exit
+ fi
+
+ grep --binary-files=text 'torture:.*ver:' $file |
+ egrep --binary-files=text -v '\(null\)|rtc: 000000000* ' |
+ sed -e 's/^(initramfs)[^]]*] //' -e 's/^\[[^]]*] //' |
+ awk '
+ BEGIN {
+ ver = 0;
+ badseq = 0;
+ }
+
+ {
+ if (!badseq && ($5 + 0 != $5 || $5 <= ver)) {
+ badseqno1 = ver;
+ badseqno2 = $5;
+ badseqnr = NR;
+ badseq = 1;
+ }
+ ver = $5
+ }
+
+ END {
+ if (badseq) {
+ if (badseqno1 == badseqno2 && badseqno2 == ver)
+ print "GP HANG at " ver " torture stat " badseqnr;
+ else
+ print "BAD SEQ " badseqno1 ":" badseqno2 " last:" ver " version " badseqnr;
+ }
+ }' > $T.seq
+
+ if grep -q SUCCESS $file
+ then
+ if test -s $T.seq
+ then
+ print_warning $title `cat $T.seq`
+ echo " " $file
+ exit 2
+ fi
+ else
+ if grep -q "_HOTPLUG:" $file
+ then
+ print_warning HOTPLUG FAILURES $title `cat $T.seq`
+ echo " " $file
+ exit 3
+ fi
+ echo $title no success message, `grep --binary-files=text 'ver:' $file | wc -l` successful version messages
+ if test -s $T.seq
+ then
+ print_warning $title `cat $T.seq`
+ fi
+ exit 2
+ fi
+fi | tee -a $file.diags
+
+egrep 'Badness|WARNING:|Warn|BUG|===========|Call Trace:|Oops:|detected stalls on CPUs/tasks:|self-detected stall on CPU|Stall ended before state dump start|\?\?\? Writer stall state|rcu_.*kthread starved for' < $file |
+grep -v 'ODEBUG: ' |
+grep -v 'Warning: unable to open an initial console' > $T.diags
+if test -s $T.diags
+then
+ print_warning "Assertion failure in $file $title"
+ # cat $T.diags
summary=""
- n_badness=`grep -c Badness $1`
+ n_badness=`grep -c Badness $file`
if test "$n_badness" -ne 0
then
summary="$summary Badness: $n_badness"
fi
- n_warn=`grep -v 'Warning: unable to open an initial console' $1 | egrep -c 'WARNING:|Warn'`
+ n_warn=`grep -v 'Warning: unable to open an initial console' $file | egrep -c 'WARNING:|Warn'`
if test "$n_warn" -ne 0
then
summary="$summary Warnings: $n_warn"
fi
- n_bugs=`egrep -c 'BUG|Oops:' $1`
+ n_bugs=`egrep -c 'BUG|Oops:' $file`
if test "$n_bugs" -ne 0
then
summary="$summary Bugs: $n_bugs"
fi
- n_calltrace=`grep -c 'Call Trace:' $1`
+ n_calltrace=`grep -c 'Call Trace:' $file`
if test "$n_calltrace" -ne 0
then
summary="$summary Call Traces: $n_calltrace"
fi
- n_lockdep=`grep -c =========== $1`
+ n_lockdep=`grep -c =========== $file`
if test "$n_badness" -ne 0
then
summary="$summary lockdep: $n_badness"
fi
- n_stalls=`egrep -c 'detected stalls on CPUs/tasks:|self-detected stall on CPU|Stall ended before state dump start|\?\?\? Writer stall state' $1`
+ n_stalls=`egrep -c 'detected stalls on CPUs/tasks:|self-detected stall on CPU|Stall ended before state dump start|\?\?\? Writer stall state' $file`
if test "$n_stalls" -ne 0
then
summary="$summary Stalls: $n_stalls"
fi
- n_starves=`grep -c 'rcu_.*kthread starved for' $1`
+ n_starves=`grep -c 'rcu_.*kthread starved for' $file`
if test "$n_starves" -ne 0
then
summary="$summary Starves: $n_starves"
fi
print_warning Summary: $summary
-else
- rm $1.diags
+ cat $T.diags >> $file.diags
+fi
+if ! test -s $file.diags
+then
+ rm -f $file.diags
fi
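
The merged sequence check above boils down to one rule: the "ver:" field must be numeric and strictly increasing, and the failure report differs depending on whether the version got stuck (a grace-period hang) or went backwards (a bad sequence). A minimal C sketch of that classification, using hypothetical sample data (illustration only, not part of the script):

    #include <stdio.h>

    /*
     * Classify a run of torture "ver:" numbers the way the awk script
     * above does; the numeric-field test ($5 + 0 != $5) is omitted here
     * because the sample input is already numeric.
     */
    static void classify(const long *ver, int n)
    {
            long prev = 0, bad1 = 0, bad2 = 0;
            int badnr = 0, badseq = 0, i;

            for (i = 0; i < n; i++) {
                    if (!badseq && ver[i] <= prev) {
                            bad1 = prev;    /* last good value */
                            bad2 = ver[i];  /* first bad value */
                            badnr = i + 1;
                            badseq = 1;
                    }
                    prev = ver[i];
            }
            if (!badseq)
                    return;
            if (bad1 == bad2 && bad2 == prev)
                    printf("GP HANG at %ld torture stat %d\n", prev, badnr);
            else
                    printf("BAD SEQ %ld:%ld last:%ld version %d\n",
                           bad1, bad2, prev, badnr);
    }

    int main(void)
    {
            long hang[] = { 1, 2, 3, 3, 3 };  /* hypothetical stalled run */
            long skip[] = { 1, 2, 9, 4, 5 };  /* hypothetical bad sequence */

            classify(hang, 5);
            classify(skip, 5);
            return 0;
    }
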
diff --git a/tools/testing/selftests/rcutorture/bin/parse-torture.sh b/tools/testing/selftests/rcutorture/bin/parse-torture.sh
deleted file mode 100755
index 5987e50cfeb4..000000000000
--- a/tools/testing/selftests/rcutorture/bin/parse-torture.sh
+++ /dev/null
@@ -1,105 +0,0 @@
-#!/bin/bash
-#
-# Check the console output from a torture run for goodness.
-# The "file" is a pathname on the local system, and "title" is
-# a text string for error-message purposes.
-#
-# The file must contain torture output, but can be interspersed
-# with other dmesg text, as in console-log output.
-#
-# Usage: parse-torture.sh file title
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, you can access it online at
-# http://www.gnu.org/licenses/gpl-2.0.html.
-#
-# Copyright (C) IBM Corporation, 2011
-#
-# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
-
-T=${TMPDIR-/tmp}/parse-torture.sh.$$
-file="$1"
-title="$2"
-
-trap 'rm -f $T.seq' 0
-
-. functions.sh
-
-# check for presence of torture output file.
-
-if test -f "$file" -a -r "$file"
-then
- :
-else
- echo $title unreadable torture output file: $file
- exit 1
-fi
-
-# check for abject failure
-
-if grep -q FAILURE $file || grep -q -e '-torture.*!!!' $file
-then
- nerrs=`grep --binary-files=text '!!!' $file | tail -1 | awk '{for (i=NF-8;i<=NF;i++) sum+=$i; } END {print sum}'`
- print_bug $title FAILURE, $nerrs instances
- echo " " $url
- exit
-fi
-
-grep --binary-files=text 'torture:.*ver:' $file | egrep --binary-files=text -v '\(null\)|rtc: 000000000* ' | sed -e 's/^(initramfs)[^]]*] //' -e 's/^\[[^]]*] //' |
-awk '
-BEGIN {
- ver = 0;
- badseq = 0;
- }
-
- {
- if (!badseq && ($5 + 0 != $5 || $5 <= ver)) {
- badseqno1 = ver;
- badseqno2 = $5;
- badseqnr = NR;
- badseq = 1;
- }
- ver = $5
- }
-
-END {
- if (badseq) {
- if (badseqno1 == badseqno2 && badseqno2 == ver)
- print "GP HANG at " ver " torture stat " badseqnr;
- else
- print "BAD SEQ " badseqno1 ":" badseqno2 " last:" ver " version " badseqnr;
- }
- }' > $T.seq
-
-if grep -q SUCCESS $file
-then
- if test -s $T.seq
- then
- print_warning $title $title `cat $T.seq`
- echo " " $file
- exit 2
- fi
-else
- if grep -q "_HOTPLUG:" $file
- then
- print_warning HOTPLUG FAILURES $title `cat $T.seq`
- echo " " $file
- exit 3
- fi
- echo $title no success message, `grep --binary-files=text 'ver:' $file | wc -l` successful version messages
- if test -s $T.seq
- then
- print_warning $title `cat $T.seq`
- fi
- exit 2
-fi
diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
index 168c66d74fc5..e1473234968d 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -134,11 +134,15 @@ struct seccomp_data {
#endif
#ifndef SECCOMP_FILTER_FLAG_TSYNC
-#define SECCOMP_FILTER_FLAG_TSYNC 1
+#define SECCOMP_FILTER_FLAG_TSYNC (1UL << 0)
#endif
#ifndef SECCOMP_FILTER_FLAG_LOG
-#define SECCOMP_FILTER_FLAG_LOG 2
+#define SECCOMP_FILTER_FLAG_LOG (1UL << 1)
+#endif
+
+#ifndef SECCOMP_FILTER_FLAG_SPEC_ALLOW
+#define SECCOMP_FILTER_FLAG_SPEC_ALLOW (1UL << 2)
#endif
#ifndef PTRACE_SECCOMP_GET_METADATA
@@ -2072,14 +2076,26 @@ TEST(seccomp_syscall_mode_lock)
TEST(detect_seccomp_filter_flags)
{
unsigned int flags[] = { SECCOMP_FILTER_FLAG_TSYNC,
- SECCOMP_FILTER_FLAG_LOG };
+ SECCOMP_FILTER_FLAG_LOG,
+ SECCOMP_FILTER_FLAG_SPEC_ALLOW };
unsigned int flag, all_flags;
int i;
long ret;
/* Test detection of known-good filter flags */
for (i = 0, all_flags = 0; i < ARRAY_SIZE(flags); i++) {
+ int bits = 0;
+
flag = flags[i];
+ /* Make sure the flag is a single bit! */
+ while (flag) {
+ if (flag & 0x1)
+ bits++;
+ flag >>= 1;
+ }
+ ASSERT_EQ(1, bits);
+ flag = flags[i];
+
ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL);
ASSERT_NE(ENOSYS, errno) {
TH_LOG("Kernel does not support seccomp syscall!");
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json b/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json
index 5b012f4981d4..6f289a49e5ec 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json
@@ -66,7 +66,7 @@
"cmdUnderTest": "$TC action add action bpf object-file _b.o index 667",
"expExitCode": "0",
"verifyCmd": "$TC action get action bpf index 667",
- "matchPattern": "action order [0-9]*: bpf _b.o:\\[action\\] id [0-9]* tag 3b185187f1855c4c default-action pipe.*index 667 ref",
+ "matchPattern": "action order [0-9]*: bpf _b.o:\\[action\\] id [0-9]* tag 3b185187f1855c4c( jited)? default-action pipe.*index 667 ref",
"matchCount": "1",
"teardown": [
"$TC action flush action bpf",
@@ -92,10 +92,15 @@
"cmdUnderTest": "$TC action add action bpf object-file _c.o index 667",
"expExitCode": "255",
"verifyCmd": "$TC action get action bpf index 667",
- "matchPattern": "action order [0-9]*: bpf _b.o:\\[action\\] id [0-9].*index 667 ref",
+ "matchPattern": "action order [0-9]*: bpf _c.o:\\[action\\] id [0-9].*index 667 ref",
"matchCount": "0",
"teardown": [
- "$TC action flush action bpf",
+ [
+ "$TC action flush action bpf",
+ 0,
+ 1,
+ 255
+ ],
"rm -f _c.o"
]
},
diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile
index d744991c0f4f..39f66bc29b82 100644
--- a/tools/testing/selftests/x86/Makefile
+++ b/tools/testing/selftests/x86/Makefile
@@ -11,7 +11,7 @@ CAN_BUILD_X86_64 := $(shell ./check_cc.sh $(CC) trivial_64bit_program.c)
TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt test_mremap_vdso \
check_initial_reg_state sigreturn iopl mpx-mini-test ioperm \
- protection_keys test_vdso test_vsyscall
+ protection_keys test_vdso test_vsyscall mov_ss_trap
TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso unwind_vdso \
test_FCMOV test_FCOMI test_FISTTP \
vdso_restorer
diff --git a/tools/testing/selftests/x86/mov_ss_trap.c b/tools/testing/selftests/x86/mov_ss_trap.c
new file mode 100644
index 000000000000..3c3a022654f3
--- /dev/null
+++ b/tools/testing/selftests/x86/mov_ss_trap.c
@@ -0,0 +1,285 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * mov_ss_trap.c: Exercise the bizarre side effects of a watchpoint on MOV SS
+ *
+ * This does MOV SS from a watchpointed address followed by various
+ * types of kernel entries. A MOV SS that hits a watchpoint will queue
+ * up a #DB trap but will not actually deliver that trap. The trap
+ * will be delivered after the next instruction instead. The CPU's logic
+ * seems to be:
+ *
+ * - Any fault: drop the pending #DB trap.
+ * - INT $N, INT3, INTO, SYSCALL, SYSENTER: enter the kernel and then
+ * deliver #DB.
+ * - ICEBP: enter the kernel but do not deliver the watchpoint trap
+ * - breakpoint: only one #DB is delivered (phew!)
+ *
+ * There are plenty of ways for a kernel to handle this incorrectly. This
+ * test tries to exercise all the cases.
+ *
+ * This should mostly cover CVE-2018-1087 and CVE-2018-8897.
+ */
+#define _GNU_SOURCE
+
+#include <stdlib.h>
+#include <sys/ptrace.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <sys/user.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+#include <errno.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <err.h>
+#include <string.h>
+#include <setjmp.h>
+#include <sys/prctl.h>
+
+#define X86_EFLAGS_RF (1UL << 16)
+
+#if __x86_64__
+# define REG_IP REG_RIP
+#else
+# define REG_IP REG_EIP
+#endif
+
+unsigned short ss;
+extern unsigned char breakpoint_insn[];
+sigjmp_buf jmpbuf;
+static unsigned char altstack_data[SIGSTKSZ];
+
+static void enable_watchpoint(void)
+{
+ pid_t parent = getpid();
+ int status;
+
+ pid_t child = fork();
+ if (child < 0)
+ err(1, "fork");
+
+ if (child) {
+ if (waitpid(child, &status, 0) != child)
+ err(1, "waitpid for child");
+ } else {
+ unsigned long dr0, dr1, dr7;
+
+ dr0 = (unsigned long)&ss;
+ dr1 = (unsigned long)breakpoint_insn;
+ dr7 = ((1UL << 1) | /* G0 */
+ (3UL << 16) | /* RW0 = read or write */
+ (1UL << 18) | /* LEN0 = 2 bytes */
+ (1UL << 3)); /* G1, RW1 = insn */
+
+ if (ptrace(PTRACE_ATTACH, parent, NULL, NULL) != 0)
+ err(1, "PTRACE_ATTACH");
+
+ if (waitpid(parent, &status, 0) != parent)
+ err(1, "waitpid for child");
+
+ if (ptrace(PTRACE_POKEUSER, parent, (void *)offsetof(struct user, u_debugreg[0]), dr0) != 0)
+ err(1, "PTRACE_POKEUSER DR0");
+
+ if (ptrace(PTRACE_POKEUSER, parent, (void *)offsetof(struct user, u_debugreg[1]), dr1) != 0)
+ err(1, "PTRACE_POKEUSER DR1");
+
+ if (ptrace(PTRACE_POKEUSER, parent, (void *)offsetof(struct user, u_debugreg[7]), dr7) != 0)
+ err(1, "PTRACE_POKEUSER DR7");
+
+ printf("\tDR0 = %lx, DR1 = %lx, DR7 = %lx\n", dr0, dr1, dr7);
+
+ if (ptrace(PTRACE_DETACH, parent, NULL, NULL) != 0)
+ err(1, "PTRACE_DETACH");
+
+ exit(0);
+ }
+}
+
+static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
+ int flags)
+{
+ struct sigaction sa;
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_sigaction = handler;
+ sa.sa_flags = SA_SIGINFO | flags;
+ sigemptyset(&sa.sa_mask);
+ if (sigaction(sig, &sa, 0))
+ err(1, "sigaction");
+}
+
+static char const * const signames[] = {
+ [SIGSEGV] = "SIGSEGV",
+ [SIGBUS] = "SIBGUS",
+ [SIGTRAP] = "SIGTRAP",
+ [SIGILL] = "SIGILL",
+};
+
+static void sigtrap(int sig, siginfo_t *si, void *ctx_void)
+{
+ ucontext_t *ctx = ctx_void;
+
+ printf("\tGot SIGTRAP with RIP=%lx, EFLAGS.RF=%d\n",
+ (unsigned long)ctx->uc_mcontext.gregs[REG_IP],
+ !!(ctx->uc_mcontext.gregs[REG_EFL] & X86_EFLAGS_RF));
+}
+
+static void handle_and_return(int sig, siginfo_t *si, void *ctx_void)
+{
+ ucontext_t *ctx = ctx_void;
+
+ printf("\tGot %s with RIP=%lx\n", signames[sig],
+ (unsigned long)ctx->uc_mcontext.gregs[REG_IP]);
+}
+
+static void handle_and_longjmp(int sig, siginfo_t *si, void *ctx_void)
+{
+ ucontext_t *ctx = ctx_void;
+
+ printf("\tGot %s with RIP=%lx\n", signames[sig],
+ (unsigned long)ctx->uc_mcontext.gregs[REG_IP]);
+
+ siglongjmp(jmpbuf, 1);
+}
+
+int main()
+{
+ unsigned long nr;
+
+ asm volatile ("mov %%ss, %[ss]" : [ss] "=m" (ss));
+ printf("\tSS = 0x%hx, &SS = 0x%p\n", ss, &ss);
+
+ if (prctl(PR_SET_PTRACER, PR_SET_PTRACER_ANY, 0, 0, 0) == 0)
+ printf("\tPR_SET_PTRACER_ANY succeeded\n");
+
+ printf("\tSet up a watchpoint\n");
+ sethandler(SIGTRAP, sigtrap, 0);
+ enable_watchpoint();
+
+ printf("[RUN]\tRead from watched memory (should get SIGTRAP)\n");
+ asm volatile ("mov %[ss], %[tmp]" : [tmp] "=r" (nr) : [ss] "m" (ss));
+
+ printf("[RUN]\tMOV SS; INT3\n");
+ asm volatile ("mov %[ss], %%ss; int3" :: [ss] "m" (ss));
+
+ printf("[RUN]\tMOV SS; INT 3\n");
+ asm volatile ("mov %[ss], %%ss; .byte 0xcd, 0x3" :: [ss] "m" (ss));
+
+ printf("[RUN]\tMOV SS; CS CS INT3\n");
+ asm volatile ("mov %[ss], %%ss; .byte 0x2e, 0x2e; int3" :: [ss] "m" (ss));
+
+ printf("[RUN]\tMOV SS; CSx14 INT3\n");
+ asm volatile ("mov %[ss], %%ss; .fill 14,1,0x2e; int3" :: [ss] "m" (ss));
+
+ printf("[RUN]\tMOV SS; INT 4\n");
+ sethandler(SIGSEGV, handle_and_return, SA_RESETHAND);
+ asm volatile ("mov %[ss], %%ss; int $4" :: [ss] "m" (ss));
+
+#ifdef __i386__
+ printf("[RUN]\tMOV SS; INTO\n");
+ sethandler(SIGSEGV, handle_and_return, SA_RESETHAND);
+ nr = -1;
+ asm volatile ("add $1, %[tmp]; mov %[ss], %%ss; into"
+ : [tmp] "+r" (nr) : [ss] "m" (ss));
+#endif
+
+ if (sigsetjmp(jmpbuf, 1) == 0) {
+ printf("[RUN]\tMOV SS; ICEBP\n");
+
+ /* Some emulators (e.g. QEMU TCG) don't emulate ICEBP. */
+ sethandler(SIGILL, handle_and_longjmp, SA_RESETHAND);
+
+ asm volatile ("mov %[ss], %%ss; .byte 0xf1" :: [ss] "m" (ss));
+ }
+
+ if (sigsetjmp(jmpbuf, 1) == 0) {
+ printf("[RUN]\tMOV SS; CLI\n");
+ sethandler(SIGSEGV, handle_and_longjmp, SA_RESETHAND);
+ asm volatile ("mov %[ss], %%ss; cli" :: [ss] "m" (ss));
+ }
+
+ if (sigsetjmp(jmpbuf, 1) == 0) {
+ printf("[RUN]\tMOV SS; #PF\n");
+ sethandler(SIGSEGV, handle_and_longjmp, SA_RESETHAND);
+ asm volatile ("mov %[ss], %%ss; mov (-1), %[tmp]"
+ : [tmp] "=r" (nr) : [ss] "m" (ss));
+ }
+
+ /*
+ * INT $1: if #DB has DPL=3 and there isn't special handling,
+ * then the kernel will die.
+ */
+ if (sigsetjmp(jmpbuf, 1) == 0) {
+ printf("[RUN]\tMOV SS; INT 1\n");
+ sethandler(SIGSEGV, handle_and_longjmp, SA_RESETHAND);
+ asm volatile ("mov %[ss], %%ss; int $1" :: [ss] "m" (ss));
+ }
+
+#ifdef __x86_64__
+ /*
+ * In principle, we should test 32-bit SYSCALL as well, but
+ * the calling convention is so unpredictable that it's
+ * not obviously worth the effort.
+ */
+ if (sigsetjmp(jmpbuf, 1) == 0) {
+ printf("[RUN]\tMOV SS; SYSCALL\n");
+ sethandler(SIGILL, handle_and_longjmp, SA_RESETHAND);
+ nr = SYS_getpid;
+ /*
+ * Toggle the high bit of RSP to make it noncanonical to
+ * strengthen this test on non-SMAP systems.
+ */
+ asm volatile ("btc $63, %%rsp\n\t"
+ "mov %[ss], %%ss; syscall\n\t"
+ "btc $63, %%rsp"
+ : "+a" (nr) : [ss] "m" (ss)
+ : "rcx"
+#ifdef __x86_64__
+ , "r11"
+#endif
+ );
+ }
+#endif
+
+ printf("[RUN]\tMOV SS; breakpointed NOP\n");
+ asm volatile ("mov %[ss], %%ss; breakpoint_insn: nop" :: [ss] "m" (ss));
+
+ /*
+ * Invoking SYSENTER directly breaks all the rules. Just handle
+ * the SIGSEGV.
+ */
+ if (sigsetjmp(jmpbuf, 1) == 0) {
+ printf("[RUN]\tMOV SS; SYSENTER\n");
+ stack_t stack = {
+ .ss_sp = altstack_data,
+ .ss_size = SIGSTKSZ,
+ };
+ if (sigaltstack(&stack, NULL) != 0)
+ err(1, "sigaltstack");
+ sethandler(SIGSEGV, handle_and_longjmp, SA_RESETHAND | SA_ONSTACK);
+ nr = SYS_getpid;
+ asm volatile ("mov %[ss], %%ss; SYSENTER" : "+a" (nr)
+ : [ss] "m" (ss) : "flags", "rcx"
+#ifdef __x86_64__
+ , "r11"
+#endif
+ );
+
+ /* We're unreachable here. SYSENTER forgets RIP. */
+ }
+
+ if (sigsetjmp(jmpbuf, 1) == 0) {
+ printf("[RUN]\tMOV SS; INT $0x80\n");
+ sethandler(SIGSEGV, handle_and_longjmp, SA_RESETHAND);
+ nr = 20; /* compat getpid */
+ asm volatile ("mov %[ss], %%ss; int $0x80"
+ : "+a" (nr) : [ss] "m" (ss)
+ : "flags"
+#ifdef __x86_64__
+ , "r8", "r9", "r10", "r11"
+#endif
+ );
+ }
+
+ printf("[OK]\tI aten't dead\n");
+ return 0;
+}
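
The DR7 value that enable_watchpoint() pokes into the tracee follows the standard per-slot layout: a global-enable bit at position 2n+1, a 2-bit type field at 16+4n, and a 2-bit length field at 18+4n for slot n. A hypothetical helper making that encoding explicit (a sketch, not the test's code):

    /*
     * Build the DR7 bits for breakpoint slot n. Type: 0 = instruction,
     * 1 = data write, 3 = data read/write. Length encoding: 0 = 1 byte,
     * 1 = 2 bytes, 2 = 8 bytes, 3 = 4 bytes.
     */
    static unsigned long dr7_slot(int n, unsigned long type, unsigned long len)
    {
            return (1UL << (2 * n + 1)) |   /* Gn: global enable */
                   (type << (16 + 4 * n)) |
                   (len << (18 + 4 * n));
    }

    /*
     * dr7_slot(0, 3, 1) | dr7_slot(1, 0, 0) reproduces the value used
     * above: a 2-byte read/write watchpoint on &ss in slot 0 plus an
     * instruction breakpoint on breakpoint_insn in slot 1.
     */
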
diff --git a/tools/testing/selftests/x86/mpx-mini-test.c b/tools/testing/selftests/x86/mpx-mini-test.c
index 9c0325e1ea68..50f7e9272481 100644
--- a/tools/testing/selftests/x86/mpx-mini-test.c
+++ b/tools/testing/selftests/x86/mpx-mini-test.c
@@ -368,6 +368,11 @@ static int expected_bnd_index = -1;
uint64_t shadow_plb[NR_MPX_BOUNDS_REGISTERS][2]; /* shadow MPX bound registers */
unsigned long shadow_map[NR_MPX_BOUNDS_REGISTERS];
+/* Failed address bound checks: */
+#ifndef SEGV_BNDERR
+# define SEGV_BNDERR 3
+#endif
+
/*
* The kernel is supposed to provide some information about the bounds
* exception in the siginfo. It should match what we have in the bounds
@@ -419,8 +424,6 @@ void handler(int signum, siginfo_t *si, void *vucontext)
br_count++;
dprintf1("#BR 0x%jx (total seen: %d)\n", status, br_count);
-#define SEGV_BNDERR 3 /* failed address bound checks */
-
dprintf2("Saw a #BR! status 0x%jx at %016lx br_reason: %jx\n",
status, ip, br_reason);
dprintf2("si_signo: %d\n", si->si_signo);
diff --git a/tools/testing/selftests/x86/pkey-helpers.h b/tools/testing/selftests/x86/pkey-helpers.h
index b3cb7670e026..254e5436bdd9 100644
--- a/tools/testing/selftests/x86/pkey-helpers.h
+++ b/tools/testing/selftests/x86/pkey-helpers.h
@@ -26,30 +26,26 @@ static inline void sigsafe_printf(const char *format, ...)
{
va_list ap;
- va_start(ap, format);
if (!dprint_in_signal) {
+ va_start(ap, format);
vprintf(format, ap);
+ va_end(ap);
} else {
int ret;
- int len = vsnprintf(dprint_in_signal_buffer,
- DPRINT_IN_SIGNAL_BUF_SIZE,
- format, ap);
/*
- * len is amount that would have been printed,
- * but actual write is truncated at BUF_SIZE.
+ * No printf() functions are signal-safe.
+ * They deadlock easily. Write the format
+ * string to get some output, even if
+ * incomplete.
*/
- if (len > DPRINT_IN_SIGNAL_BUF_SIZE)
- len = DPRINT_IN_SIGNAL_BUF_SIZE;
- ret = write(1, dprint_in_signal_buffer, len);
+ ret = write(1, format, strlen(format));
if (ret < 0)
- abort();
+ exit(1);
}
- va_end(ap);
}
#define dprintf_level(level, args...) do { \
if (level <= DEBUG_LEVEL) \
sigsafe_printf(args); \
- fflush(NULL); \
} while (0)
#define dprintf0(args...) dprintf_level(0, args)
#define dprintf1(args...) dprintf_level(1, args)
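
The rewritten signal path above is deliberately crude: no printf()-family function is async-signal-safe, so the handler emits the raw format string with write(2), which is on POSIX's safe list. The same pattern in isolation, assuming a fixed literal message is enough (a sketch, not this header's code):

    #include <string.h>
    #include <unistd.h>

    /*
     * Emit a literal diagnostic from a signal handler. write(2) is
     * async-signal-safe; vsnprintf()/vprintf() may deadlock on stdio
     * locks held by the interrupted code.
     */
    static void signal_safe_msg(const char *msg)
    {
            if (write(STDERR_FILENO, msg, strlen(msg)) < 0)
                    _exit(1);  /* also async-signal-safe, unlike exit() */
    }
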
diff --git a/tools/testing/selftests/x86/protection_keys.c b/tools/testing/selftests/x86/protection_keys.c
index f15aa5a76fe3..460b4bdf4c1e 100644
--- a/tools/testing/selftests/x86/protection_keys.c
+++ b/tools/testing/selftests/x86/protection_keys.c
@@ -72,10 +72,9 @@ extern void abort_hooks(void);
test_nr, iteration_nr); \
dprintf0("errno at assert: %d", errno); \
abort_hooks(); \
- assert(condition); \
+ exit(__LINE__); \
} \
} while (0)
-#define raw_assert(cond) assert(cond)
void cat_into_file(char *str, char *file)
{
@@ -87,12 +86,17 @@ void cat_into_file(char *str, char *file)
* these need to be raw because they are called under
* pkey_assert()
*/
- raw_assert(fd >= 0);
+ if (fd < 0) {
+ fprintf(stderr, "error opening '%s'\n", str);
+ perror("error: ");
+ exit(__LINE__);
+ }
+
ret = write(fd, str, strlen(str));
if (ret != strlen(str)) {
perror("write to file failed");
fprintf(stderr, "filename: '%s' str: '%s'\n", file, str);
- raw_assert(0);
+ exit(__LINE__);
}
close(fd);
}
@@ -191,26 +195,30 @@ void lots_o_noops_around_write(int *write_to_me)
#ifdef __i386__
#ifndef SYS_mprotect_key
-# define SYS_mprotect_key 380
+# define SYS_mprotect_key 380
#endif
+
#ifndef SYS_pkey_alloc
-# define SYS_pkey_alloc 381
-# define SYS_pkey_free 382
+# define SYS_pkey_alloc 381
+# define SYS_pkey_free 382
#endif
-#define REG_IP_IDX REG_EIP
-#define si_pkey_offset 0x14
+
+#define REG_IP_IDX REG_EIP
+#define si_pkey_offset 0x14
#else
#ifndef SYS_mprotect_key
-# define SYS_mprotect_key 329
+# define SYS_mprotect_key 329
#endif
+
#ifndef SYS_pkey_alloc
-# define SYS_pkey_alloc 330
-# define SYS_pkey_free 331
+# define SYS_pkey_alloc 330
+# define SYS_pkey_free 331
#endif
-#define REG_IP_IDX REG_RIP
-#define si_pkey_offset 0x20
+
+#define REG_IP_IDX REG_RIP
+#define si_pkey_offset 0x20
#endif
@@ -225,8 +233,14 @@ void dump_mem(void *dumpme, int len_bytes)
}
}
-#define SEGV_BNDERR 3 /* failed address bound checks */
-#define SEGV_PKUERR 4
+/* Failed address bound checks: */
+#ifndef SEGV_BNDERR
+# define SEGV_BNDERR 3
+#endif
+
+#ifndef SEGV_PKUERR
+# define SEGV_PKUERR 4
+#endif
static char *si_code_str(int si_code)
{
@@ -289,13 +303,6 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext)
dump_mem(pkru_ptr - 128, 256);
pkey_assert(*pkru_ptr);
- si_pkey_ptr = (u32 *)(((u8 *)si) + si_pkey_offset);
- dprintf1("si_pkey_ptr: %p\n", si_pkey_ptr);
- dump_mem(si_pkey_ptr - 8, 24);
- siginfo_pkey = *si_pkey_ptr;
- pkey_assert(siginfo_pkey < NR_PKEYS);
- last_si_pkey = siginfo_pkey;
-
if ((si->si_code == SEGV_MAPERR) ||
(si->si_code == SEGV_ACCERR) ||
(si->si_code == SEGV_BNDERR)) {
@@ -303,6 +310,13 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext)
exit(4);
}
+ si_pkey_ptr = (u32 *)(((u8 *)si) + si_pkey_offset);
+ dprintf1("si_pkey_ptr: %p\n", si_pkey_ptr);
+ dump_mem((u8 *)si_pkey_ptr - 8, 24);
+ siginfo_pkey = *si_pkey_ptr;
+ pkey_assert(siginfo_pkey < NR_PKEYS);
+ last_si_pkey = siginfo_pkey;
+
dprintf1("signal pkru from xsave: %08x\n", *pkru_ptr);
/* need __rdpkru() version so we do not do shadow_pkru checking */
dprintf1("signal pkru from pkru: %08x\n", __rdpkru());
@@ -311,22 +325,6 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext)
dprintf1("WARNING: set PRKU=0 to allow faulting instruction to continue\n");
pkru_faults++;
dprintf1("<<<<==================================================\n");
- return;
- if (trapno == 14) {
- fprintf(stderr,
- "ERROR: In signal handler, page fault, trapno = %d, ip = %016lx\n",
- trapno, ip);
- fprintf(stderr, "si_addr %p\n", si->si_addr);
- fprintf(stderr, "REG_ERR: %lx\n",
- (unsigned long)uctxt->uc_mcontext.gregs[REG_ERR]);
- exit(1);
- } else {
- fprintf(stderr, "unexpected trap %d! at 0x%lx\n", trapno, ip);
- fprintf(stderr, "si_addr %p\n", si->si_addr);
- fprintf(stderr, "REG_ERR: %lx\n",
- (unsigned long)uctxt->uc_mcontext.gregs[REG_ERR]);
- exit(2);
- }
dprint_in_signal = 0;
}
@@ -393,10 +391,15 @@ pid_t fork_lazy_child(void)
return forkret;
}
-#define PKEY_DISABLE_ACCESS 0x1
-#define PKEY_DISABLE_WRITE 0x2
+#ifndef PKEY_DISABLE_ACCESS
+# define PKEY_DISABLE_ACCESS 0x1
+#endif
+
+#ifndef PKEY_DISABLE_WRITE
+# define PKEY_DISABLE_WRITE 0x2
+#endif
-u32 pkey_get(int pkey, unsigned long flags)
+static u32 hw_pkey_get(int pkey, unsigned long flags)
{
u32 mask = (PKEY_DISABLE_ACCESS|PKEY_DISABLE_WRITE);
u32 pkru = __rdpkru();
@@ -418,7 +421,7 @@ u32 pkey_get(int pkey, unsigned long flags)
return masked_pkru;
}
-int pkey_set(int pkey, unsigned long rights, unsigned long flags)
+static int hw_pkey_set(int pkey, unsigned long rights, unsigned long flags)
{
u32 mask = (PKEY_DISABLE_ACCESS|PKEY_DISABLE_WRITE);
u32 old_pkru = __rdpkru();
@@ -452,15 +455,15 @@ void pkey_disable_set(int pkey, int flags)
pkey, flags);
pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE));
- pkey_rights = pkey_get(pkey, syscall_flags);
+ pkey_rights = hw_pkey_get(pkey, syscall_flags);
- dprintf1("%s(%d) pkey_get(%d): %x\n", __func__,
+ dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__,
pkey, pkey, pkey_rights);
pkey_assert(pkey_rights >= 0);
pkey_rights |= flags;
- ret = pkey_set(pkey, pkey_rights, syscall_flags);
+ ret = hw_pkey_set(pkey, pkey_rights, syscall_flags);
assert(!ret);
/* pkru and flags have the same format */
shadow_pkru |= flags << (pkey * 2);
@@ -468,8 +471,8 @@ void pkey_disable_set(int pkey, int flags)
pkey_assert(ret >= 0);
- pkey_rights = pkey_get(pkey, syscall_flags);
- dprintf1("%s(%d) pkey_get(%d): %x\n", __func__,
+ pkey_rights = hw_pkey_get(pkey, syscall_flags);
+ dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__,
pkey, pkey, pkey_rights);
dprintf1("%s(%d) pkru: 0x%x\n", __func__, pkey, rdpkru());
@@ -483,24 +486,24 @@ void pkey_disable_clear(int pkey, int flags)
{
unsigned long syscall_flags = 0;
int ret;
- int pkey_rights = pkey_get(pkey, syscall_flags);
+ int pkey_rights = hw_pkey_get(pkey, syscall_flags);
u32 orig_pkru = rdpkru();
pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE));
- dprintf1("%s(%d) pkey_get(%d): %x\n", __func__,
+ dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__,
pkey, pkey, pkey_rights);
pkey_assert(pkey_rights >= 0);
pkey_rights |= flags;
- ret = pkey_set(pkey, pkey_rights, 0);
+ ret = hw_pkey_set(pkey, pkey_rights, 0);
/* pkru and flags have the same format */
shadow_pkru &= ~(flags << (pkey * 2));
pkey_assert(ret >= 0);
- pkey_rights = pkey_get(pkey, syscall_flags);
- dprintf1("%s(%d) pkey_get(%d): %x\n", __func__,
+ pkey_rights = hw_pkey_get(pkey, syscall_flags);
+ dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__,
pkey, pkey, pkey_rights);
dprintf1("%s(%d) pkru: 0x%x\n", __func__, pkey, rdpkru());
@@ -674,10 +677,12 @@ int mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot,
struct pkey_malloc_record {
void *ptr;
long size;
+ int prot;
};
struct pkey_malloc_record *pkey_malloc_records;
+struct pkey_malloc_record *pkey_last_malloc_record;
long nr_pkey_malloc_records;
-void record_pkey_malloc(void *ptr, long size)
+void record_pkey_malloc(void *ptr, long size, int prot)
{
long i;
struct pkey_malloc_record *rec = NULL;
@@ -709,6 +714,8 @@ void record_pkey_malloc(void *ptr, long size)
(int)(rec - pkey_malloc_records), rec, ptr, size);
rec->ptr = ptr;
rec->size = size;
+ rec->prot = prot;
+ pkey_last_malloc_record = rec;
nr_pkey_malloc_records++;
}
@@ -753,7 +760,7 @@ void *malloc_pkey_with_mprotect(long size, int prot, u16 pkey)
pkey_assert(ptr != (void *)-1);
ret = mprotect_pkey((void *)ptr, PAGE_SIZE, prot, pkey);
pkey_assert(!ret);
- record_pkey_malloc(ptr, size);
+ record_pkey_malloc(ptr, size, prot);
rdpkru();
dprintf1("%s() for pkey %d @ %p\n", __func__, pkey, ptr);
@@ -774,7 +781,7 @@ void *malloc_pkey_anon_huge(long size, int prot, u16 pkey)
size = ALIGN_UP(size, HPAGE_SIZE * 2);
ptr = mmap(NULL, size, PROT_NONE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
pkey_assert(ptr != (void *)-1);
- record_pkey_malloc(ptr, size);
+ record_pkey_malloc(ptr, size, prot);
mprotect_pkey(ptr, size, prot, pkey);
dprintf1("unaligned ptr: %p\n", ptr);
@@ -847,7 +854,7 @@ void *malloc_pkey_hugetlb(long size, int prot, u16 pkey)
pkey_assert(ptr != (void *)-1);
mprotect_pkey(ptr, size, prot, pkey);
- record_pkey_malloc(ptr, size);
+ record_pkey_malloc(ptr, size, prot);
dprintf1("mmap()'d hugetlbfs for pkey %d @ %p\n", pkey, ptr);
return ptr;
@@ -869,7 +876,7 @@ void *malloc_pkey_mmap_dax(long size, int prot, u16 pkey)
mprotect_pkey(ptr, size, prot, pkey);
- record_pkey_malloc(ptr, size);
+ record_pkey_malloc(ptr, size, prot);
dprintf1("mmap()'d for pkey %d @ %p\n", pkey, ptr);
close(fd);
@@ -918,13 +925,21 @@ void *malloc_pkey(long size, int prot, u16 pkey)
}
int last_pkru_faults;
+#define UNKNOWN_PKEY -2
void expected_pk_fault(int pkey)
{
dprintf2("%s(): last_pkru_faults: %d pkru_faults: %d\n",
__func__, last_pkru_faults, pkru_faults);
dprintf2("%s(%d): last_si_pkey: %d\n", __func__, pkey, last_si_pkey);
pkey_assert(last_pkru_faults + 1 == pkru_faults);
- pkey_assert(last_si_pkey == pkey);
+
+ /*
+ * For exec-only memory, we do not know the pkey in
+ * advance, so skip this check.
+ */
+ if (pkey != UNKNOWN_PKEY)
+ pkey_assert(last_si_pkey == pkey);
+
/*
* The signal handler should have cleared out PKRU to let the
* test program continue. We now have to restore it.
@@ -939,10 +954,11 @@ void expected_pk_fault(int pkey)
last_si_pkey = -1;
}
-void do_not_expect_pk_fault(void)
-{
- pkey_assert(last_pkru_faults == pkru_faults);
-}
+#define do_not_expect_pk_fault(msg) do { \
+ if (last_pkru_faults != pkru_faults) \
+ dprintf0("unexpected PK fault: %s\n", msg); \
+ pkey_assert(last_pkru_faults == pkru_faults); \
+} while (0)
int test_fds[10] = { -1 };
int nr_test_fds;
@@ -1151,12 +1167,15 @@ void test_pkey_alloc_exhaust(int *ptr, u16 pkey)
pkey_assert(i < NR_PKEYS*2);
/*
- * There are 16 pkeys supported in hardware. One is taken
- * up for the default (0) and another can be taken up by
- * an execute-only mapping. Ensure that we can allocate
- * at least 14 (16-2).
+ * There are 16 pkeys supported in hardware. Three are
+ * allocated by the time we get here:
+ * 1. The default key (0)
+ * 2. One possibly consumed by an execute-only mapping.
+ * 3. One allocated by the test code and passed in via
+ * 'pkey' to this function.
+ * Ensure that we can allocate at least another 13 (16-3).
*/
- pkey_assert(i >= NR_PKEYS-2);
+ pkey_assert(i >= NR_PKEYS-3);
for (i = 0; i < nr_allocated_pkeys; i++) {
err = sys_pkey_free(allocated_pkeys[i]);
@@ -1165,6 +1184,35 @@ void test_pkey_alloc_exhaust(int *ptr, u16 pkey)
}
}
+/*
+ * pkey 0 is special. It is allocated by default, so you do not
+ * have to call pkey_alloc() to use it first. Make sure that it
+ * is usable.
+ */
+void test_mprotect_with_pkey_0(int *ptr, u16 pkey)
+{
+ long size;
+ int prot;
+
+ assert(pkey_last_malloc_record);
+ size = pkey_last_malloc_record->size;
+ /*
+ * This is a bit of a hack. But mprotect() requires
+ * huge-page-aligned sizes when operating on hugetlbfs.
+ * So, make sure that we use something that's a multiple
+ * of a huge page when we can.
+ */
+ if (size >= HPAGE_SIZE)
+ size = HPAGE_SIZE;
+ prot = pkey_last_malloc_record->prot;
+
+ /* Use pkey 0 */
+ mprotect_pkey(ptr, size, prot, 0);
+
+ /* Make sure that we can set it back to the original pkey. */
+ mprotect_pkey(ptr, size, prot, pkey);
+}
+
void test_ptrace_of_child(int *ptr, u16 pkey)
{
__attribute__((__unused__)) int peek_result;
@@ -1228,7 +1276,7 @@ void test_ptrace_of_child(int *ptr, u16 pkey)
pkey_assert(ret != -1);
/* Now access from the current task, and expect NO exception: */
peek_result = read_ptr(plain_ptr);
- do_not_expect_pk_fault();
+ do_not_expect_pk_fault("read plain pointer after ptrace");
ret = ptrace(PTRACE_DETACH, child_pid, ignored, 0);
pkey_assert(ret != -1);
@@ -1241,12 +1289,9 @@ void test_ptrace_of_child(int *ptr, u16 pkey)
free(plain_ptr_unaligned);
}
-void test_executing_on_unreadable_memory(int *ptr, u16 pkey)
+void *get_pointer_to_instructions(void)
{
void *p1;
- int scratch;
- int ptr_contents;
- int ret;
p1 = ALIGN_PTR_UP(&lots_o_noops_around_write, PAGE_SIZE);
dprintf3("&lots_o_noops: %p\n", &lots_o_noops_around_write);
@@ -1256,7 +1301,23 @@ void test_executing_on_unreadable_memory(int *ptr, u16 pkey)
/* Point 'p1' at the *second* page of the function: */
p1 += PAGE_SIZE;
+ /*
+ * Try to ensure we fault this in on next touch to ensure
+ * we get an instruction fault as opposed to a data one
+ */
madvise(p1, PAGE_SIZE, MADV_DONTNEED);
+
+ return p1;
+}
+
+void test_executing_on_unreadable_memory(int *ptr, u16 pkey)
+{
+ void *p1;
+ int scratch;
+ int ptr_contents;
+ int ret;
+
+ p1 = get_pointer_to_instructions();
lots_o_noops_around_write(&scratch);
ptr_contents = read_ptr(p1);
dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents);
@@ -1272,12 +1333,55 @@ void test_executing_on_unreadable_memory(int *ptr, u16 pkey)
*/
madvise(p1, PAGE_SIZE, MADV_DONTNEED);
lots_o_noops_around_write(&scratch);
- do_not_expect_pk_fault();
+ do_not_expect_pk_fault("executing on PROT_EXEC memory");
ptr_contents = read_ptr(p1);
dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents);
expected_pk_fault(pkey);
}
+void test_implicit_mprotect_exec_only_memory(int *ptr, u16 pkey)
+{
+ void *p1;
+ int scratch;
+ int ptr_contents;
+ int ret;
+
+ dprintf1("%s() start\n", __func__);
+
+ p1 = get_pointer_to_instructions();
+ lots_o_noops_around_write(&scratch);
+ ptr_contents = read_ptr(p1);
+ dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents);
+
+ /* Use a *normal* mprotect(), not mprotect_pkey(): */
+ ret = mprotect(p1, PAGE_SIZE, PROT_EXEC);
+ pkey_assert(!ret);
+
+ dprintf2("pkru: %x\n", rdpkru());
+
+ /* Make sure this is an *instruction* fault */
+ madvise(p1, PAGE_SIZE, MADV_DONTNEED);
+ lots_o_noops_around_write(&scratch);
+ do_not_expect_pk_fault("executing on PROT_EXEC memory");
+ ptr_contents = read_ptr(p1);
+ dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents);
+ expected_pk_fault(UNKNOWN_PKEY);
+
+ /*
+ * Put the memory back to non-PROT_EXEC. Should clear the
+ * exec-only pkey off the VMA and allow it to be readable
+ * again. Go to PROT_NONE first to check for a kernel bug
+ * that did not clear the pkey when doing PROT_NONE.
+ */
+ ret = mprotect(p1, PAGE_SIZE, PROT_NONE);
+ pkey_assert(!ret);
+
+ ret = mprotect(p1, PAGE_SIZE, PROT_READ|PROT_EXEC);
+ pkey_assert(!ret);
+ ptr_contents = read_ptr(p1);
+ do_not_expect_pk_fault("plain read on recently PROT_EXEC area");
+}
+
void test_mprotect_pkey_on_unsupported_cpu(int *ptr, u16 pkey)
{
int size = PAGE_SIZE;
@@ -1302,6 +1406,8 @@ void (*pkey_tests[])(int *ptr, u16 pkey) = {
test_kernel_gup_of_access_disabled_region,
test_kernel_gup_write_to_write_disabled_region,
test_executing_on_unreadable_memory,
+ test_implicit_mprotect_exec_only_memory,
+ test_mprotect_with_pkey_0,
test_ptrace_of_child,
test_pkey_syscalls_on_non_allocated_pkey,
test_pkey_syscalls_bad_args,
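
Throughout these tests, the PKRU register is treated as an array of 2-bit fields, one per key: bit 0 of each field is PKEY_DISABLE_ACCESS and bit 1 is PKEY_DISABLE_WRITE, which is why shadow_pkru shifts flags by (pkey * 2). A small sketch of the read side under that layout (helper name hypothetical):

    #define PKEY_DISABLE_ACCESS  0x1
    #define PKEY_DISABLE_WRITE   0x2
    #define PKRU_BITS_PER_PKEY   2

    /* Extract the two rights bits for 'pkey' from a PKRU value. */
    static unsigned int pkru_get_rights(unsigned int pkru, int pkey)
    {
            unsigned int shift = pkey * PKRU_BITS_PER_PKEY;

            return (pkru >> shift) &
                   (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE);
    }

    /*
     * Example: pkru == 0x0c means key 1 has both access and write
     * disabled (0x0c >> 2 == 0x3) while key 0 is fully enabled.
     */
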
diff --git a/tools/virtio/linux/dma-mapping.h b/tools/virtio/linux/dma-mapping.h
index 1571e24e9494..f91aeb5fe571 100644
--- a/tools/virtio/linux/dma-mapping.h
+++ b/tools/virtio/linux/dma-mapping.h
@@ -6,8 +6,6 @@
# error Virtio userspace code does not support CONFIG_HAS_DMA
#endif
-#define PCI_DMA_BUS_IS_PHYS 1
-
enum dma_data_direction {
DMA_BIDIRECTIONAL = 0,
DMA_TO_DEVICE = 1,
diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c
index 7f6a944db23d..8d90de213ce9 100644
--- a/virt/kvm/arm/mmu.c
+++ b/virt/kvm/arm/mmu.c
@@ -1401,6 +1401,7 @@ static void kvm_send_hwpoison_signal(unsigned long address,
{
siginfo_t info;
+ clear_siginfo(&info);
info.si_signo = SIGBUS;
info.si_errno = 0;
info.si_code = BUS_MCEERR_AR;
diff --git a/virt/kvm/arm/vgic/vgic-debug.c b/virt/kvm/arm/vgic/vgic-debug.c
index 10b38178cff2..4ffc0b5e6105 100644
--- a/virt/kvm/arm/vgic/vgic-debug.c
+++ b/virt/kvm/arm/vgic/vgic-debug.c
@@ -211,6 +211,7 @@ static int vgic_debug_show(struct seq_file *s, void *v)
struct vgic_state_iter *iter = (struct vgic_state_iter *)v;
struct vgic_irq *irq;
struct kvm_vcpu *vcpu = NULL;
+ unsigned long flags;
if (iter->dist_id == 0) {
print_dist_state(s, &kvm->arch.vgic);
@@ -227,9 +228,9 @@ static int vgic_debug_show(struct seq_file *s, void *v)
irq = &kvm->arch.vgic.spis[iter->intid - VGIC_NR_PRIVATE_IRQS];
}
- spin_lock(&irq->irq_lock);
+ spin_lock_irqsave(&irq->irq_lock, flags);
print_irq_state(s, irq, vcpu);
- spin_unlock(&irq->irq_lock);
+ spin_unlock_irqrestore(&irq->irq_lock, flags);
return 0;
}
diff --git a/virt/kvm/arm/vgic/vgic-init.c b/virt/kvm/arm/vgic/vgic-init.c
index 68378fe17a0e..e07156c30323 100644
--- a/virt/kvm/arm/vgic/vgic-init.c
+++ b/virt/kvm/arm/vgic/vgic-init.c
@@ -423,7 +423,7 @@ static irqreturn_t vgic_maintenance_handler(int irq, void *data)
* We cannot rely on the vgic maintenance interrupt to be
* delivered synchronously. This means we can only use it to
* exit the VM, and we perform the handling of EOIed
- * interrupts on the exit path (see vgic_process_maintenance).
+ * interrupts on the exit path (see vgic_fold_lr_state).
*/
return IRQ_HANDLED;
}
diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c
index a8f07243aa9f..4ed79c939fb4 100644
--- a/virt/kvm/arm/vgic/vgic-its.c
+++ b/virt/kvm/arm/vgic/vgic-its.c
@@ -52,6 +52,7 @@ static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid,
{
struct vgic_dist *dist = &kvm->arch.vgic;
struct vgic_irq *irq = vgic_get_irq(kvm, NULL, intid), *oldirq;
+ unsigned long flags;
int ret;
/* In this case there is no put, since we keep the reference. */
@@ -71,7 +72,7 @@ static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid,
irq->intid = intid;
irq->target_vcpu = vcpu;
- spin_lock(&dist->lpi_list_lock);
+ spin_lock_irqsave(&dist->lpi_list_lock, flags);
/*
* There could be a race with another vgic_add_lpi(), so we need to
@@ -99,7 +100,7 @@ static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid,
dist->lpi_list_count++;
out_unlock:
- spin_unlock(&dist->lpi_list_lock);
+ spin_unlock_irqrestore(&dist->lpi_list_lock, flags);
/*
* We "cache" the configuration table entries in our struct vgic_irq's.
@@ -280,8 +281,8 @@ static int update_lpi_config(struct kvm *kvm, struct vgic_irq *irq,
int ret;
unsigned long flags;
- ret = kvm_read_guest(kvm, propbase + irq->intid - GIC_LPI_OFFSET,
- &prop, 1);
+ ret = kvm_read_guest_lock(kvm, propbase + irq->intid - GIC_LPI_OFFSET,
+ &prop, 1);
if (ret)
return ret;
@@ -315,6 +316,7 @@ static int vgic_copy_lpi_list(struct kvm_vcpu *vcpu, u32 **intid_ptr)
{
struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
struct vgic_irq *irq;
+ unsigned long flags;
u32 *intids;
int irq_count, i = 0;
@@ -330,7 +332,7 @@ static int vgic_copy_lpi_list(struct kvm_vcpu *vcpu, u32 **intid_ptr)
if (!intids)
return -ENOMEM;
- spin_lock(&dist->lpi_list_lock);
+ spin_lock_irqsave(&dist->lpi_list_lock, flags);
list_for_each_entry(irq, &dist->lpi_list_head, lpi_list) {
if (i == irq_count)
break;
@@ -339,7 +341,7 @@ static int vgic_copy_lpi_list(struct kvm_vcpu *vcpu, u32 **intid_ptr)
continue;
intids[i++] = irq->intid;
}
- spin_unlock(&dist->lpi_list_lock);
+ spin_unlock_irqrestore(&dist->lpi_list_lock, flags);
*intid_ptr = intids;
return i;
@@ -348,10 +350,11 @@ static int vgic_copy_lpi_list(struct kvm_vcpu *vcpu, u32 **intid_ptr)
static int update_affinity(struct vgic_irq *irq, struct kvm_vcpu *vcpu)
{
int ret = 0;
+ unsigned long flags;
- spin_lock(&irq->irq_lock);
+ spin_lock_irqsave(&irq->irq_lock, flags);
irq->target_vcpu = vcpu;
- spin_unlock(&irq->irq_lock);
+ spin_unlock_irqrestore(&irq->irq_lock, flags);
if (irq->hw) {
struct its_vlpi_map map;
@@ -441,8 +444,9 @@ static int its_sync_lpi_pending_table(struct kvm_vcpu *vcpu)
* this very same byte in the last iteration. Reuse that.
*/
if (byte_offset != last_byte_offset) {
- ret = kvm_read_guest(vcpu->kvm, pendbase + byte_offset,
- &pendmask, 1);
+ ret = kvm_read_guest_lock(vcpu->kvm,
+ pendbase + byte_offset,
+ &pendmask, 1);
if (ret) {
kfree(intids);
return ret;
@@ -786,7 +790,7 @@ static bool vgic_its_check_id(struct vgic_its *its, u64 baser, u32 id,
return false;
/* Each 1st level entry is represented by a 64-bit value. */
- if (kvm_read_guest(its->dev->kvm,
+ if (kvm_read_guest_lock(its->dev->kvm,
BASER_ADDRESS(baser) + index * sizeof(indirect_ptr),
&indirect_ptr, sizeof(indirect_ptr)))
return false;
@@ -1367,8 +1371,8 @@ static void vgic_its_process_commands(struct kvm *kvm, struct vgic_its *its)
cbaser = CBASER_ADDRESS(its->cbaser);
while (its->cwriter != its->creadr) {
- int ret = kvm_read_guest(kvm, cbaser + its->creadr,
- cmd_buf, ITS_CMD_SIZE);
+ int ret = kvm_read_guest_lock(kvm, cbaser + its->creadr,
+ cmd_buf, ITS_CMD_SIZE);
/*
* If kvm_read_guest() fails, this could be due to the guest
* programming a bogus value in CBASER or something else going
@@ -1893,7 +1897,7 @@ static int scan_its_table(struct vgic_its *its, gpa_t base, int size, int esz,
int next_offset;
size_t byte_offset;
- ret = kvm_read_guest(kvm, gpa, entry, esz);
+ ret = kvm_read_guest_lock(kvm, gpa, entry, esz);
if (ret)
return ret;
@@ -2263,7 +2267,7 @@ static int vgic_its_restore_cte(struct vgic_its *its, gpa_t gpa, int esz)
int ret;
BUG_ON(esz > sizeof(val));
- ret = kvm_read_guest(kvm, gpa, &val, esz);
+ ret = kvm_read_guest_lock(kvm, gpa, &val, esz);
if (ret)
return ret;
val = le64_to_cpu(val);
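
Each ITS table read above moves from kvm_read_guest() to kvm_read_guest_lock(). kvm_read_guest() must run under the SRCU read-side lock that protects the memslot array, and these ITS paths do not reliably hold it; the _lock variant plausibly wraps the access in that lock itself. A sketch under that assumption (illustrative shape only; the real helper is defined elsewhere in this series):

    /*
     * Assumed shape: enter the memslot SRCU read side around a plain
     * kvm_read_guest() for callers that do not already hold it.
     */
    static int kvm_read_guest_lock_sketch(struct kvm *kvm, gpa_t gpa,
                                          void *data, unsigned long len)
    {
            int srcu_idx = srcu_read_lock(&kvm->srcu);
            int ret = kvm_read_guest(kvm, gpa, data, len);

            srcu_read_unlock(&kvm->srcu, srcu_idx);
            return ret;
    }
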
diff --git a/virt/kvm/arm/vgic/vgic-mmio.c b/virt/kvm/arm/vgic/vgic-mmio.c
index dbe99d635c80..ff9655cfeb2f 100644
--- a/virt/kvm/arm/vgic/vgic-mmio.c
+++ b/virt/kvm/arm/vgic/vgic-mmio.c
@@ -289,10 +289,16 @@ static void vgic_mmio_change_active(struct kvm_vcpu *vcpu, struct vgic_irq *irq,
irq->vcpu->cpu != -1) /* VCPU thread is running */
cond_resched_lock(&irq->irq_lock);
- if (irq->hw)
+ if (irq->hw) {
vgic_hw_irq_change_active(vcpu, irq, active, !requester_vcpu);
- else
+ } else {
+ u32 model = vcpu->kvm->arch.vgic.vgic_model;
+
irq->active = active;
+ if (model == KVM_DEV_TYPE_ARM_VGIC_V2 &&
+ active && vgic_irq_is_sgi(irq->intid))
+ irq->active_source = requester_vcpu->vcpu_id;
+ }
if (irq->active)
vgic_queue_irq_unlock(vcpu->kvm, irq, flags);
diff --git a/virt/kvm/arm/vgic/vgic-v2.c b/virt/kvm/arm/vgic/vgic-v2.c
index 45aa433f018f..a5f2e44f1c33 100644
--- a/virt/kvm/arm/vgic/vgic-v2.c
+++ b/virt/kvm/arm/vgic/vgic-v2.c
@@ -37,13 +37,6 @@ void vgic_v2_init_lrs(void)
vgic_v2_write_lr(i, 0);
}
-void vgic_v2_set_npie(struct kvm_vcpu *vcpu)
-{
- struct vgic_v2_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v2;
-
- cpuif->vgic_hcr |= GICH_HCR_NPIE;
-}
-
void vgic_v2_set_underflow(struct kvm_vcpu *vcpu)
{
struct vgic_v2_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v2;
@@ -71,13 +64,18 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu)
int lr;
unsigned long flags;
- cpuif->vgic_hcr &= ~(GICH_HCR_UIE | GICH_HCR_NPIE);
+ cpuif->vgic_hcr &= ~GICH_HCR_UIE;
for (lr = 0; lr < vgic_cpu->used_lrs; lr++) {
u32 val = cpuif->vgic_lr[lr];
- u32 intid = val & GICH_LR_VIRTUALID;
+ u32 cpuid, intid = val & GICH_LR_VIRTUALID;
struct vgic_irq *irq;
+ /* Extract the source vCPU id from the LR */
+ cpuid = val & GICH_LR_PHYSID_CPUID;
+ cpuid >>= GICH_LR_PHYSID_CPUID_SHIFT;
+ cpuid &= 7;
+
/* Notify fds when the guest EOI'ed a level-triggered SPI */
if (lr_signals_eoi_mi(val) && vgic_valid_spi(vcpu->kvm, intid))
kvm_notify_acked_irq(vcpu->kvm, 0,
@@ -90,17 +88,16 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu)
/* Always preserve the active bit */
irq->active = !!(val & GICH_LR_ACTIVE_BIT);
+ if (irq->active && vgic_irq_is_sgi(intid))
+ irq->active_source = cpuid;
+
/* Edge is the only case where we preserve the pending bit */
if (irq->config == VGIC_CONFIG_EDGE &&
(val & GICH_LR_PENDING_BIT)) {
irq->pending_latch = true;
- if (vgic_irq_is_sgi(intid)) {
- u32 cpuid = val & GICH_LR_PHYSID_CPUID;
-
- cpuid >>= GICH_LR_PHYSID_CPUID_SHIFT;
+ if (vgic_irq_is_sgi(intid))
irq->source |= (1 << cpuid);
- }
}
/*
@@ -152,8 +149,15 @@ void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr)
u32 val = irq->intid;
bool allow_pending = true;
- if (irq->active)
+ if (irq->active) {
val |= GICH_LR_ACTIVE_BIT;
+ if (vgic_irq_is_sgi(irq->intid))
+ val |= irq->active_source << GICH_LR_PHYSID_CPUID_SHIFT;
+ if (vgic_irq_is_multi_sgi(irq)) {
+ allow_pending = false;
+ val |= GICH_LR_EOI;
+ }
+ }
if (irq->hw) {
val |= GICH_LR_HW;
@@ -190,8 +194,10 @@ void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr)
BUG_ON(!src);
val |= (src - 1) << GICH_LR_PHYSID_CPUID_SHIFT;
irq->source &= ~(1 << (src - 1));
- if (irq->source)
+ if (irq->source) {
irq->pending_latch = true;
+ val |= GICH_LR_EOI;
+ }
}
}
diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c
index 8195f52ae6f0..bdcf8e7a6161 100644
--- a/virt/kvm/arm/vgic/vgic-v3.c
+++ b/virt/kvm/arm/vgic/vgic-v3.c
@@ -27,13 +27,6 @@ static bool group1_trap;
static bool common_trap;
static bool gicv4_enable;
-void vgic_v3_set_npie(struct kvm_vcpu *vcpu)
-{
- struct vgic_v3_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v3;
-
- cpuif->vgic_hcr |= ICH_HCR_NPIE;
-}
-
void vgic_v3_set_underflow(struct kvm_vcpu *vcpu)
{
struct vgic_v3_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v3;
@@ -55,17 +48,23 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu)
int lr;
unsigned long flags;
- cpuif->vgic_hcr &= ~(ICH_HCR_UIE | ICH_HCR_NPIE);
+ cpuif->vgic_hcr &= ~ICH_HCR_UIE;
for (lr = 0; lr < vgic_cpu->used_lrs; lr++) {
u64 val = cpuif->vgic_lr[lr];
- u32 intid;
+ u32 intid, cpuid;
struct vgic_irq *irq;
+ bool is_v2_sgi = false;
- if (model == KVM_DEV_TYPE_ARM_VGIC_V3)
+ cpuid = val & GICH_LR_PHYSID_CPUID;
+ cpuid >>= GICH_LR_PHYSID_CPUID_SHIFT;
+
+ if (model == KVM_DEV_TYPE_ARM_VGIC_V3) {
intid = val & ICH_LR_VIRTUAL_ID_MASK;
- else
+ } else {
intid = val & GICH_LR_VIRTUALID;
+ is_v2_sgi = vgic_irq_is_sgi(intid);
+ }
/* Notify fds when the guest EOI'ed a level-triggered IRQ */
if (lr_signals_eoi_mi(val) && vgic_valid_spi(vcpu->kvm, intid))
@@ -81,18 +80,16 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu)
/* Always preserve the active bit */
irq->active = !!(val & ICH_LR_ACTIVE_BIT);
+ if (irq->active && is_v2_sgi)
+ irq->active_source = cpuid;
+
/* Edge is the only case where we preserve the pending bit */
if (irq->config == VGIC_CONFIG_EDGE &&
(val & ICH_LR_PENDING_BIT)) {
irq->pending_latch = true;
- if (vgic_irq_is_sgi(intid) &&
- model == KVM_DEV_TYPE_ARM_VGIC_V2) {
- u32 cpuid = val & GICH_LR_PHYSID_CPUID;
-
- cpuid >>= GICH_LR_PHYSID_CPUID_SHIFT;
+ if (is_v2_sgi)
irq->source |= (1 << cpuid);
- }
}
/*
@@ -133,10 +130,20 @@ void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr)
{
u32 model = vcpu->kvm->arch.vgic.vgic_model;
u64 val = irq->intid;
- bool allow_pending = true;
+ bool allow_pending = true, is_v2_sgi;
- if (irq->active)
+ is_v2_sgi = (vgic_irq_is_sgi(irq->intid) &&
+ model == KVM_DEV_TYPE_ARM_VGIC_V2);
+
+ if (irq->active) {
val |= ICH_LR_ACTIVE_BIT;
+ if (is_v2_sgi)
+ val |= irq->active_source << GICH_LR_PHYSID_CPUID_SHIFT;
+ if (vgic_irq_is_multi_sgi(irq)) {
+ allow_pending = false;
+ val |= ICH_LR_EOI;
+ }
+ }
if (irq->hw) {
val |= ICH_LR_HW;
@@ -174,8 +181,10 @@ void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr)
BUG_ON(!src);
val |= (src - 1) << GICH_LR_PHYSID_CPUID_SHIFT;
irq->source &= ~(1 << (src - 1));
- if (irq->source)
+ if (irq->source) {
irq->pending_latch = true;
+ val |= ICH_LR_EOI;
+ }
}
}
@@ -335,7 +344,7 @@ retry:
bit_nr = irq->intid % BITS_PER_BYTE;
ptr = pendbase + byte_offset;
- ret = kvm_read_guest(kvm, ptr, &val, 1);
+ ret = kvm_read_guest_lock(kvm, ptr, &val, 1);
if (ret)
return ret;
@@ -388,7 +397,7 @@ int vgic_v3_save_pending_tables(struct kvm *kvm)
ptr = pendbase + byte_offset;
if (byte_offset != last_byte_offset) {
- ret = kvm_read_guest(kvm, ptr, &val, 1);
+ ret = kvm_read_guest_lock(kvm, ptr, &val, 1);
if (ret)
return ret;
last_byte_offset = byte_offset;
diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c
index 702936cbe173..33c8325c8f35 100644
--- a/virt/kvm/arm/vgic/vgic.c
+++ b/virt/kvm/arm/vgic/vgic.c
@@ -43,9 +43,13 @@ struct vgic_global kvm_vgic_global_state __ro_after_init = {
* kvm->lock (mutex)
* its->cmd_lock (mutex)
* its->its_lock (mutex)
- * vgic_cpu->ap_list_lock
- * kvm->lpi_list_lock
- * vgic_irq->irq_lock
+ * vgic_cpu->ap_list_lock must be taken with IRQs disabled
+ * kvm->lpi_list_lock must be taken with IRQs disabled
+ * vgic_irq->irq_lock must be taken with IRQs disabled
+ *
+ * As the ap_list_lock might be taken from the timer interrupt handler,
+ * we have to disable IRQs before taking this lock and everything lower
+ * than it.
*
* If you need to take multiple locks, always take the upper lock first,
* then the lower ones, e.g. first take the its_lock, then the irq_lock.
@@ -72,8 +76,9 @@ static struct vgic_irq *vgic_get_lpi(struct kvm *kvm, u32 intid)
{
struct vgic_dist *dist = &kvm->arch.vgic;
struct vgic_irq *irq = NULL;
+ unsigned long flags;
- spin_lock(&dist->lpi_list_lock);
+ spin_lock_irqsave(&dist->lpi_list_lock, flags);
list_for_each_entry(irq, &dist->lpi_list_head, lpi_list) {
if (irq->intid != intid)
@@ -89,7 +94,7 @@ static struct vgic_irq *vgic_get_lpi(struct kvm *kvm, u32 intid)
irq = NULL;
out_unlock:
- spin_unlock(&dist->lpi_list_lock);
+ spin_unlock_irqrestore(&dist->lpi_list_lock, flags);
return irq;
}
@@ -134,19 +139,20 @@ static void vgic_irq_release(struct kref *ref)
void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq)
{
struct vgic_dist *dist = &kvm->arch.vgic;
+ unsigned long flags;
if (irq->intid < VGIC_MIN_LPI)
return;
- spin_lock(&dist->lpi_list_lock);
+ spin_lock_irqsave(&dist->lpi_list_lock, flags);
if (!kref_put(&irq->refcount, vgic_irq_release)) {
- spin_unlock(&dist->lpi_list_lock);
+ spin_unlock_irqrestore(&dist->lpi_list_lock, flags);
return;
}
list_del(&irq->lpi_list);
dist->lpi_list_count--;
- spin_unlock(&dist->lpi_list_lock);
+ spin_unlock_irqrestore(&dist->lpi_list_lock, flags);
kfree(irq);
}
@@ -725,14 +731,6 @@ static inline void vgic_set_underflow(struct kvm_vcpu *vcpu)
vgic_v3_set_underflow(vcpu);
}
-static inline void vgic_set_npie(struct kvm_vcpu *vcpu)
-{
- if (kvm_vgic_global_state.type == VGIC_V2)
- vgic_v2_set_npie(vcpu);
- else
- vgic_v3_set_npie(vcpu);
-}
-
/* Requires the ap_list_lock to be held. */
static int compute_ap_list_depth(struct kvm_vcpu *vcpu,
bool *multi_sgi)
@@ -746,17 +744,15 @@ static int compute_ap_list_depth(struct kvm_vcpu *vcpu,
DEBUG_SPINLOCK_BUG_ON(!spin_is_locked(&vgic_cpu->ap_list_lock));
list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) {
+ int w;
+
spin_lock(&irq->irq_lock);
/* GICv2 SGIs can count for more than one... */
- if (vgic_irq_is_sgi(irq->intid) && irq->source) {
- int w = hweight8(irq->source);
-
- count += w;
- *multi_sgi |= (w > 1);
- } else {
- count++;
- }
+ w = vgic_irq_get_lr_count(irq);
spin_unlock(&irq->irq_lock);
+
+ count += w;
+ *multi_sgi |= (w > 1);
}
return count;
}
@@ -767,7 +763,6 @@ static void vgic_flush_lr_state(struct kvm_vcpu *vcpu)
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
struct vgic_irq *irq;
int count;
- bool npie = false;
bool multi_sgi;
u8 prio = 0xff;
@@ -797,10 +792,8 @@ static void vgic_flush_lr_state(struct kvm_vcpu *vcpu)
if (likely(vgic_target_oracle(irq) == vcpu)) {
vgic_populate_lr(vcpu, irq, count++);
- if (irq->source) {
- npie = true;
+ if (irq->source)
prio = irq->priority;
- }
}
spin_unlock(&irq->irq_lock);
@@ -813,9 +806,6 @@ static void vgic_flush_lr_state(struct kvm_vcpu *vcpu)
}
}
- if (npie)
- vgic_set_npie(vcpu);
-
vcpu->arch.vgic_cpu.used_lrs = count;
/* Nuke remaining LRs */
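
The updated locking comment above changes the contract for the three inner vgic locks: they must now be taken with IRQs disabled, since ap_list_lock can be contended from the timer interrupt handler. Concretely, the outermost acquisition on each path switches to spin_lock_irqsave(), while locks nested below it can stay plain spin_lock() because interrupts are already off. A condensed sketch of the pattern used by the conversions above:

    static void example_walk(struct vgic_dist *dist)
    {
            struct vgic_irq *irq;
            unsigned long flags;

            spin_lock_irqsave(&dist->lpi_list_lock, flags); /* IRQs off */
            list_for_each_entry(irq, &dist->lpi_list_head, lpi_list) {
                    spin_lock(&irq->irq_lock);  /* lower lock: plain lock */
                    /* ... inspect or update irq state ... */
                    spin_unlock(&irq->irq_lock);
            }
            spin_unlock_irqrestore(&dist->lpi_list_lock, flags);
    }
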
diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h
index 830e815748a0..32c25d42c93f 100644
--- a/virt/kvm/arm/vgic/vgic.h
+++ b/virt/kvm/arm/vgic/vgic.h
@@ -110,6 +110,20 @@ static inline bool vgic_irq_is_mapped_level(struct vgic_irq *irq)
return irq->config == VGIC_CONFIG_LEVEL && irq->hw;
}
+static inline int vgic_irq_get_lr_count(struct vgic_irq *irq)
+{
+ /* Account for the active state as an interrupt */
+ if (vgic_irq_is_sgi(irq->intid) && irq->source)
+ return hweight8(irq->source) + irq->active;
+
+ return irq_is_pending(irq) || irq->active;
+}
+
+static inline bool vgic_irq_is_multi_sgi(struct vgic_irq *irq)
+{
+ return vgic_irq_get_lr_count(irq) > 1;
+}
+
/*
* This struct provides an intermediate representation of the fields contained
* in the GICH_VMCR and ICH_VMCR registers, such that code exporting the GIC
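
vgic_irq_get_lr_count() above captures how many list registers an interrupt will eventually occupy: each pending source of a GICv2 SGI needs its own LR, and an active state accounts for one more, so anything above one makes the interrupt "multi-SGI". A userspace illustration of the arithmetic, with __builtin_popcount standing in for hweight8():

    #include <assert.h>

    /* Mirror of the LR-count rule, assuming a GICv2 SGI when source != 0. */
    static int lr_count(unsigned char source, int active, int pending)
    {
            if (source)
                    return __builtin_popcount(source) + active;
            return pending || active;
    }

    int main(void)
    {
            /* SGI pending from vCPUs 0 and 2 and currently active: 3 LRs. */
            assert(lr_count(0x05, 1, 1) == 3);
            /* Ordinary pending interrupt: a single LR. */
            assert(lr_count(0x00, 0, 1) == 1);
            return 0;
    }
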
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index 6e865e8b5b10..90d30fbe95ae 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -397,7 +397,7 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
* Check if there was an event already pending on the eventfd
* before we registered, and trigger it as if we didn't miss it.
*/
- events = f.file->f_op->poll(f.file, &irqfd->pt);
+ events = vfs_poll(f.file, &irqfd->pt);
if (events & EPOLLIN)
schedule_work(&irqfd->inject);